[DATETIME]
-start = 01/01/2010 01:00:00
-end = 12/31/2010 23:00:00
-hourStep = 6
+start = 01/01/2006 00:00:00
+end = 12/31/2017 23:00:00
+hourStep = 1
[FEATURES]
[TARGET]
config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+
+
+[LEARNER]
+config = (Path.cwd() / 'config') / 'learners' / 'xgboost.cfg'
\ No newline at end of file
--- /dev/null
+[MODEL]
+method = xgboost
\ No newline at end of file
engine.add_preprocessing()
+ engine.learn()
'''target = toarea(stream_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
from .source.ephemeris import Ephemeris
from .source.meteofrance import MeteoFrance
+from .learn.learning import Learning
from .learn.preprocessing import Preprocessing
from .target.target import Target
def add_preprocessing(self):
+ """Build the Preprocessing stage from the engine config, features and target."""
- process = Preprocessing(config_file = self._config,
- dict_features = self.X,
- dict_target = self.y)
- print(process.dataframe.head(n=2))
+ # Kept on the instance (not a local) because learn() reads self._preproc.dataframe.
+ self._preproc = Preprocessing(config_file = self._config,
+ dict_features = self.X,
+ dict_target = self.y)
+ def learn(self):
+ """Create the Learning object from the preprocessed features and the target values.
+
+ Reads ``nb_lines`` from the ``[HISTORY_KNOWLEDGE]`` config section and
+ drops that many leading target values before passing them as ``y``.
+ Requires add_preprocessing() to have been called first, since it reads
+ ``self._preproc.dataframe``.
+ """
+ history = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
+ # NOTE(review): the [LEARNER] 'config' value is passed through eval(), so whatever
+ # expression the config file contains is executed -- only acceptable while the
+ # config file is trusted.
+ # presumably the first `history` targets are skipped so that y lines up with the
+ # rows kept in the preprocessed dataframe -- TODO confirm against Preprocessing.
+ self._learner = Learning(config_file = eval(self._config['LEARNER']['config']),
+ X = self._preproc.dataframe, y = list(self.y.values())[history:])
+
@property
def X(self):
--- /dev/null
+from configparser import ConfigParser
+from math import sqrt
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+from sklearn.model_selection import train_test_split
+
+import xgboost
+
+class Learning:
+    """Train and evaluate a regressor on the preprocessed features.
+
+    All the work happens in the constructor: it reads the learner
+    configuration, splits the data into train / validation / test subsets,
+    fits the model selected by ``[MODEL] method`` and prints the RMSE and
+    MAE measured on the test subset.
+    """
+
+    def __init__(self, config_file = None,
+                 X = None, y = None):
+        """Fit the configured model and print its test-set errors.
+
+        Parameters:
+            config_file: path of the learner configuration file; its
+                ``[MODEL]`` section must define ``method``.
+            X: dataframe of features (left unmodified).
+            y: target values, assigned as the ``'cible'`` column next to
+                the features, so it must be compatible with ``X``'s rows.
+
+        Only ``method = xgboost`` is handled; any other value trains
+        nothing and prints no metric.
+        """
+        self._config = ConfigParser()
+        self._config.read(config_file)
+
+        # Work on a copy: adding the target column to ``X`` itself would
+        # silently modify the caller's dataframe (and leak the target into
+        # the features on any later use of it).
+        df = X.copy()
+        df['cible'] = y
+
+        print(df.head())
+
+        # 80/20 split for the test set, then 80/20 again for validation;
+        # both splits use a fixed seed.
+        train_val_set, test_set = train_test_split(df, test_size = 0.2, random_state = 42)
+        train_set, val_set = train_test_split(train_val_set, test_size = 0.2, random_state = 42)
+
+        X_test = test_set.drop('cible', axis = 1)
+        y_test = test_set['cible'].copy()
+
+        X_train = train_set.drop('cible', axis = 1)
+        y_train = train_set['cible'].copy()
+        X_val = val_set.drop('cible', axis = 1)
+        y_val = val_set['cible'].copy()
+
+        if self._config['MODEL']['method'] == 'xgboost':
+            xgb_reg = xgboost.XGBRegressor(learning_rate = 0.01,
+                                           max_depth = 10,
+                                           random_state = 42,
+                                           n_estimators = 173,
+                                           n_jobs = -1,
+                                           objective = 'count:poisson')
+
+            # The validation subset drives early stopping (10 rounds
+            # without improvement).
+            xgb_reg.fit(X_train, y_train,
+                        eval_set = [(X_val, y_val)],
+                        early_stopping_rounds = 10)
+
+            y_test_pred = xgb_reg.predict(X_test)
+            # Metrics are called as (y_true, y_pred), the documented order;
+            # RMSE and MAE are symmetric, so the printed values are the same.
+            print(sqrt(mean_squared_error(y_test, y_test_pred)), mean_absolute_error(y_test, y_test_pred))
\ No newline at end of file
'''
logger.info("Integrating previous nb of interventions as features")
nb_lines = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
- print(len(self._dataframe))
- print(self._dataframe.head(4))
for k in range(1,nb_lines+1):
name = 'history_'+str(nb_lines-k+1)
self._dataframe[name] = [np.NaN]*k + list(self._dict_target.values())[:-k]
self._numerical_columns.append(name)
self._dataframe = self._dataframe[nb_lines:]
- print(self._dataframe.head(4))
- print(len(self._dataframe))
self._config = ConfigParser()
self._config.read(config_file)
+ self._latitude = self._config['POSITION'].getfloat('latitude')
+ self._longitude = self._config['POSITION'].getfloat('longitude')
+
self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
self._dated_features = None
scipy==1.4.1
Shapely==1.7.0
six==1.14.0
+xgboost==0.90
xlrd==1.2.0