+ logger.info("Start learning")
+ if self._config['MODEL']['method'] == 'xgboost':
+ logger.info("Using xgboost regressor")
+ self._reg = xgboost.XGBRegressor(learning_rate=self._config['HYPERPARAMETERS'].getfloat('learning_rate'),
+ max_depth=self._config['HYPERPARAMETERS'].getint('max_depth'),
+ random_state=self._config['HYPERPARAMETERS'].getint('random_state'),
+ n_estimators=self._config['HYPERPARAMETERS'].getint('n_estimators'),
+ n_jobs=self._config['HYPERPARAMETERS'].getint('n_jobs'),
+ objective='count:poisson')
+
+ self._reg.fit(X_train, y_train,
+ eval_set=[(X_val, y_val)],
+ early_stopping_rounds=10)
+ elif self._config['MODEL']['method'] == 'lightgbm':
+ train_data = lgb.Dataset(X_train, label=y_train)
+ val_data = lgb.Dataset(X_val, label=y_val)
+ num_round = self._config['HYPERPARAMETERS'].getint('num_round')
+ param = {
+ 'learning_rate': self._config['HYPERPARAMETERS'].getfloat('learning_rate'),
+ 'metric': self._config['HYPERPARAMETERS'].get('metric'),
+ 'num_iterations': self._config['HYPERPARAMETERS'].getint('num_iterations'),
+ 'num_leaves': self._config['HYPERPARAMETERS'].getint('num_leaves'),
+ 'objective': self._config['HYPERPARAMETERS'].get('objective')
+ }
+ self._reg = lgb.train(param, train_data, num_round, valid_sets=[val_data])
+
def _evaluate(self):
    """Score the fitted regressor and save a text report plus two plots.

    Reads ``self._reg``, ``self._X_test``, ``self._y_test``, ``self._df``
    and ``self._filename``. ``self._df`` must expose a ``cible`` column
    (used as the target) and a ``year`` column.

    Outputs:
        * ``<self._filename>.result`` -- the text report (also printed).
        * ``<self._filename>.png`` -- actual vs. predicted values for the
          rows of ``self._df`` belonging to its most recent ``year``.
        * ``<self._filename>-test.png`` -- actual vs. predicted values for
          the test set.
    """
    logger.info("Evaluation of the learner")

    def save_comparison_plot(actual, predicted, title, path):
        # Draw the first 300 points of both series on one figure and save it.
        P.figure(figsize=(36, 16))
        P.plot(list(actual)[:300], color='blue', label='actual')
        P.plot(predicted[:300], color='red', label='predicted')
        P.title(title)
        P.xlabel('Hour in the year')
        P.ylabel('Number of cumulated interventions')
        P.legend()
        P.savefig(path)
        # Release the figure; otherwise every call leaves two open figures.
        P.close()

    # Predict the test set once; the report and the test plot both reuse it.
    y_test_pred = self._reg.predict(self._X_test)

    report = [
        f"Average interventions per time unit: {mean(self._df.cible)}\n",
        f"Standard deviation: {stdev(self._df.cible)}\n\n",
        f"Mean absolute error: {mean_absolute_error(self._y_test, y_test_pred)}\n",
        f"Root mean squared error: {sqrt(mean_squared_error(self._y_test, y_test_pred))}\n\n",
    ]

    # int() truncates toward zero, so e.g. an error of 0.9 counts as 0.
    # The errors do not depend on the threshold, so compute them only once.
    abs_errors = [abs(int(u - v)) for u, v in zip(self._y_test.values, y_test_pred)]
    n_test = len(self._y_test)
    for k in range(10):
        within = sum(err <= k for err in abs_errors)
        report.append(f"Percentage of errors lower than {k}: {within / n_test * 100}\n")

    txt = "".join(report)
    print(txt)
    with open(self._filename + ".result", 'w') as f:
        f.write(txt)

    # Plot the most recent year present in the data; the title is derived
    # from that year rather than hard-coded.
    last_year = self._df.year.max()
    last_year_rows = self._df[self._df.year == last_year]
    y_true = last_year_rows.cible
    x_true = last_year_rows.drop('cible', axis=1)
    save_comparison_plot(y_true, self._reg.predict(x_true),
                         f'Predictions for {last_year}',
                         self._filename + ".png")

    save_comparison_plot(self._y_test, y_test_pred,
                         'Predictions for test set',
                         self._filename + "-test.png")