From: Christophe Guyeux Date: Sun, 23 Feb 2020 09:38:01 +0000 (+0100) Subject: lightgbm is now working X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/predictops.git/commitdiff_plain/90e69cb2125d4bae76a27b9c38defb4f70bf2ca6?ds=sidebyside lightgbm is now working --- diff --git a/config/learn.cfg b/config/learn.cfg index 73379cf..aef0edd 100644 --- a/config/learn.cfg +++ b/config/learn.cfg @@ -1,7 +1,7 @@ [DATETIME] start = 01/01/2006 00:00:00 end = 12/31/2019 23:00:00 -hourStep = 3 +hourStep = 1 [FEATURES] @@ -24,13 +24,13 @@ order = 3 [HISTORY_KNOWLEDGE] -nb_lines = 24//3*7*4 +nb_lines = 24*7*4 [TARGET] config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg' cumulative = True -horizon = 1 +horizon = 0 [LEARNER] -config = (Path.cwd() / 'config') / 'learners' / 'xgboost.cfg' \ No newline at end of file +config = (Path.cwd() / 'config') / 'learners' / 'lightgbm.cfg' \ No newline at end of file diff --git a/config/learners/lightgbm.cfg b/config/learners/lightgbm.cfg index ef062b3..b5887f0 100644 --- a/config/learners/lightgbm.cfg +++ b/config/learners/lightgbm.cfg @@ -4,7 +4,7 @@ method = lightgbm [HYPERPARAMETERS] learning_rate = 0.1 metric = auc -num_iterations = 100 +num_iterations = 1000 num_round = 10 num_leaves = 31 objective = poisson diff --git a/predictops/learn/learning.py b/predictops/learn/learning.py index 959271d..fb3675d 100644 --- a/predictops/learn/learning.py +++ b/predictops/learn/learning.py @@ -10,6 +10,7 @@ from statistics import mean, stdev import lightgbm as lgb import matplotlib import os +import pandas as pd import pylab as P import xgboost @@ -25,8 +26,16 @@ class Learning: self._config.read(config_file) self._file_name = file_name logger.info("Dealing with the horizon of prediction") - self._X = X[:-horizon] - self._y = y[horizon:] + if horizon: + self._X = X[:-horizon] + self._y = y[horizon:] + else: + self._X = X + self._y = y + rep = (Path.cwd() / self._file_name) + rep.mkdir() + self._filename = str(self._file_name / os.path.basename(self._file_name)) + self._X.to_csv(self._filename + '.csv') self._learn() self._evaluate() @@ -84,10 +93,7 @@ class Learning: txt += f"Percentage of errors lower than {k}: {[abs(int(u-v))<=k for u,v in zip(self._y_test.values, y_test_pred)].count(True)/len(self._y_test)*100}\n" print(txt) - rep = (Path.cwd() / self._file_name) - rep.mkdir() - filename = str(self._file_name / os.path.basename(self._file_name)) - with open(filename + ".result", 'w') as f: + with open(self._filename + ".result", 'w') as f: f.write(txt) y_true = self._df[self._df.year == self._df.year.max()].cible @@ -101,7 +107,7 @@ class Learning: P.xlabel('Hour in the year') P.ylabel('Number of cumulated interventions') P.legend() - P.savefig(filename + ".png") + P.savefig(self._filename + ".png") yy_test_pred = self._reg.predict(self._X_test) P.figure(figsize=(36, 16)) @@ -111,10 +117,10 @@ class Learning: P.xlabel('Hour in the year') P.ylabel('Number of cumulated interventions') P.legend() - P.savefig(filename + "-test.png") + P.savefig(self._filename + "-test.png") if self._config['MODEL']['method'] == 'xgboost': xgboost.plot_importance(self._reg) fig = matplotlib.pyplot.gcf() fig.set_size_inches(15, 130) - fig.savefig(filename + '-feat_importance.pdf') + fig.savefig(self._filename + '-feat_importance.pdf')