AND (Algorithmique Numérique Distribuée) Private Git Repository - predictops.git/commitdiff

lightgbm is now working
author     Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
           Sun, 23 Feb 2020 09:38:01 +0000 (10:38 +0100)
committer  Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
           Sun, 23 Feb 2020 09:38:01 +0000 (10:38 +0100)
config/learn.cfg
config/learners/lightgbm.cfg
predictops/learn/learning.py

diff --git a/config/learn.cfg b/config/learn.cfg
index 73379cf605405d0a8bccf95089daf343064ad5ee..aef0edd22f22e03a84ea12dacf04f4bcf3db6322 100644
--- a/config/learn.cfg
+++ b/config/learn.cfg
@@ -1,7 +1,7 @@
 [DATETIME]
 start    = 01/01/2006 00:00:00
 end      = 12/31/2019 23:00:00
-hourStep = 3
+hourStep = 1
 
 
 [FEATURES]
@@ -24,13 +24,13 @@ order       = 3
 
 
 [HISTORY_KNOWLEDGE]
-nb_lines = 24//3*7*4
+nb_lines = 24*7*4
 
 
 [TARGET]
 config      = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
 cumulative  = True
-horizon     = 1
+horizon     = 0
 
 [LEARNER]
-config =  (Path.cwd() / 'config') / 'learners' / 'xgboost.cfg'
\ No newline at end of file
+config =  (Path.cwd() / 'config') / 'learners' / 'lightgbm.cfg'
\ No newline at end of file
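
The values in learn.cfg are written as Python expressions (arithmetic for nb_lines, Path compositions for the config paths), so a plain string read is not enough. Below is a minimal sketch of how such a file can be read, assuming configparser plus eval() for the expression-valued options; this is an illustration, not necessarily the project's actual loader.

from configparser import ConfigParser
from pathlib import Path

config = ConfigParser()
config.read('config/learn.cfg')

# Plain numeric options can use the typed getters.
hour_step = config.getint('DATETIME', 'hourStep')    # 1 after this commit
horizon   = config.getint('TARGET', 'horizon')       # 0 after this commit

# Expression-valued options are evaluated; Path must be in scope for eval().
nb_lines    = eval(config.get('HISTORY_KNOWLEDGE', 'nb_lines'))   # 24*7*4 = 672
learner_cfg = eval(config.get('LEARNER', 'config'))               # path to lightgbm.cfg

print(hour_step, horizon, nb_lines, learner_cfg)
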
diff --git a/config/learners/lightgbm.cfg b/config/learners/lightgbm.cfg
index ef062b303d32bef04634f43c81ffea56132b6140..b5887f0497b8db84204398c21990d6df43c04559 100644
--- a/config/learners/lightgbm.cfg
+++ b/config/learners/lightgbm.cfg
@@ -4,7 +4,7 @@ method = lightgbm
 [HYPERPARAMETERS]
 learning_rate  = 0.1
 metric         = auc
-num_iterations = 100
+num_iterations = 1000
 num_round      = 10
 num_leaves     = 31
 objective      = poisson
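
The [HYPERPARAMETERS] section maps almost directly onto a LightGBM parameter dict. The following short, self-contained sketch (toy data, not the project's Learning class) shows how these options can be fed to lgb.train, with num_iterations driving the number of boosting rounds.

from configparser import ConfigParser

import lightgbm as lgb
import numpy as np

config = ConfigParser()
config.read('config/learners/lightgbm.cfg')

params = {
    'learning_rate': config.getfloat('HYPERPARAMETERS', 'learning_rate'),
    'metric':        config.get('HYPERPARAMETERS', 'metric'),
    'num_leaves':    config.getint('HYPERPARAMETERS', 'num_leaves'),
    'objective':     config.get('HYPERPARAMETERS', 'objective'),
}

# Toy count data, only to make the sketch runnable.
rng = np.random.default_rng(0)
X = rng.random((200, 5))
y = rng.poisson(3, size=200)

booster = lgb.train(
    params,
    lgb.Dataset(X, label=y),
    num_boost_round=config.getint('HYPERPARAMETERS', 'num_iterations'),  # 1000 after this commit
)
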
diff --git a/predictops/learn/learning.py b/predictops/learn/learning.py
index 959271ded77ad22aea436d98632ab7e97cf272d4..fb3675d56d7e007c557d93aeedd64bcc514a8df0 100644
--- a/predictops/learn/learning.py
+++ b/predictops/learn/learning.py
@@ -10,6 +10,7 @@ from statistics import mean, stdev
 import lightgbm as lgb
 import matplotlib
 import os
+import pandas as pd
 import pylab as P
 import xgboost
 
@@ -25,8 +26,16 @@ class Learning:
         self._config.read(config_file)
         self._file_name = file_name
         logger.info("Dealing with the horizon of prediction")
-        self._X = X[:-horizon]
-        self._y = y[horizon:]
+        if horizon:
+            self._X = X[:-horizon]
+            self._y = y[horizon:]
+        else:
+            self._X = X
+            self._y = y
+        rep = (Path.cwd() / self._file_name)
+        rep.mkdir()
+        self._filename = str(self._file_name / os.path.basename(self._file_name))
+        self._X.to_csv(self._filename + '.csv')
         self._learn()
         self._evaluate()
 
@@ -84,10 +93,7 @@ class Learning:
             txt += f"Percentage of errors lower than {k}: {[abs(int(u-v))<=k for u,v in zip(self._y_test.values, y_test_pred)].count(True)/len(self._y_test)*100}\n"
 
         print(txt)
-        rep = (Path.cwd() / self._file_name)
-        rep.mkdir()
-        filename = str(self._file_name / os.path.basename(self._file_name))
-        with open(filename + ".result", 'w') as f:
+        with open(self._filename + ".result", 'w') as f:
             f.write(txt)
 
         y_true = self._df[self._df.year == self._df.year.max()].cible
@@ -101,7 +107,7 @@ class Learning:
         P.xlabel('Hour in the year')
         P.ylabel('Number of cumulated interventions')
         P.legend()
-        P.savefig(filename + ".png")
+        P.savefig(self._filename + ".png")
 
         yy_test_pred = self._reg.predict(self._X_test)
         P.figure(figsize=(36, 16))
@@ -111,10 +117,10 @@ class Learning:
         P.xlabel('Hour in the year')
         P.ylabel('Number of cumulated interventions')
         P.legend()
-        P.savefig(filename + "-test.png")
+        P.savefig(self._filename + "-test.png")
 
         if self._config['MODEL']['method'] == 'xgboost':
             xgboost.plot_importance(self._reg)
             fig = matplotlib.pyplot.gcf()
             fig.set_size_inches(15, 130)
-            fig.savefig(filename + '-feat_importance.pdf')
+            fig.savefig(self._filename + '-feat_importance.pdf')
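
Taken together, the learning.py changes do two things: the horizon shift is skipped when horizon is 0 (slicing with X[:-0] would otherwise yield an empty frame), and the output directory plus base filename are created once in the constructor and reused by _evaluate for the .csv, .result and .png files. The sketch below illustrates those two behaviours in isolation, reusing the names from the diff but outside the Learning class.

import os
from pathlib import Path

import pandas as pd


def align_horizon(X: pd.DataFrame, y: pd.Series, horizon: int):
    # Pair features at time t with the target at time t + horizon.
    # With horizon == 0, X[:-0] would be empty, hence the explicit branch.
    if horizon:
        return X[:-horizon], y[horizon:]
    return X, y


def prepare_output(file_name: Path, X: pd.DataFrame) -> str:
    # Create the per-run output directory and dump the feature matrix there.
    rep = Path.cwd() / file_name
    rep.mkdir()  # raises FileExistsError if the run directory already exists
    base = str(file_name / os.path.basename(file_name))
    X.to_csv(base + '.csv')
    return base  # reused later as base + '.result', base + '.png', ...
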