AND Private Git Repository - predictops.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Learning process: first version
author Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Tue, 18 Feb 2020 12:39:49 +0000 (13:39 +0100)
committer Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Tue, 18 Feb 2020 12:39:49 +0000 (13:39 +0100)
config/learn.cfg
config/learners/xgboost.cfg [new file with mode: 0644]
main.py
predictops/engine.py
predictops/learn/learning.py [new file with mode: 0644]
predictops/learn/preprocessing.py
predictops/source/meteofrance.py
requirements.txt

diff --git a/config/learn.cfg b/config/learn.cfg
index 1a9566eee07f32774bf5d8655cab5c3eb4b591c2..53c62deeb541804097e3a30cb8ee04449038aff2 100644 (file)
@@ -1,7 +1,7 @@
 [DATETIME]
 [DATETIME]
-start    = 01/01/2010 01:00:00
-end      = 12/31/2010 23:00:00
-hourStep = 6
+start    = 01/01/2006 00:00:00
+end      = 12/31/2017 23:00:00
+hourStep = 1
 
 
 [FEATURES]
 
 
 [FEATURES]
@@ -25,3 +25,7 @@ nb_lines = 5
 
 [TARGET]
 config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
 
 [TARGET]
 config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+
+
+[LEARNER]
+config =  (Path.cwd() / 'config') / 'learners' / 'xgboost.cfg'
\ No newline at end of file
diff --git a/config/learners/xgboost.cfg b/config/learners/xgboost.cfg
new file mode 100644 (file)
index 0000000..61975c2
--- /dev/null
@@ -0,0 +1,2 @@
+[MODEL]
+method = xgboost
\ No newline at end of file
diff --git a/main.py b/main.py
index cf8fe81d13940c996d9abb1344c9a296d6f199f1..27f502aeaa676fd0a0500e2ca4f20a295f5d0ce9 100644 (file)
--- a/main.py
+++ b/main.py
@@ -18,6 +18,7 @@ if __name__ == '__main__':
 
     engine.add_preprocessing()
 
 
     engine.add_preprocessing()
 
+    engine.learn()
 
     '''target = toarea(stream_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
 
 
     '''target = toarea(stream_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
 
diff --git a/predictops/engine.py b/predictops/engine.py
index 44ab9c4c08bc089fd53038f5cd84fe0b4d557ef1..f87e82e833fbd87bcc091f4dc568108a9bf86a21 100644 (file)
@@ -7,6 +7,7 @@ from shutil import rmtree
 
 from .source.ephemeris import Ephemeris
 from .source.meteofrance import MeteoFrance
 
 from .source.ephemeris import Ephemeris
 from .source.meteofrance import MeteoFrance
+from .learn.learning import Learning
 from .learn.preprocessing import Preprocessing
 from .target.target import Target
 
 from .learn.preprocessing import Preprocessing
 from .target.target import Target
 
@@ -73,12 +74,16 @@ class Engine:
 
 
     def add_preprocessing(self):
 
 
     def add_preprocessing(self):
-        process = Preprocessing(config_file = self._config,
-                                dict_features = self.X,
-                                dict_target = self.y)
-        print(process.dataframe.head(n=2))
+        self._preproc = Preprocessing(config_file = self._config,
+                                      dict_features = self.X,
+                                      dict_target = self.y)
 
 
 
 
+    def learn(self):
+        history = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
+        self._learner = Learning(config_file = eval(self._config['LEARNER']['config']),
+                                 X = self._preproc.dataframe, y = list(self.y.values())[history:])
+
 
     @property
     def X(self):
 
     @property
     def X(self):
diff --git a/predictops/learn/learning.py b/predictops/learn/learning.py
new file mode 100644 (file)
index 0000000..4164500
--- /dev/null
@@ -0,0 +1,45 @@
+from configparser import ConfigParser
+from math import sqrt
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+from sklearn.model_selection import train_test_split
+
+import xgboost
+
+class Learning:
+
+    def __init__(self, config_file = None,
+                 X = None, y = None):
+        self._config = ConfigParser()
+        self._config.read(config_file)
+
+        df = X
+        df['cible'] = y
+
+        print(df.head())
+
+        train_val_set, test_set = train_test_split(df, test_size = 0.2, random_state = 42)
+        train_set, val_set = train_test_split(train_val_set, test_size = 0.2, random_state = 42)
+
+        X_test = test_set.drop('cible', axis = 1)
+        y_test = test_set['cible'].copy()
+
+        X_train = train_set.drop('cible', axis=1)
+        y_train = train_set['cible'].copy()
+        X_val = val_set.drop('cible', axis=1)
+        y_val = val_set['cible'].copy()
+
+
+        if self._config['MODEL']['method'] == 'xgboost':
+            xgb_reg = xgboost.XGBRegressor(learning_rate = 0.01,
+                                                   max_depth = 10,
+                                                   random_state=42,
+                                                   n_estimators = 173,
+                                                   n_jobs=-1,
+                                                   objective = 'count:poisson')
+
+            xgb_reg.fit(X_train, y_train,
+                        eval_set=[(X_val, y_val)],
+                        early_stopping_rounds=10)
+
+            y_test_pred = xgb_reg.predict(X_test)
+            print(sqrt(mean_squared_error(y_test_pred, y_test)), mean_absolute_error(y_test_pred,y_test))
\ No newline at end of file
diff --git a/predictops/learn/preprocessing.py b/predictops/learn/preprocessing.py
index 187a5b73b664da58031d45d55839548a10ec1be1..51ecb4e162ff77b804a6a9e4790c4e9da34e1410 100644 (file)
@@ -182,15 +182,11 @@ class Preprocessing:
         '''
         logger.info("Integrating previous nb of interventions as features")
         nb_lines = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
         '''
         logger.info("Integrating previous nb of interventions as features")
         nb_lines = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
-        print(len(self._dataframe))
-        print(self._dataframe.head(4))
         for k in range(1,nb_lines+1):
             name = 'history_'+str(nb_lines-k+1)
             self._dataframe[name] = [np.NaN]*k + list(self._dict_target.values())[:-k]
             self._numerical_columns.append(name)
         self._dataframe = self._dataframe[nb_lines:]
         for k in range(1,nb_lines+1):
             name = 'history_'+str(nb_lines-k+1)
             self._dataframe[name] = [np.NaN]*k + list(self._dict_target.values())[:-k]
             self._numerical_columns.append(name)
         self._dataframe = self._dataframe[nb_lines:]
-        print(self._dataframe.head(4))
-        print(len(self._dataframe))
 
 
 
 
 
 
diff --git a/predictops/source/meteofrance.py b/predictops/source/meteofrance.py
index 6bd23edc1435b857c5a2a00150778870ad30ebd7..b26c6bf6525f0a87d1cba73d251e7937c89beac6 100644 (file)
@@ -58,6 +58,9 @@ class MeteoFrance(Source):
         self._config = ConfigParser()
         self._config.read(config_file)
 
         self._config = ConfigParser()
         self._config.read(config_file)
 
+        self._latitude = self._config['POSITION'].getfloat('latitude')
+        self._longitude = self._config['POSITION'].getfloat('longitude')
+
         self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
 
         self._dated_features = None
         self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
 
         self._dated_features = None
diff --git a/requirements.txt b/requirements.txt
index 6249a161c878fe217583f52d727df621886c4b38..3e40e81e4f54c2c996a71f4146c13a9ff589b8f8 100644 (file)
@@ -17,4 +17,5 @@ scikit-learn==0.22.1
 scipy==1.4.1
 Shapely==1.7.0
 six==1.14.0
 scipy==1.4.1
 Shapely==1.7.0
 six==1.14.0
+xgboost==0.90
 xlrd==1.2.0
 xlrd==1.2.0