From 288baa6ff06c1b815ec24d164770acc93ac80499 Mon Sep 17 00:00:00 2001
From: Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Date: Tue, 18 Feb 2020 13:39:49 +0100
Subject: [PATCH] Learning process: first version

---
 config/learn.cfg                  | 10 ++++---
 config/learners/xgboost.cfg       |  2 ++
 main.py                           |  1 +
 predictops/engine.py              | 13 ++++++---
 predictops/learn/learning.py      | 45 +++++++++++++++++++++++++++++++
 predictops/learn/preprocessing.py |  4 ---
 predictops/source/meteofrance.py  |  3 +++
 requirements.txt                  |  1 +
 8 files changed, 68 insertions(+), 11 deletions(-)
 create mode 100644 config/learners/xgboost.cfg
 create mode 100644 predictops/learn/learning.py

diff --git a/config/learn.cfg b/config/learn.cfg
index 1a9566e..53c62de 100644
--- a/config/learn.cfg
+++ b/config/learn.cfg
@@ -1,7 +1,7 @@
 [DATETIME]
-start    = 01/01/2010 01:00:00
-end      = 12/31/2010 23:00:00
-hourStep = 6
+start    = 01/01/2006 00:00:00
+end      = 12/31/2017 23:00:00
+hourStep = 1
 
 
 [FEATURES]
@@ -25,3 +25,7 @@ nb_lines = 5
 
 [TARGET]
 config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+
+
+[LEARNER]
+config =  (Path.cwd() / 'config') / 'learners' / 'xgboost.cfg'
\ No newline at end of file
diff --git a/config/learners/xgboost.cfg b/config/learners/xgboost.cfg
new file mode 100644
index 0000000..61975c2
--- /dev/null
+++ b/config/learners/xgboost.cfg
@@ -0,0 +1,2 @@
+[MODEL]
+method = xgboost
\ No newline at end of file
diff --git a/main.py b/main.py
index cf8fe81..27f502a 100644
--- a/main.py
+++ b/main.py
@@ -18,6 +18,7 @@ if __name__ == '__main__':
 
     engine.add_preprocessing()
 
+    engine.learn()
 
     '''target = toarea(stream_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
 
diff --git a/predictops/engine.py b/predictops/engine.py
index 44ab9c4..f87e82e 100644
--- a/predictops/engine.py
+++ b/predictops/engine.py
@@ -7,6 +7,7 @@ from shutil import rmtree
 
 from .source.ephemeris import Ephemeris
 from .source.meteofrance import MeteoFrance
+from .learn.learning import Learning
 from .learn.preprocessing import Preprocessing
 from .target.target import Target
 
@@ -73,12 +74,16 @@ class Engine:
 
 
     def add_preprocessing(self):
-        process = Preprocessing(config_file = self._config,
-                                dict_features = self.X,
-                                dict_target = self.y)
-        print(process.dataframe.head(n=2))
+        self._preproc = Preprocessing(config_file = self._config,
+                                      dict_features = self.X,
+                                      dict_target = self.y)
 
 
+    def learn(self):
+        # Train on preprocessed features. NOTE(review): eval() below runs config text as code; trusted configs only.
+        history = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')  # Preprocessing drops this many leading rows
+        self._learner = Learning(config_file = eval(self._config['LEARNER']['config']),
+                                 X = self._preproc.dataframe, y = list(self.y.values())[history:])
 
     @property
     def X(self):
diff --git a/predictops/learn/learning.py b/predictops/learn/learning.py
new file mode 100644
index 0000000..4164500
--- /dev/null
+++ b/predictops/learn/learning.py
@@ -0,0 +1,45 @@
+from configparser import ConfigParser
+from math import sqrt
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+from sklearn.model_selection import train_test_split
+
+import xgboost
+
+class Learning:
+    '''Split the data, fit the configured regressor, print RMSE and MAE on the test split.'''
+    def __init__(self, config_file = None,
+                 X = None, y = None):
+        '''config_file: learner .cfg read for [MODEL] method; X: features frame; y: targets.'''
+        self._config = ConfigParser()
+        self._config.read(config_file)
+        self._model = None  # stays None unless a supported method is configured
+        # Work on a copy: adding the target column must not alter the caller's frame.
+        df = X.copy()
+        df['cible'] = y
+        print(df.head())
+
+        # 80/20 split into (train + validation) / test, then 80/20 again into train / validation.
+        train_val_set, test_set = train_test_split(df, test_size = 0.2, random_state = 42)
+        train_set, val_set = train_test_split(train_val_set, test_size = 0.2, random_state = 42)
+
+        X_test = test_set.drop('cible', axis = 1)
+        y_test = test_set['cible'].copy()
+        X_train = train_set.drop('cible', axis=1)
+        y_train = train_set['cible'].copy()
+        X_val = val_set.drop('cible', axis=1)
+        y_val = val_set['cible'].copy()
+
+        if self._config['MODEL']['method'] == 'xgboost':
+            xgb_reg = xgboost.XGBRegressor(learning_rate = 0.01,
+                                           max_depth = 10,
+                                           random_state=42,
+                                           n_estimators = 173,
+                                           n_jobs=-1,
+                                           objective = 'count:poisson')
+            # The validation split drives early stopping (10 rounds without improvement).
+            xgb_reg.fit(X_train, y_train,
+                        eval_set=[(X_val, y_val)],
+                        early_stopping_rounds=10)
+            self._model = xgb_reg  # keep the fitted model reachable after construction
+            y_test_pred = xgb_reg.predict(X_test)
+            print(sqrt(mean_squared_error(y_test, y_test_pred)), mean_absolute_error(y_test, y_test_pred))
\ No newline at end of file
diff --git a/predictops/learn/preprocessing.py b/predictops/learn/preprocessing.py
index 187a5b7..51ecb4e 100644
--- a/predictops/learn/preprocessing.py
+++ b/predictops/learn/preprocessing.py
@@ -182,15 +182,11 @@ class Preprocessing:
         '''
         logger.info("Integrating previous nb of interventions as features")
         nb_lines = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
-        print(len(self._dataframe))
-        print(self._dataframe.head(4))
         for k in range(1,nb_lines+1):
             name = 'history_'+str(nb_lines-k+1)
             self._dataframe[name] = [np.NaN]*k + list(self._dict_target.values())[:-k]
             self._numerical_columns.append(name)
         self._dataframe = self._dataframe[nb_lines:]
-        print(self._dataframe.head(4))
-        print(len(self._dataframe))
 
 
 
diff --git a/predictops/source/meteofrance.py b/predictops/source/meteofrance.py
index 6bd23ed..b26c6bf 100644
--- a/predictops/source/meteofrance.py
+++ b/predictops/source/meteofrance.py
@@ -58,6 +58,9 @@ class MeteoFrance(Source):
         self._config = ConfigParser()
         self._config.read(config_file)
 
+        self._latitude = self._config['POSITION'].getfloat('latitude')
+        self._longitude = self._config['POSITION'].getfloat('longitude')
+
         self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
 
         self._dated_features = None
diff --git a/requirements.txt b/requirements.txt
index 6249a16..3e40e81 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,4 +17,5 @@ scikit-learn==0.22.1
 scipy==1.4.1
 Shapely==1.7.0
 six==1.14.0
+xgboost==0.90
 xlrd==1.2.0
-- 
2.39.5