AND Private Git Repository - predictops.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
XGBoost integrated
author: Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Tue, 18 Feb 2020 16:05:04 +0000 (17:05 +0100)
committer: Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Tue, 18 Feb 2020 16:05:04 +0000 (17:05 +0100)
config/learn.cfg
config/learners/xgboost.cfg
predictops/learn/preprocessing.py

index 53c62deeb541804097e3a30cb8ee04449038aff2..29bd628119eb2aad019dab9847538df1a8e30fef 100644 (file)
@@ -1,7 +1,7 @@
 [DATETIME]
-start    = 01/01/2006 00:00:00
+start    = 01/01/2010 01:00:00
 end      = 12/31/2017 23:00:00
-hourStep = 1
+hourStep = 5
 
 
 [FEATURES]
index 61975c2be135cab5906e05705ab37f94d31cd21c..0dd78d0194d1f6585b2c342557e6a8ed035f54ee 100644 (file)
@@ -1,2 +1,10 @@
 [MODEL]
-method = xgboost
\ No newline at end of file
+method = xgboost
+
+[HYPERPARAMETERS]
+learning_rate = 0.01,
+max_depth = 10,
+random_state=42,
+n_estimators = 173,
+n_jobs=-1,
+objective = 'count:poisson'
\ No newline at end of file
index 51ecb4e162ff77b804a6a9e4790c4e9da34e1410..106a6267c3aa804aca024c471e5c7b6e29805799 100644 (file)
@@ -171,9 +171,14 @@ class Preprocessing:
 
         # Dropping rows that are not related to our datetime window (start/
         # step / end)
-        self._dataframe = self._dataframe.drop([k.to_pydatetime()
-                                               for k in self._dataframe.T
-                                               if k not in self._datetimes])
+        logger.info("Dropping rows that are not related to our datetime window")
+        self._dataframe['datetime'] =\
+            self._dataframe.apply(lambda x: datetime(int(x.year), int(x.month), int(x.dayInMonth), int(x.hour)), axis=1)
+        self._dataframe['row_ok'] =\
+            self._dataframe.apply(lambda x:x.datetime in self._datetimes, axis=1)
+        self._dataframe = self._dataframe[self._dataframe['row_ok']]
+        self._dataframe = self._dataframe.drop(['datetime', 'row_ok'], axis=1)
+        logger.info("Rows dropped")
 
 
     def _add_history(self):