AND Private Git Repository - predictops.git/blobdiff - predictops/learn/preprocessing.py
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
XGBoost integrated
[predictops.git] / predictops / learn / preprocessing.py
index 187a5b73b664da58031d45d55839548a10ec1be1..106a6267c3aa804aca024c471e5c7b6e29805799 100644 (file)
@@ -171,9 +171,14 @@ class Preprocessing:
 
         # Dropping rows that are not related to our datetime window (start/
         # step / end)
-        self._dataframe = self._dataframe.drop([k.to_pydatetime()
-                                               for k in self._dataframe.T
-                                               if k not in self._datetimes])
+        logger.info("Dropping rows that are not related to our datetime window")
+        self._dataframe['datetime'] =\
+            self._dataframe.apply(lambda x: datetime(int(x.year), int(x.month), int(x.dayInMonth), int(x.hour)), axis=1)
+        self._dataframe['row_ok'] =\
+            self._dataframe.apply(lambda x:x.datetime in self._datetimes, axis=1)
+        self._dataframe = self._dataframe[self._dataframe['row_ok']]
+        self._dataframe = self._dataframe.drop(['datetime', 'row_ok'], axis=1)
+        logger.info("Rows dropped")
 
 
     def _add_history(self):
@@ -182,15 +187,11 @@ class Preprocessing:
         '''
         logger.info("Integrating previous nb of interventions as features")
         nb_lines = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
-        print(len(self._dataframe))
-        print(self._dataframe.head(4))
         for k in range(1,nb_lines+1):
             name = 'history_'+str(nb_lines-k+1)
             self._dataframe[name] = [np.NaN]*k + list(self._dict_target.values())[:-k]
             self._numerical_columns.append(name)
         self._dataframe = self._dataframe[nb_lines:]
-        print(self._dataframe.head(4))
-        print(len(self._dataframe))