Add Belfort and Montbeliard dataset

[predictops.git] / predictops / learn / preprocessing.py
diff --git a/predictops/learn/preprocessing.py b/predictops/learn/preprocessing.py

index 187a5b73b664da58031d45d55839548a10ec1be1..106a6267c3aa804aca024c471e5c7b6e29805799 100644 (file)
--- a/predictops/learn/preprocessing.py
+++ b/predictops/learn/preprocessing.py
@@ -171,9 +171,14 @@ class Preprocessing:
  
          # Dropping rows that are not related to our datetime window (start/
          # step / end)
-        self._dataframe = self._dataframe.drop([k.to_pydatetime()
-                                               for k in self._dataframe.T
-                                               if k not in self._datetimes])
+        logger.info("Dropping rows that are not related to our datetime window")
+        self._dataframe['datetime'] =\
+            self._dataframe.apply(lambda x: datetime(int(x.year), int(x.month), int(x.dayInMonth), int(x.hour)), axis=1)
+        self._dataframe['row_ok'] =\
+            self._dataframe.apply(lambda x:x.datetime in self._datetimes, axis=1)
+        self._dataframe = self._dataframe[self._dataframe['row_ok']]
+        self._dataframe = self._dataframe.drop(['datetime', 'row_ok'], axis=1)
+        logger.info("Rows dropped")
  
  
      def _add_history(self):
@@ -182,15 +187,11 @@ class Preprocessing:
          '''
          logger.info("Integrating previous nb of interventions as features")
          nb_lines = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
-        print(len(self._dataframe))
-        print(self._dataframe.head(4))
          for k in range(1,nb_lines+1):
              name = 'history_'+str(nb_lines-k+1)
              self._dataframe[name] = [np.NaN]*k + list(self._dict_target.values())[:-k]
              self._numerical_columns.append(name)
          self._dataframe = self._dataframe[nb_lines:]
-        print(self._dataframe.head(4))
-        print(len(self._dataframe))