AND Private Git Repository - predictops.git/blobdiff - predictops/learn/preprocessing.py
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Refactoring, fin du lever/coucher de soleil, et début de sentinelles
[predictops.git] / predictops / learn / preprocessing.py
index 9bc09ad2eca2759c22b6047c3ded8ab747e015de..55cffbd2a0e094a610580e1e4dbcdf8adc80de5d 100644 (file)
@@ -1,10 +1,7 @@
 from configparser import ConfigParser
-from csv import DictReader
-from datetime import datetime, timedelta
 from itertools import chain
 from logging import getLogger
 from logging.config import fileConfig
-from os import listdir
 from pathlib import Path
 from sklearn import preprocessing
 
@@ -27,17 +24,16 @@ class Preprocessing:
     '''
 
     def __init__(self, config_file=None,
+                 start=None, end=None, timestep=None,
                  dict_features=None, dict_target=None):
         '''
         Constructor that defines all needed attributes and collects features.
         '''
         self._config = config_file
 
-        self._start = datetime.strptime(self._config['DATETIME']['start'],
-                                        '%m/%d/%Y %H:%M:%S')
-        self._end = datetime.strptime(self._config['DATETIME']['end'],
-                                      '%m/%d/%Y %H:%M:%S')
-        self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
+        self._start = start
+        self._end = end
+        self._timestep = timestep
         self._dict_features = dict_features
         self._dict_target = dict_target
 
@@ -196,6 +192,7 @@ class Preprocessing:
         '''
         logger.info("One hot encoding for categorical feature")
         # We store numerical columns
+
         df_out = pd.DataFrame()
         for col in self._numerical_columns:
             df_out[col] = self._dataframe[col]
@@ -222,6 +219,8 @@ class Preprocessing:
             self._fill_nan()
             # Adding previous (historical) nb_interventions as features
             self._add_history()
+            # self._dataframe.to_csv('toto.csv')
+            # exit()
             # Normalizing numerical values
             self._standardize()
             # Dealing with categorical features