X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/predictops.git/blobdiff_plain/83fdad7cdd97734f036d464acceebaf3b0f0a44b..HEAD:/predictops/learn/preprocessing.py?ds=sidebyside diff --git a/predictops/learn/preprocessing.py b/predictops/learn/preprocessing.py index 9bc09ad..55cffbd 100644 --- a/predictops/learn/preprocessing.py +++ b/predictops/learn/preprocessing.py @@ -1,10 +1,7 @@ from configparser import ConfigParser -from csv import DictReader -from datetime import datetime, timedelta from itertools import chain from logging import getLogger from logging.config import fileConfig -from os import listdir from pathlib import Path from sklearn import preprocessing @@ -27,17 +24,16 @@ class Preprocessing: ''' def __init__(self, config_file=None, + start=None, end=None, timestep=None, dict_features=None, dict_target=None): ''' Constructor that defines all needed attributes and collects features. ''' self._config = config_file - self._start = datetime.strptime(self._config['DATETIME']['start'], - '%m/%d/%Y %H:%M:%S') - self._end = datetime.strptime(self._config['DATETIME']['end'], - '%m/%d/%Y %H:%M:%S') - self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep')) + self._start = start + self._end = end + self._timestep = timestep self._dict_features = dict_features self._dict_target = dict_target @@ -196,6 +192,7 @@ class Preprocessing: ''' logger.info("One hot encoding for categorical feature") # We store numerical columns + df_out = pd.DataFrame() for col in self._numerical_columns: df_out[col] = self._dataframe[col] @@ -222,6 +219,8 @@ class Preprocessing: self._fill_nan() # Adding previous (historical) nb_interventions as features self._add_history() + # self._dataframe.to_csv('toto.csv') + # exit() # Normalizing numerical values self._standardize() # Dealing with categorical features