]> AND Private Git Repository - predictops.git/blob - predictops/learn/preprocessing.py
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
b58ffac00588fc22d7f7f3d37edcf63b791d16f4
[predictops.git] / predictops / learn / preprocessing.py
1 from itertools import chain
2 from logging import getLogger
3 from logging.config import fileConfig
4 from pathlib import Path
5
6 import numpy as np
7 import pandas as pd
8
# Configure the logging system from <current working directory>/config/logging.cfg
# at import time, then grab the root logger for this module.
fileConfig(Path.cwd().joinpath('config', 'logging.cfg'))
logger = getLogger()
11
class Preprocessing:
    """Regularise a ``{key: {feature: value}}`` mapping onto a fixed grid.

    Every grid point ``start, start + timestep, ...`` that is ``<= end`` is
    guaranteed to have a row holding one entry per known feature (missing
    values are ``NaN``). The result is exposed as a sorted dict
    (``full_dict``) or as a pandas DataFrame (``dataframe``).
    """

    def __init__(self, dict_features,
                 start, end, timestep,
                 features=None):
        """Store the raw features and the grid definition.

        Args:
            dict_features: mapping from a grid key to a ``{feature: value}``
                dict. NOTE: it is kept by reference and completed in place
                when ``full_dict`` / ``dataframe`` are read.
            start: first grid key.
            end: last grid key (inclusive bound of the grid).
            timestep: increment added to go from one grid key to the next.
            features: optional iterable of feature names. When omitted, the
                union of all feature names found in ``dict_features`` is used.
        """
        self._dict_features = dict_features
        self._start = start
        self._end = end
        self._timestep = timestep
        self._dataframe = None

        # Identity test on purpose: ``!= None`` is evaluated element-wise by
        # array-like objects (numpy arrays, pandas Index) and then cannot be
        # used as an ``if`` condition.
        if features is not None:
            self._features = features
        else:
            self._features = set(chain.from_iterable(
                row.keys() for row in dict_features.values()))

    def _fill_dict(self):
        """Complete ``self._dict_features`` in place over the whole grid.

        Grid points that are absent get a row made only of ``NaN``; existing
        rows keep their values and receive ``NaN`` for the features they lack.

        Raises:
            ValueError: if adding ``timestep`` does not move forward, which
                would otherwise make the loop run forever.
        """
        current = self._start
        while current <= self._end:
            # Start from an all-NaN row, then overlay whatever is already known.
            row = dict.fromkeys(self._features, np.nan)
            row.update(self._dict_features.get(current, {}))
            self._dict_features[current] = row

            following = current + self._timestep
            if following <= current:
                raise ValueError(
                    f"timestep must be strictly positive, got {self._timestep!r}")
            current = following

    @property
    def full_dict(self):
        """Return the completed features as a new dict sorted by key."""
        self._fill_dict()
        return {key: self._dict_features[key]
                for key in sorted(self._dict_features)}

    @property
    def dataframe(self):
        """Return the features as a DataFrame (one row per key).

        The frame is built from ``full_dict`` on first access and cached.
        """
        if self._dataframe is None:
            self._dataframe = pd.DataFrame.from_dict(self.full_dict, orient='index')
        return self._dataframe

    @dataframe.setter
    def dataframe(self, df):
        """Replace the cached DataFrame."""
        self._dataframe = df

    def fill_na(self):
        """Forward-fill missing values in the DataFrame.

        Values that have no earlier non-missing value in their column stay
        ``NaN``.
        """
        # ``DataFrame.ffill()`` is the supported spelling of the deprecated
        # ``fillna(method='ffill')``.
        self.dataframe = self.dataframe.ffill()