1 from itertools import chain
2 from logging import getLogger
3 from logging.config import fileConfig
4 from pathlib import Path
# Configure logging from <current working directory>/config/logging.cfg.
# This runs at import time and resolves the path against the process cwd.
fileConfig(Path.cwd().joinpath('config', 'logging.cfg'))
14 Generate a pandas dataframe from a dictionary of features per datetime, which
15 respects the starting and ending dates of the study, and its precision (the
16 time step) as passed to the constructor. Missing feature values are completed.
18 - Missing datetimes are added first with np.NaN feature values,
19 - The dataframe is then constructed based on the filled feature dictionary,
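20 - NaN values are then filled with the last known values (forward fill); leading NaN values with no earlier value are filled with the next known one (backward fill).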
23 def __init__(self, dict_features,
27 Constructor that defines all needed attributes and collects features.
29 logger.info("Entering NaN values in the feature dataframe")
30 self._dict_features = dict_features
33 self._timestep = timestep
34 self._full_dict = None
35 self._dataframe = None
37 # If features are not provided to the constructor, then we collect
38 # any existing feature in the dictionary
40 self._features = features
42 self._features = set(chain.from_iterable([tuple(u.keys())
43 for u in [*dict_features.values()]]))
# NOTE(review): the method header was missing from the listing; the name
# `_fill_dict` is assumed -- confirm against the caller in `full_dict`.
def _fill_dict(self):
    """
    Add datetime keys in the dated feature dictionary that are missing. The
    features are then set to NaN. Add missing features in existing datetimes
    too.

    Every datetime visited between the start and ``self._end`` (inclusive,
    stepping by ``self._timestep``) is appended to ``self._datetimes``.
    ``self._dict_features`` is updated in place, and ``self._full_dict`` is
    set to the same rows ordered by datetime.

    Raises:
        ValueError: if adding ``self._timestep`` does not move the current
            datetime forward (the walk would never terminate).
    """
    logger.info("Adding missing dates and filling missing features with NaN values")
    # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
    blank_row = dict.fromkeys(self._features, np.nan)

    # NOTE(review): the initialiser line was missing from the listing;
    # `self._start` is assumed by symmetry with `self._end` -- confirm.
    current = self._start
    while current <= self._end:
        self._datetimes.append(current)
        # Only register the datetime here; all rows are completed below.
        self._dict_features.setdefault(current, {})
        following = current + self._timestep
        if not following > current:
            raise ValueError(
                f"timestep must move the datetime forward, got {self._timestep!r}")
        current = following

    # Single normalisation pass: NaN for every known feature, then the
    # recorded values on top. Each row is replaced by a new dict, so the
    # caller's inner dictionaries are not mutated.
    for moment in self._dict_features:
        self._dict_features[moment] = {**blank_row, **self._dict_features[moment]}

    self._full_dict = {moment: self._dict_features[moment]
                       for moment in sorted(self._dict_features)}
@property
def full_dict(self):
    """
    Returns the fully filled dated feature dictionary, ordered by datetimes.

    The dictionary is built on first access and cached in
    ``self._full_dict``; later accesses return the cached object.
    """
    if self._full_dict is None:
        # NOTE(review): the statement under this `if` was missing from the
        # listing; the populating method is assumed to be `_fill_dict` -- confirm.
        self._fill_dict()
    return self._full_dict
@property
def dataframe(self):
    """
    Returns the feature dataframe, after creating it if needed.

    On first access the frame is built from ``self.full_dict``, NaN values
    are forward-filled and then backward-filled, and rows whose datetime is
    not listed in ``self._datetimes`` are dropped. The result is cached in
    ``self._dataframe`` and returned as-is on later accesses.
    """
    if self._dataframe is None:
        logger.info("Creating feature dataframe from feature dictionary")
        # NOTE(review): the second argument line of from_dict was missing from
        # the listing; orient='index' (one row per datetime key) is assumed
        # because rows are dropped by datetime label below. Placing the drop
        # inside this `if` is likewise inferred -- confirm both.
        frame = pd.DataFrame.from_dict(self.full_dict, orient='index')

        logger.info("Filling NaN values in the feature dataframe")
        #TODO: add other filling methods like linear interpolation
        # .ffill()/.bfill() replace fillna(method=...), deprecated in pandas 2.1.
        frame = frame.ffill().bfill()

        # Normalise to Timestamps so set membership agrees with the index
        # labels, and avoid rescanning the datetime list for every row.
        wanted = {pd.Timestamp(moment) for moment in self._datetimes}
        outside = [label for label in frame.index if label not in wanted]
        self._dataframe = frame.drop(outside)
    return self._dataframe
106 def dataframe(self, df):