from configparser import ConfigParser
+from csv import DictReader
from datetime import datetime, timedelta
from itertools import chain
from logging import getLogger
from logging.config import fileConfig
+from os import listdir
from pathlib import Path
import numpy as np
else:
self._features = set(chain.from_iterable([tuple(u.keys())
for u in [*dict_features.values()]]))
+ for csv_file in listdir():
+ with open(csv_file, "r") as f:
+ reader = DictReader(f, delimiter=',')
+ dico_features = {{row['name']: row['type'] # qualitative (2) or quantitative (1)
+ }
+ for row in reader if row['name'] in self._features}
+
+ self._features = {feat : None for feat in self._features}
+ print(self._features)
+ exit()
@property
elif self._config['PREPROCESSING']['fill_method'] == 'spline':
self._dataframe = self._dataframe.interpolate(method='spline',
order=self._config['PREPROCESSING'].getint('order'))
- self._dataframe = self._dataframe.fillna(method='bfill')
+
+ # Uncomment this line to fill NaN values at the beginning of the
+ # dataframe. This may not be a good idea, especially for features
+ # that are available only for recent years, e.g., air quality
+ #self._dataframe = self._dataframe.fillna(method='bfill')
self._dataframe = self._dataframe.drop([k.to_pydatetime()
for k in self._dataframe.T