from configparser import ConfigParser
+from csv import DictReader
from datetime import datetime, timedelta
from itertools import chain
from logging import getLogger
from logging.config import fileConfig
+from os import listdir
from pathlib import Path
import numpy as np
else:
self._features = set(chain.from_iterable([tuple(u.keys())
for u in [*dict_features.values()]]))
+ csv_files = Path.cwd() / 'config' / 'features'
+ self._features = {feat : None for feat in self._features}
+ for csv_file in listdir(csv_files):
+ with open(csv_files / csv_file, "r") as f:
+ reader = DictReader(f, delimiter=',')
+ for row in reader:
+ if row['name'] in self._features:
+ self._features[row['name']] = row['type']
+ print(self._features)
+ exit()
@property
elif self._config['PREPROCESSING']['fill_method'] == 'spline':
self._dataframe = self._dataframe.interpolate(method='spline',
order=self._config['PREPROCESSING'].getint('order'))
- self._dataframe = self._dataframe.fillna(method='bfill')
+
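+ # Back-filling of the NaN values at the beginning of the dataframe is
+ # disabled: it may not be a good idea, especially for features that are
+ # available only for recent years (e.g., air quality), because it would
+ # fill the earlier rows with values that were never measured.
+ # Uncomment the following line to re-enable it.
+ #self._dataframe = self._dataframe.fillna(method='bfill')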
self._dataframe = self._dataframe.drop([k.to_pydatetime()
for k in self._dataframe.T