from predictops.engine import Engine
from predictops.learn.preprocessing import Preprocessing
+from predictops.target.all import All
from predictops.target.toarea import ToArea
from logging import getLogger
#print(engine.X[datetime.strptime('06/30/2010 21:00:00', '%m/%d/%Y %H:%M:%S')])
print(process.dataframe.head(n=20))
print(process.dataframe.tail(n=20))
+
+
+ target = All(stream_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
+
+
exit()
depts = gpd.read_file( Path.cwd() / 'data' / 'targets' / 'departments' / "departements-20180101.shp")
from configparser import ConfigParser
+from csv import DictReader
from datetime import datetime, timedelta
from itertools import chain
from logging import getLogger
from logging.config import fileConfig
+from os import listdir
from pathlib import Path
import numpy as np
else:
self._features = set(chain.from_iterable([tuple(u.keys())
for u in [*dict_features.values()]]))
+ for csv_file in listdir():
+ with open(csv_file, "r") as f:
+ reader = DictReader(f, delimiter=',')
+ dico_features = {{row['name']: row['type'] # qualitative (2) or quantitative (1)
+ }
+ for row in reader if row['name'] in self._features}
+
+ self._features = {feat : None for feat in self._features}
+ print(self._features)
+ exit()
@property
elif self._config['PREPROCESSING']['fill_method'] == 'spline':
self._dataframe = self._dataframe.interpolate(method='spline',
order=self._config['PREPROCESSING'].getint('order'))
- self._dataframe = self._dataframe.fillna(method='bfill')
+
+ # Uncomment this line to fill NaN values at the beginning of the
+ # dataframe. This may not be a good idea, especially for features
+ # that are available only for recent years, e.g., air quality
+ #self._dataframe = self._dataframe.fillna(method='bfill')
self._dataframe = self._dataframe.drop([k.to_pydatetime()
for k in self._dataframe.T
def __init__(self, config_file):
+ # Check for the integrity of feature names
+ super(Source, self).__init__()
+
self._config = ConfigParser()
self._config.read(config_file)
+from .source import Source
+
from configparser import ConfigParser
from csv import DictReader
from datetime import datetime
CSV_FILE = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
-class MeteoFrance:
+class MeteoFrance(Source):
_latitude = None
_longitude = None
to their names in meteofrance_features.csv (cf. config directory)
'''
+ # Check for the integrity of feature names
+ super(Source, self).__init__()
+
self._config = ConfigParser()
self._config.read(config_file)
--- /dev/null
+from csv import DictReader
+from logging import getLogger
+from logging.config import fileConfig
+from os import listdir
+from pathlib import Path
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
class Source:
    '''
    Base class for feature sources.

    Instantiating it validates the feature configuration: no two feature
    CSV files under config/features may declare the same feature name.
    '''

    def __init__(self):
        '''
        Check if the same feature name is used in two different feature sources

        Raises:
            ValueError: if at least one feature name appears more than once
                across the CSV files in config/features.
        '''
        logger.info('Check for redondant feature names')
        csv_dir = Path.cwd() / 'config' / 'features'
        list_of_names = []
        for csv_file in listdir(csv_dir):
            # listdir() yields bare file names; join them with the directory
            # so open() works regardless of the current working directory.
            with open(csv_dir / csv_file, "r") as f:
                reader = DictReader(f, delimiter=',')
                list_of_names.extend([row['name'] for row in reader])
        if len(list_of_names) != len(set(list_of_names)):
            raise ValueError("At least two features have the same name")
\ No newline at end of file
--- /dev/null
+from csv import DictReader
+
class All:
    '''
    Target covering all interventions read from a CSV stream file.
    '''

    # Time span covered by the interventions stream; not populated yet.
    _start = None
    _end = None

    def __init__(self, stream_file = None):
        '''
        Load the interventions stream from a CSV file.

        Parameters:
            stream_file: path to the interventions CSV file.
        '''
        self._stream_file = stream_file
        self._get_located_interventions()


    def _get_located_interventions(self):
        '''
        Read the interventions CSV and dump every row (work in progress).
        '''
        with open(self._stream_file) as stream:
            for record in DictReader(stream, delimiter=','):
                print(record)
+