**.py[cod]
**$py.class
-data/
archives/
bonnes_pratiques.txt
from predictops.source.ephemeris import Ephemeris
from predictops.source.meteofrance import MeteoFrance
+from predictops.learn.preprocessing import Preprocessing
from predictops.target.toarea import ToArea
-from datetime import datetime
+from datetime import datetime, timedelta
from logging import getLogger
from logging.config import fileConfig
from pathlib import Path
class Engine:
def __init__(self, start = None, end = None, time_step = None):
- logger.info("Predictops engine launched")
self._X = {}
self._Y = {}
features = ['hour', 'dayInWeek', 'dayInMonth', 'dayInYear',
'weekInYear', 'month', 'year'])
-print(engine.X[datetime.strptime('06/30/2010 21:00:00', '%m/%d/%Y %H:%M:%S')])
+process = Preprocessing(dict_features = engine.X,
+ start = start, end = end, timestep = timedelta(hours=1))
+
+process.fill_na()
+print(process.dataframe.head(n=20))
+#print(engine.X[datetime.strptime('06/30/2010 21:00:00', '%m/%d/%Y %H:%M:%S')])
+
+exit()
depts = gpd.read_file( Path.cwd() / 'data' / 'targets' / 'departments' / "departements-20180101.shp")
Doubs = depts.loc[depts['nom'] == 'Doubs'].iloc[0]
ToArea(area=Doubs.geometry,
- start = start, end = end)
+ start = start, end = end,
+ csv_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
--- /dev/null
+from itertools import chain
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
class Preprocessing:
    """Build a regularly sampled DataFrame from a dictionary of features.

    ``dict_features`` maps a time key to a ``{feature_name: value}``
    dictionary.  Every key from ``start`` to ``end`` (inclusive), stepping
    by ``timestep``, is guaranteed to be present in the resulting table;
    features with no value at a given key are set to NaN.

    Note that ``dict_features`` is completed *in place*: the missing keys
    and missing features are written into the dictionary that was passed in.
    """

    def __init__(self, dict_features,
                 start, end, timestep,
                 features=None):
        """Store the raw features and the sampling grid.

        Args:
            dict_features: mapping ``time key -> {feature name: value}``.
            start: first key of the grid (inclusive).
            end: last key of the grid (inclusive).
            timestep: increment added to a key to get the next one; it
                must move the key strictly forward.
            features: iterable of feature names to guarantee in every
                row.  When omitted, the names are collected from the rows
                of ``dict_features``.
        """
        self._dict_features = dict_features
        self._start = start
        self._end = end
        self._timestep = timestep
        self._dataframe = None

        if features is not None:
            self._features = features
        else:
            # dict.fromkeys de-duplicates while keeping first-seen order,
            # so the column order is reproducible from one run to the next
            # (a plain set of strings is not).
            self._features = list(dict.fromkeys(
                chain.from_iterable(dict_features.values())))

    def _fill_dict(self):
        """Complete ``self._dict_features`` over the whole sampling grid.

        Each key of the grid ends up with a row holding every known
        feature: existing values are kept, anything missing becomes NaN.

        Raises:
            ValueError: if adding ``timestep`` does not move the key
                forward (the walk would otherwise never terminate).
        """
        current = self._start
        while current <= self._end:
            # Start from an all-NaN row, then overlay whatever is known.
            row = dict.fromkeys(self._features, np.nan)
            row.update(self._dict_features.get(current, {}))
            self._dict_features[current] = row

            following = current + self._timestep
            if not following > current:
                raise ValueError(
                    "timestep must be strictly positive, got %r"
                    % (self._timestep,))
            current = following

    @property
    def full_dict(self):
        """Return the completed features as a dictionary sorted by key."""
        self._fill_dict()
        return {key: self._dict_features[key]
                for key in sorted(self._dict_features)}

    @property
    def dataframe(self):
        """Return the features as a DataFrame with one row per key.

        The frame is built from ``full_dict`` on first access and cached.
        """
        if self._dataframe is None:
            self._dataframe = pd.DataFrame.from_dict(self.full_dict,
                                                     orient='index')
        return self._dataframe

    @dataframe.setter
    def dataframe(self, df):
        self._dataframe = df

    def fill_na(self):
        """Replace each NaN by the last valid value above it (forward fill).

        Leading NaN values, which have no previous observation, are left
        untouched.
        """
        # DataFrame.ffill() replaces the deprecated fillna(method='ffill').
        self.dataframe = self.dataframe.ffill()
\ No newline at end of file
from csv import DictReader
from datetime import datetime
-from os import listdir
-from pathlib import Path
class ToArea:
def __init__(self, area = None,
start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
- end = datetime.now()):
- self._get_located_interventions()
+ end = datetime.now(),
+ csv_file = None):
+ self._area = area
+ self._csv_file = csv_file
+ self._get_located_interventions()
def _get_located_interventions(self):
- self._data_directory = Path.cwd() / 'data' / 'targets' / 'sdis25'
- self._dict_interv = {}
- for year in range(2006,2018):
- if year < 2012:
- file_place = self._data_directory / 'interventions' / (str(year)+'.csv')
- else:
- file_place = self._data_directory / 'victims' / ('Liste_des_victimes_'+str(year)+'.csv')
- with open(file_place, "r") as f:
- reader = DictReader(f, delimiter='£')
- for row in reader:
- self._dict_interv.update({
- row['N° intervention']: {
- 'X' : row['Coord X'],
- 'Y' : row['Coord Y']
- } for row in reader
- })
- for csv_file in listdir(self._data_directory / 'interventions'):
- with open(self._data_directory / 'interventions' / csv_file, "r") as f:
- reader = DictReader(f, delimiter='£')
- for row in reader:
- if row['N° intervention'] in self._dict_interv:
- self._dict_interv[row['N° intervention']].update(
- {
- 'start': row['Début'],
- 'end' : row['Fin']
- })
- else:
- print(row['N° intervention'])
+ with open(self._csv_file) as f:
+ reader = DictReader(f, delimiter=',')
+ for row in reader:
+ print(row)
+