--- /dev/null
+[FEATURES]
+hour = True
+dayInWeek = True
+dayInMonth = True
+dayInYear = True
+weekInYear = True
+month = True
+year = True
+
+[HOUR]
+numerical = True
+
+[YEAR]
+numerical = True
\ No newline at end of file
--- /dev/null
+[GENERAL]
+regenerate = False
+reinsert = True
+
+[POSITION]
+latitude = 47.25
+longitude = 6.0333
+
+[STATIONS]
+nb_stations = 3
+
+[FEATURES]
+temperature = True
+pressure = True
+pressureVariation = False
+barometricTrend = False
+humidity = False
+dewPoint = False
+lastHourRainfall = False
+last3hHourRainfall = False
+meanWindSpeed10min = False
+meanWindDirection10min = False
+gustsOverAPeriod = False
+horizontalVisibility = False
+currentWeather = False
\ No newline at end of file
+++ /dev/null
-[meteofrance]
-regenerate = False
-reinsert = True
t,temperature,K,real,1
pres,pressure,Pa,integer,1
tend,pressureVariation,Pa,integer,1
-cod_tend,BarometricTrend,code,integer,2
+cod_tend,barometricTrend,code,integer,2
u,humidity,%,integer,1
td,dewPoint,K,real,1
rr1,lastHourRainfall,mm,real,1
--- /dev/null
+[DATETIME]
+start = 01/01/2010 01:00:00
+end = 12/31/2010 23:00:00
+hourStep = 6
+
+
+[FEATURES]
+meteofrance = True
+ephemeris = True
+
+
+[FEATURE_CONFIG]
+meteofrance = (Path.cwd() / 'config') / 'feature_meteo.cfg'
+ephemeris = (Path.cwd() / 'config') / 'feature_ephemeris.cfg'
+
+
+[PREPROCESSING]
+fill_method = spline
+order = 3
\ No newline at end of file
+++ /dev/null
-[postgresql]
-host = localhost
-user = christophe
-port = 5432
-dbname = extome
-from predictops.source.ephemeris import Ephemeris
-from predictops.source.meteofrance import MeteoFrance
+from predictops.engine import Engine
from predictops.learn.preprocessing import Preprocessing
from predictops.target.toarea import ToArea
-from datetime import datetime, timedelta
from logging import getLogger
from logging.config import fileConfig
from pathlib import Path
-from shutil import rmtree
import geopandas as gpd
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
logger = getLogger()
if __name__ == '__main__':
    # A single configuration file drives both the feature collection (Engine)
    # and the preprocessing step (time window, time step, filling method).
    config = (Path.cwd() / 'config') / 'learn.cfg'

    engine = Engine(config_file = config)
    engine.add_features()

    process = Preprocessing(config_file = config, dict_features = engine.X)

    print(process.dataframe.head(n=20))
    print(process.dataframe.tail(n=20))
    # NOTE(review): debugging stop. Everything below is unreachable as long as
    # this call stays here.
    exit()

    depts = gpd.read_file( Path.cwd() / 'data' / 'targets' / 'departments' / "departements-20180101.shp")
    Doubs = depts.loc[depts['nom'] == 'Doubs'].iloc[0]

    # The module-level start/end variables no longer exist: the time window is
    # now read from the configuration file, and Preprocessing exposes it.
    ToArea(area=Doubs.geometry,
           start = process.start, end = process.end,
           csv_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
--- /dev/null
+from configparser import ConfigParser
+from datetime import datetime, timedelta
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+from shutil import rmtree
+
+from predictops.source.ephemeris import Ephemeris
+from predictops.source.meteofrance import MeteoFrance
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
class Engine:
    '''
    Collects the features of the enabled sources into a single dictionary.

    The time window, the time step and the list of enabled sources are read
    from an INI configuration file (sections DATETIME, FEATURES and
    FEATURE_CONFIG). Collected features are stored per date in X, as a
    dictionary {date: {feature name: value}}.
    '''

    # Format of the 'start' and 'end' entries of the DATETIME section
    _DATE_FORMAT = '%m/%d/%Y %H:%M:%S'

    def __init__(self, config_file = (Path.cwd() / 'config') / 'learn.cfg'):
        '''
        Reads the configuration and initializes empty feature containers.

        Parameters:
            config_file: path of the INI configuration file. Its DATETIME
                section must provide 'start' and 'end' (%m/%d/%Y %H:%M:%S)
                and 'hourStep' (a number of hours, parsed as a float).

        Raises:
            FileNotFoundError: if config_file cannot be read.
        '''
        self._config = ConfigParser()
        # ConfigParser.read() silently skips files it cannot open and returns
        # the list of files actually parsed: an empty list means that nothing
        # was loaded, which would otherwise surface as a KeyError below.
        if not self._config.read(config_file):
            raise FileNotFoundError(
                f'Configuration file not found: {config_file}')

        period = self._config['DATETIME']
        self._start = datetime.strptime(period['start'], self._DATE_FORMAT)
        self._end = datetime.strptime(period['end'], self._DATE_FORMAT)
        self._timestep = timedelta(hours = period.getfloat('hourStep'))

        self._X = {}
        self._Y = {}


    def clean(self):
        '''
        Removes the 'data' directory of the current working directory, if
        any, and recreates it empty.
        '''
        logger.info("Cleaning and restoring data directory")
        directory = Path.cwd() / 'data'
        if directory.is_dir():
            rmtree(directory)
        directory.mkdir()


    def add_features(self):
        '''
        Collects the features of each source enabled in the FEATURES section
        and merges them, date by date, into X.
        '''
        enabled = self._config['FEATURES']

        if enabled.getboolean('meteofrance'):
            meteofeature = MeteoFrance(
                config_file = self._feature_config('meteofrance'))
            meteofeature.start = self._start
            meteofeature.end = self._end
            meteofeature.update()
            self._merge(meteofeature.dated_features)

        if enabled.getboolean('ephemeris'):
            ephemerides = Ephemeris(
                config_file = self._feature_config('ephemeris'))
            ephemerides.start = self._start
            ephemerides.end = self._end
            self._merge(ephemerides.dated_features)


    def _feature_config(self, name):
        '''
        Returns the configuration file of the source called name, as given
        in the FEATURE_CONFIG section.
        '''
        # NOTE(security): the FEATURE_CONFIG entries are Python expressions
        # (e.g. "(Path.cwd() / 'config') / 'feature_meteo.cfg'"), so they are
        # evaluated: whoever can edit the configuration file can run
        # arbitrary code. Kept for compatibility with existing files; storing
        # plain relative paths would remove the need for eval.
        return eval(self._config['FEATURE_CONFIG'][name])


    def _merge(self, dated_features):
        '''
        Adds the features of each date of dated_features to X.
        '''
        for date, values in dated_features.items():
            self._X.setdefault(date, {}).update(values)


    @property
    def X(self):
        return self._X

    @X.setter
    def X(self, x):
        self._X = x
\ No newline at end of file
+from configparser import ConfigParser
+from datetime import datetime, timedelta
from itertools import chain
from logging import getLogger
from logging.config import fileConfig
- Missing datetimes are added first with np.NaN feature values,
- The dataframe is then constructed based on the filled feature dictionary,
- NaN values are then filled with last known values.
-
'''
- def __init__(self, dict_features,
- start, end, timestep,
- features = None):
+
+ def __init__(self, config_file = None, dict_features = None, features = None):
'''
Constructor that defines all needed attributes and collects features.
'''
- logger.info("Entering NaN values in the feature dataframe")
+ self._config = ConfigParser()
+ self._config.read(config_file)
+
+ self._start = datetime.strptime(self._config['DATETIME']['start'],
+ '%m/%d/%Y %H:%M:%S')
+ self._end = datetime.strptime(self._config['DATETIME']['end'],
+ '%m/%d/%Y %H:%M:%S')
+ self._timestep = timedelta(hours =
+ self._config['DATETIME'].getfloat('hourStep'))
self._dict_features = dict_features
- self._start = start
- self._end = end
- self._timestep = timestep
self._full_dict = None
self._dataframe = None
self._datetimes = []
for u in [*dict_features.values()]]))
+ @property
+ def start(self):
+ return self._start
+
+ @start.setter
+ def start(self, x):
+ self._start = x
+
+
+ @property
+ def end(self):
+ return self._end
+
+ @end.setter
+ def end(self, x):
+ self._end = x
+
+
+ @property
+ def timestep(self):
+ return self._timestep
+
+ @timestep.setter
+ def timestep(self, x):
+ self._timestep = x
+
+
def _fill_dict(self):
'''
Add datetime keys in the dated feature dictionary that are missing. The
self._dataframe = pd.DataFrame.from_dict(self.full_dict,
orient='index')
logger.info("Filling NaN values in the feature dataframe")
- #TODO: add other filling methods like linear interpolation
- self._dataframe = self._dataframe.fillna(method='ffill')
+
+ if self._config['PREPROCESSING']['fill_method'] == 'propagate':
+ self._dataframe = self._dataframe.fillna(method='ffill')
+ elif self._config['PREPROCESSING']['fill_method'] == 'linear':
+ self._dataframe = self._dataframe.interpolate()
+ elif self._config['PREPROCESSING']['fill_method'] == 'spline':
+ self._dataframe = self._dataframe.interpolate(method='spline',
+ order=self._config['PREPROCESSING'].getint('order'))
self._dataframe = self._dataframe.fillna(method='bfill')
+
self._dataframe = self._dataframe.drop([k.to_pydatetime()
for k in self._dataframe.T
if k not in self._datetimes])
return self._dataframe
+
@dataframe.setter
def dataframe(self, df):
self._dataframe = df
+from configparser import ConfigParser
+from csv import DictReader
from datetime import datetime, timedelta
+from pathlib import Path
+
import time
import calendar
+CSV_FILE = Path.cwd() / 'config' / 'features' / 'ephemeris_features.csv'
+
class Ephemeris:
- def __init__(self, start = time.strptime('19960101000000', '%Y%m%d%H%M%S'),
- end = datetime.now(), features = []):
- self._start = start
- self._end = end
- self._features = features
+ _start = None
+ _end = None
+
def __init__(self, config_file):
    '''
    Loads the configuration of the ephemeris source and selects its
    enabled features.

    Parameters:
        config_file: path of the INI file whose FEATURES section holds one
            boolean per feature name listed in the 'name' column of
            CSV_FILE.
    '''
    self._config = ConfigParser()
    self._config.read(config_file)

    # Collecting ephemeris features: only the names flagged as true in the
    # FEATURES section are kept, in the order of the CSV file
    selected = []
    with open(CSV_FILE, "r") as f:
        for row in DictReader(f, delimiter=','):
            name = row['name']
            if self._config['FEATURES'].getboolean(name):
                selected.append(name)
    self._features = selected

    self._dated_features = {}
+ @property
+ def start(self):
+ return self._start
+
+ @start.setter
+ def start(self, x):
+ self._start = x
+
+
+ @property
+ def end(self):
+ return self._end
- def update(self):
- pass
+ @end.setter
+ def end(self, x):
+ self._end = x
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
logger = getLogger()
+CSV_FILE = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
+
+
class MeteoFrance:
- def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
- start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
- end = datetime.now(),
- features = []):
+ _latitude = None
+ _longitude = None
+ _nb_stations = None
+ _start = None
+ _end = None
+ _features = None
+
def __init__(self, config_file):
    '''
    Constructor of the MeteoFrance source of feature.

    https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32

    Parameters:
        config_file: path of the INI file configuring this source, with
            - GENERAL / regenerate (bool): when true, the data directory
              architecture is re-created.
            - POSITION / latitude (float): The latitude from which we want
              the meteo features.
            - POSITION / longitude (float): The longitude from which we
              want the meteo features.
            - STATIONS / nb_stations (int): Number of closest stations to
              consider.
            - FEATURES / <name> (bool): whether the weather feature <name>
              has to be integrated, according to the names in
              meteofrance_features.csv (cf. config directory).

    The start and end dates are not read here: they are set afterwards
    through the corresponding properties.
    '''
    self._config = ConfigParser()
    self._config.read(config_file)

    self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
    self._dated_features = None

    # Re-creating data directory architecture for MeteoFrance, if asked
    if self._config['GENERAL'].getboolean('regenerate'):
        self._regenerate_directory()

    # Collecting the closest meteo station
    # The position must be known before the stations are collected: it is
    # no longer a constructor argument, so it is read from the POSITION
    # section. A missing entry keeps the former None value instead of
    # raising.
    # NOTE(review): _get_stations() is defined outside this view; it
    # presumably relies on the position, confirm.
    self._latitude = self._config.getfloat('POSITION', 'latitude',
                                           fallback = None)
    self._longitude = self._config.getfloat('POSITION', 'longitude',
                                            fallback = None)
    self._nb_stations = self._config['STATIONS'].getint('nb_stations')
    self._stations = self._get_stations()

    # Collecting meteofrance features: only the names flagged as true in
    # the FEATURES section are kept
    with open(CSV_FILE, "r") as f:
        reader = DictReader(f, delimiter=',')
        self._features = [row['name'] for row in reader
                          if self._config['FEATURES'].getboolean(row['name'])]
+
+
+ @property
+ def start(self):
+ return self._start
+
+ @start.setter
+ def start(self, x):
+ self._start = x
+
+
+ @property
+ def end(self):
+ return self._end
+
+ @end.setter
+ def end(self, x):
+ self._end = x
+
+
+ @property
+ def latitude(self):
+ return self._latitude
+
+ @latitude.setter
+ def latitude(self, x):
+ self._latitude = x
+
+
+ @property
+ def longitude(self):
+ return self._longitude
+
+ @longitude.setter
+ def longitude(self, x):
+ self._longitude = x
+
+
+ @property
+ def nb_stations(self):
+ return self._nb_stations
+
+ @nb_stations.setter
+ def nb_stations(self, x):
+ self._nb_stations = x
def _regenerate_directory(self):
dict: the dictionary of features per datestamp
'''
if self._dated_features == None:
- csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
- logger.info(f'Collecting meteo feature information from {csv_file}')
+ logger.info(f'Collecting meteo feature information from {CSV_FILE}')
# A dictionary for the features
- with open(csv_file, "r") as f:
+ with open(CSV_FILE, "r") as f:
reader = DictReader(f, delimiter=',')
dico_features = {row["abbreviation"]:
{
'type': row['type'] # qualitative (2) or quantitative (1)
}
for row in reader if row['name'] in self._features}
+ #print([row for row in reader])
+ #print([row for row in reader if row['name'] in self._features])
dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
self._dated_features = {}
for csv_meteo in listdir(dir_data):