From f96e8f9dd1e1026a372660eec51452eee33bb2f0 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Tue, 11 Feb 2020 11:05:45 +0100 Subject: [PATCH] Adding calendar features --- .../meteofrance_features.csv | 0 lib/source/__init__.py | 1 - lib/source/ephemerides.py | 0 lib/source/ephemeris.py | 52 ++++++++++++++++ lib/source/meteofrance.py | 38 +++++++----- lib/tools/cleaner.py | 41 ------------ lib/tools/connector.py | 62 ------------------- main.py | 55 ++++++++++++---- 8 files changed, 119 insertions(+), 130 deletions(-) rename config/features/{meteofrance => }/meteofrance_features.csv (100%) delete mode 100644 lib/source/ephemerides.py create mode 100644 lib/source/ephemeris.py delete mode 100644 lib/tools/cleaner.py delete mode 100644 lib/tools/connector.py diff --git a/config/features/meteofrance/meteofrance_features.csv b/config/features/meteofrance_features.csv similarity index 100% rename from config/features/meteofrance/meteofrance_features.csv rename to config/features/meteofrance_features.csv diff --git a/lib/source/__init__.py b/lib/source/__init__.py index 527538d..e69de29 100644 --- a/lib/source/__init__.py +++ b/lib/source/__init__.py @@ -1 +0,0 @@ -from .meteofrance import MeteoFrance \ No newline at end of file diff --git a/lib/source/ephemerides.py b/lib/source/ephemerides.py deleted file mode 100644 index e69de29..0000000 diff --git a/lib/source/ephemeris.py b/lib/source/ephemeris.py new file mode 100644 index 0000000..33c0f2d --- /dev/null +++ b/lib/source/ephemeris.py @@ -0,0 +1,52 @@ +from datetime import datetime, timedelta +import time +import calendar + +class Ephemeris: + + def __init__(self, start = time.strptime('19960101000000', '%Y%m%d%H%M%S'), + end = datetime.now(), features = []): + self._start = start + self._end = end + self._features = features + + self._dated_features = {} + + + + def update(self): + pass + + + + @property + def dated_features(self): + if self._dated_features == {}: + date = self._start + while date <= self._end: + dict_hour = {} + Date = time.strptime(datetime.strftime(date, '%m/%d/%Y %H:%M:%S'), '%m/%d/%Y %H:%M:%S') + for feature in self._features: + if feature == 'hour': + dict_hour['hour'] = Date.tm_hour + elif feature == 'dayInWeek': + dict_hour['dayInWeek'] = Date.tm_wday + elif feature == 'dayInMonth': + dict_hour['dayInMonth'] = Date.tm_mday + elif feature == 'month': + dict_hour['month'] = Date.tm_mon + elif feature == 'year': + dict_hour['year'] = Date.tm_year + elif feature == 'dayInYear': + # Si c'est une année bissextile et qu'on est après le 29 février, on compte une journée + # dans l'année de moins, car on va supprimer les 29 févriers, de sorte que les 14 juillets, + # les 24 décembre... tombent toujours + if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/"+str(Date.tm_year), "%d/%m/%Y"): + dict_hour['dayInYear'] = Date.tm_yday -1 + else: + dict_hour['dayInYear'] = Date.tm_yday + elif feature == 'weekInYear': + dict_hour['weekInYear'] = date.isocalendar()[1] + self._dated_features[date] = dict_hour + date += timedelta(hours=1) + return self._dated_features \ No newline at end of file diff --git a/lib/source/meteofrance.py b/lib/source/meteofrance.py index cc2eff1..c524089 100644 --- a/lib/source/meteofrance.py +++ b/lib/source/meteofrance.py @@ -12,12 +12,16 @@ from urllib.request import urlretrieve import gzip + fileConfig((Path.cwd() / 'config') / 'logging.cfg') logger = getLogger() class MeteoFrance: - def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3): + def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3, + start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'), + end = datetime.now(), + features = []): ''' Constructor of the MeteoFrance source of feature. @@ -33,11 +37,16 @@ class MeteoFrance: latitude (float): The latitude from which we want the meteo features. longitude (float): The longitude from which we want the meteo features. nb_stations (int): Number of closest stations to consider. + features (list): Weather features that have to be integrated, according + to their names in meteofrance_features.csv (cf. config directory) ''' self._latitude = latitude self._longitude = longitude self._nb_stations = nb_stations + self._start = start + self._end = end + self._features = features self._data_directory = (Path.cwd() / 'data') / 'meteo_france' @@ -123,11 +132,11 @@ class MeteoFrance: ''' # List of year-months to consider historical = [] - date_end = datetime.now() - for year in range(1996, date_end.year+1): + date_end = self._end + for year in range(self._start.year, date_end.year+1): for month in range(1,13): date = datetime(year, month, 1) - if date <= date_end: + if date >= self._start and date <= date_end: historical.append(date.strftime("%Y%m")) # We download all csv files from meteofrance that are not in @@ -185,27 +194,28 @@ class MeteoFrance: dict: the dictionary of features per datestamp ''' if self._dated_features == None: - csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance' / 'meteofrance_features.csv' + csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv' logger.info(f'Collecting meteo feature information from {csv_file}') # A dictionary for the features with open(csv_file, "r") as f: reader = DictReader(f, delimiter=',') - next(reader) dico_features = {row["abbreviation"]: { 'name': row['name'], # feature name 'type': row['type'] # qualitative (2) or quantitative (1) } - for row in reader} - + for row in reader if row['name'] in self._features} dir_data = Path.cwd() / 'data' / 'meteo_france' / 'historical' self._dated_features = {} for csv_meteo in listdir(dir_data): - logger.info(f'Inserting {csv_meteo} in intervention dictionary') - with open(dir_data / csv_meteo, "r") as f: - reader = DictReader(f, delimiter=';') - for row in reader: - if row['numer_sta'] in self._stations: - self._dated_features.setdefault(row['date'],{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features}) + date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m') + if date >= self._start and date <= self._end: + logger.info(f'Inserting {csv_meteo} in intervention dictionary') + with open(dir_data / csv_meteo, "r") as f: + reader = DictReader(f, delimiter=';') + for row in reader: + if row['numer_sta'] in self._stations: + date = datetime.strptime(row['date'], '%Y%m%d%H%M%S') + self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features}) return self._dated_features diff --git a/lib/tools/cleaner.py b/lib/tools/cleaner.py deleted file mode 100644 index 1ee1ba4..0000000 --- a/lib/tools/cleaner.py +++ /dev/null @@ -1,41 +0,0 @@ -from pathlib import Path -from shutil import rmtree -from configparser import ConfigParser -from os import remove -from subprocess import Popen, PIPE -from sys import argv -import logging -from logging.config import fileConfig - -fileConfig((Path.cwd() / 'config') / 'logging.cfg') -logger = logging.getLogger() - -argument = argv[-1] - -if argument in ['data', 'all']: - logger.info("Cleaning and restoring data directory") - directory = Path.cwd() / 'data' - if directory.is_dir(): - rmtree(directory) - p = Path(Path.cwd() / 'data') - p.mkdir() - -# Cleaning the postgresql database -if argument in ['db', 'all']: - config = ConfigParser() - config.read((Path.cwd() / 'config') / 'main.cfg') - - host = config['postgresql']['host'] - user = config['postgresql']['user'] - port = config['postgresql']['port'] - dbname = config['postgresql']['dbname'] - - logger.info("PostgreSQL database deletion") - command = ['dropdb', '-h', host, '-U', user, '-p', port, dbname] - process = Popen(command, stdout=PIPE, stderr=PIPE) - stdout, stderr = process.communicate() - - logger.info("PostgreSQL database creation") - command = ['createdb', '-h', host, '-U', user, '-p', port, dbname] - process = Popen(command, stdout=PIPE, stderr=PIPE) - stdout, stderr = process.communicate() diff --git a/lib/tools/connector.py b/lib/tools/connector.py deleted file mode 100644 index a0cc0d5..0000000 --- a/lib/tools/connector.py +++ /dev/null @@ -1,62 +0,0 @@ -from pathlib import Path -import psycopg2 -import configparser - -class Singleton: - - def __init__(self, cls): - self._cls = cls - - def Instance(self): - try: - return self._instance - except AttributeError: - self._instance = self._cls() - return self._instance - - def __call__(self): - raise TypeError('Singletons must be accessed through `Instance()`.') - - def __instancecheck__(self, inst): - return isinstance(inst, self._cls) - -@Singleton -class PostgreSQLDBConnection(object): - """Postgresql database connection""" - - def __init__(self, connection_string = ''): - if connection_string == '': - # We're retrieving information related to the database in config.ini - config = configparser.ConfigParser() - config.read((Path.cwd() / 'config') / 'main.cfg') - - host = config['postgresql']['host'] - user = config['postgresql']['user'] - port = config['postgresql']['port'] - self.dbname = config['postgresql']['dbname'] - - self.connection_string = f"host={host} port={port} dbname={self.dbname} user={user}" - - else: - self.connection_string = connection_string - self.dbname = '' - - - def __enter__(self): - self.connection = psycopg2.connect(self.connection_string) - self.connection.autocommit = True - self.cursor = self.connection.cursor() - return self - - @property - def name(self): - return self.dbname - - def __str__(self): - return 'Database connection object' - - def __exit__(self, exc_type, exc_val, exc_tb): - #self.connection.commit() - self.cursor.close() - self.connection.close() - diff --git a/main.py b/main.py index 9ccb687..6733216 100644 --- a/main.py +++ b/main.py @@ -1,21 +1,22 @@ -from lib.source import MeteoFrance +from lib.source.ephemeris import Ephemeris +from lib.source.meteofrance import MeteoFrance +from datetime import datetime from logging import getLogger from logging.config import fileConfig from pathlib import Path from shutil import rmtree - fileConfig((Path.cwd() / 'config') / 'logging.cfg') logger = getLogger() class Engine: - def __init__(self, clean = False): + def __init__(self, start = None, end = None, time_step = None): logger.info("Predictops engine launched") - if clean: - self.clean() - print("To prevent from downloading again csv files, copy the archive in data rep") + self._X = {} + self._Y = {} + def clean(self): # Cleaning the data directory @@ -27,12 +28,42 @@ class Engine: p.mkdir() - def add_meteofrance(self): - self.meteofrance = MeteoFrance() + def add_feature(self, name, **kw): + + if name == 'meteofrance': + meteofeature = MeteoFrance(**kw) + meteofeature.update() + dated_features = meteofeature.dated_features + for date in dated_features: + self._X.setdefault(date,{}).update(dated_features[date]) + elif name == 'ephemeris': + ephemerides = Ephemeris(**kw) + dated_features = ephemerides.dated_features + for date in dated_features: + self._X.setdefault(date,{}).update(dated_features[date]) + + + @property + def X(self): + return self._X + + @X.setter + def X(self, x): + self._X = x + + +start = datetime.strptime('01/01/2010 00:00:00', '%m/%d/%Y %H:%M:%S') +end = datetime.strptime('12/31/2010 23:00:00', '%m/%d/%Y %H:%M:%S') +engine = Engine() +engine.add_feature(name = 'meteofrance', + start = start, end = end, + latitude = 47.25, longitude = 6.0333, nb_stations = 3, + features = ['temperature', 'pressure']) +engine.add_feature(name = 'ephemeris', + start = start, end = end, + features = ['hour', 'dayInWeek', 'dayInMonth', 'dayInYear', + 'weekInYear', 'month', 'year']) -engine = Engine(clean = False) -engine.add_meteofrance() -engine.meteofrance.update() -print(len(engine.meteofrance.dated_features)) \ No newline at end of file +print(engine.X) \ No newline at end of file -- 2.39.5