-from .meteofrance import MeteoFrance
\ No newline at end of file
--- /dev/null
+from datetime import datetime, timedelta
+import time
+import calendar
+
+class Ephemeris:
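+    '''
+    Calendar-based features (hour, day in week/month/year, week in year,
+    month, year) computed for every hour between start and end.
+    '''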
+
+    def __init__(self, start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
+ end = datetime.now(), features = []):
+ self._start = start
+ self._end = end
+ self._features = features
+
+ self._dated_features = {}
+
+
+
+ def update(self):
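+        # Nothing to collect: unlike the MeteoFrance source, ephemeris
+        # features need no external data and are computed on the fly.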
+ pass
+
+
+
+ @property
+ def dated_features(self):
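+        '''
+        Lazily build a dictionary mapping every hourly datetime between start
+        and end to the requested calendar features, e.g. (assuming 'hour' and
+        'dayInWeek' were requested):
+            {datetime(2010, 7, 14, 8): {'hour': 8, 'dayInWeek': 2}, ...}
+        '''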
+ if self._dated_features == {}:
+ date = self._start
+ while date <= self._end:
+ dict_hour = {}
+                # struct_time view of the current datetime, exposing tm_hour,
+                # tm_wday, tm_mday, tm_mon, tm_year and tm_yday
+                Date = date.timetuple()
+ for feature in self._features:
+ if feature == 'hour':
+ dict_hour['hour'] = Date.tm_hour
+ elif feature == 'dayInWeek':
+ dict_hour['dayInWeek'] = Date.tm_wday
+ elif feature == 'dayInMonth':
+ dict_hour['dayInMonth'] = Date.tm_mday
+ elif feature == 'month':
+ dict_hour['month'] = Date.tm_mon
+ elif feature == 'year':
+ dict_hour['year'] = Date.tm_year
+ elif feature == 'dayInYear':
+                        # In a leap year, on or after February 29, count one day less
+                        # in the year: the February 29 entries will be dropped, so that
+                        # July 14, December 24, etc. always keep the same day-of-year index.
+ if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/"+str(Date.tm_year), "%d/%m/%Y"):
+ dict_hour['dayInYear'] = Date.tm_yday -1
+ else:
+ dict_hour['dayInYear'] = Date.tm_yday
+ elif feature == 'weekInYear':
+ dict_hour['weekInYear'] = date.isocalendar()[1]
+ self._dated_features[date] = dict_hour
+ date += timedelta(hours=1)
+ return self._dated_features
\ No newline at end of file
import gzip
+
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
logger = getLogger()
class MeteoFrance:
- def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3):
+ def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
+ start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
+ end = datetime.now(),
+ features = []):
'''
Constructor of the MeteoFrance source of feature.
latitude (float): The latitude from which we want the meteo features.
longitude (float): The longitude from which we want the meteo features.
nb_stations (int): Number of closest stations to consider.
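+        start (datetime): First timestamp of the period of interest.
+        end (datetime): Last timestamp of the period of interest.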
+ features (list): Weather features that have to be integrated, according
+ to their names in meteofrance_features.csv (cf. config directory)
'''
self._latitude = latitude
self._longitude = longitude
self._nb_stations = nb_stations
+ self._start = start
+ self._end = end
+ self._features = features
self._data_directory = (Path.cwd() / 'data') / 'meteo_france'
'''
# List of year-months to consider
historical = []
- date_end = datetime.now()
- for year in range(1996, date_end.year+1):
+ date_end = self._end
+ for year in range(self._start.year, date_end.year+1):
for month in range(1,13):
date = datetime(year, month, 1)
- if date <= date_end:
+                if (date.year, date.month) >= (self._start.year, self._start.month) and date <= date_end:
historical.append(date.strftime("%Y%m"))
# We download all csv files from meteofrance that are not in
dict: the dictionary of features per datestamp
'''
if self._dated_features == None:
- csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance' / 'meteofrance_features.csv'
+ csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
logger.info(f'Collecting meteo feature information from {csv_file}')
# A dictionary for the features
with open(csv_file, "r") as f:
reader = DictReader(f, delimiter=',')
- next(reader)
dico_features = {row["abbreviation"]:
{
'name': row['name'], # feature name
'type': row['type'] # qualitative (2) or quantitative (1)
}
- for row in reader}
-
+ for row in reader if row['name'] in self._features}
dir_data = Path.cwd() / 'data' / 'meteo_france' / 'historical'
self._dated_features = {}
for csv_meteo in listdir(dir_data):
- logger.info(f'Inserting {csv_meteo} in intervention dictionary')
- with open(dir_data / csv_meteo, "r") as f:
- reader = DictReader(f, delimiter=';')
- for row in reader:
- if row['numer_sta'] in self._stations:
- self._dated_features.setdefault(row['date'],{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
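+                # Historical files are expected to embed the month of their
+                # observations as YYYYMM in the second dot-separated field of
+                # their name (e.g. synop.201001.csv).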
+ date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
+                if (date.year, date.month) >= (self._start.year, self._start.month) and date <= self._end:
+ logger.info(f'Inserting {csv_meteo} in intervention dictionary')
+ with open(dir_data / csv_meteo, "r") as f:
+ reader = DictReader(f, delimiter=';')
+ for row in reader:
+ if row['numer_sta'] in self._stations:
+ date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
+                                # 'mq' (missing value) is mapped to None; each feature name
+                                # is suffixed with the rank of the station it comes from.
+                                self._dated_features.setdefault(date, {}).update({
+                                    dico_features[feat]['name'] + '_' + str(self._stations.index(row['numer_sta'])):
+                                        eval(row[feat].replace('mq', 'None'))
+                                    for feat in dico_features
+                                })
return self._dated_features
+++ /dev/null
-from pathlib import Path
-from shutil import rmtree
-from configparser import ConfigParser
-from os import remove
-from subprocess import Popen, PIPE
-from sys import argv
-import logging
-from logging.config import fileConfig
-
-fileConfig((Path.cwd() / 'config') / 'logging.cfg')
-logger = logging.getLogger()
-
-argument = argv[-1]
-
-if argument in ['data', 'all']:
- logger.info("Cleaning and restoring data directory")
- directory = Path.cwd() / 'data'
- if directory.is_dir():
- rmtree(directory)
- p = Path(Path.cwd() / 'data')
- p.mkdir()
-
-# Cleaning the postgresql database
-if argument in ['db', 'all']:
- config = ConfigParser()
- config.read((Path.cwd() / 'config') / 'main.cfg')
-
- host = config['postgresql']['host']
- user = config['postgresql']['user']
- port = config['postgresql']['port']
- dbname = config['postgresql']['dbname']
-
- logger.info("PostgreSQL database deletion")
- command = ['dropdb', '-h', host, '-U', user, '-p', port, dbname]
- process = Popen(command, stdout=PIPE, stderr=PIPE)
- stdout, stderr = process.communicate()
-
- logger.info("PostgreSQL database creation")
- command = ['createdb', '-h', host, '-U', user, '-p', port, dbname]
- process = Popen(command, stdout=PIPE, stderr=PIPE)
- stdout, stderr = process.communicate()
+++ /dev/null
-from pathlib import Path
-import psycopg2
-import configparser
-
-class Singleton:
-
- def __init__(self, cls):
- self._cls = cls
-
- def Instance(self):
- try:
- return self._instance
- except AttributeError:
- self._instance = self._cls()
- return self._instance
-
- def __call__(self):
- raise TypeError('Singletons must be accessed through `Instance()`.')
-
- def __instancecheck__(self, inst):
- return isinstance(inst, self._cls)
-
-@Singleton
-class PostgreSQLDBConnection(object):
- """Postgresql database connection"""
-
- def __init__(self, connection_string = ''):
- if connection_string == '':
- # We're retrieving information related to the database in config.ini
- config = configparser.ConfigParser()
- config.read((Path.cwd() / 'config') / 'main.cfg')
-
- host = config['postgresql']['host']
- user = config['postgresql']['user']
- port = config['postgresql']['port']
- self.dbname = config['postgresql']['dbname']
-
- self.connection_string = f"host={host} port={port} dbname={self.dbname} user={user}"
-
- else:
- self.connection_string = connection_string
- self.dbname = ''
-
-
- def __enter__(self):
- self.connection = psycopg2.connect(self.connection_string)
- self.connection.autocommit = True
- self.cursor = self.connection.cursor()
- return self
-
- @property
- def name(self):
- return self.dbname
-
- def __str__(self):
- return 'Database connection object'
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- #self.connection.commit()
- self.cursor.close()
- self.connection.close()
-
-from lib.source import MeteoFrance
+from lib.source.ephemeris import Ephemeris
+from lib.source.meteofrance import MeteoFrance
+from datetime import datetime
from logging import getLogger
from logging.config import fileConfig
from pathlib import Path
from shutil import rmtree
-
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
logger = getLogger()
class Engine:
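+    '''
+    Collects the dated features produced by the various sources (MeteoFrance,
+    Ephemeris, ...) and merges them into a single dictionary X indexed by datetime.
+    '''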
- def __init__(self, clean = False):
+ def __init__(self, start = None, end = None, time_step = None):
logger.info("Predictops engine launched")
- if clean:
- self.clean()
- print("To prevent from downloading again csv files, copy the archive in data rep")
+        self._start = start
+        self._end = end
+        self._time_step = time_step
+        self._X = {}
+        self._Y = {}
+
def clean(self):
# Cleaning the data directory
p.mkdir()
- def add_meteofrance(self):
- self.meteofrance = MeteoFrance()
+ def add_feature(self, name, **kw):
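+        '''
+        Instantiate the feature source identified by name ('meteofrance' or
+        'ephemeris'), forwarding the remaining keyword arguments to its
+        constructor, and merge its dated features into X.
+        '''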
+
+ if name == 'meteofrance':
+ meteofeature = MeteoFrance(**kw)
+ meteofeature.update()
+ dated_features = meteofeature.dated_features
+ for date in dated_features:
+ self._X.setdefault(date,{}).update(dated_features[date])
+ elif name == 'ephemeris':
+ ephemerides = Ephemeris(**kw)
+ dated_features = ephemerides.dated_features
+ for date in dated_features:
+ self._X.setdefault(date,{}).update(dated_features[date])
+
+
+ @property
+ def X(self):
+ return self._X
+
+ @X.setter
+ def X(self, x):
+ self._X = x
+
+
+start = datetime.strptime('01/01/2010 00:00:00', '%m/%d/%Y %H:%M:%S')
+end = datetime.strptime('12/31/2010 23:00:00', '%m/%d/%Y %H:%M:%S')
+engine = Engine()
+engine.add_feature(name = 'meteofrance',
+ start = start, end = end,
+ latitude = 47.25, longitude = 6.0333, nb_stations = 3,
+ features = ['temperature', 'pressure'])
+engine.add_feature(name = 'ephemeris',
+ start = start, end = end,
+ features = ['hour', 'dayInWeek', 'dayInMonth', 'dayInYear',
+ 'weekInYear', 'month', 'year'])
-engine = Engine(clean = False)
-engine.add_meteofrance()
-engine.meteofrance.update()
-print(len(engine.meteofrance.dated_features))
\ No newline at end of file
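+# X maps each hourly datetime of the period to its merged feature dictionary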
+print(engine.X)
\ No newline at end of file