AND Private Git Repository - predictops.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Adding calendar features
author: Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Tue, 11 Feb 2020 10:05:45 +0000 (11:05 +0100)
committer: Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Tue, 11 Feb 2020 10:05:45 +0000 (11:05 +0100)
config/features/meteofrance_features.csv [moved from config/features/meteofrance/meteofrance_features.csv with 100% similarity]
lib/source/__init__.py
lib/source/ephemerides.py [deleted file]
lib/source/ephemeris.py [new file with mode: 0644]
lib/source/meteofrance.py
lib/tools/cleaner.py [deleted file]
lib/tools/connector.py [deleted file]
main.py

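diff --git a/lib/source/__init__.py b/lib/source/__init__.py
index 527538dc22068b2b132b65859781c166abdd8a3b..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 (file)
--- a/lib/source/__init__.py
+++ b/lib/source/__init__.py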
@@ -1 +0,0 @@
-from .meteofrance import MeteoFrance
\ No newline at end of file
diff --git a/lib/source/ephemerides.py b/lib/source/ephemerides.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/lib/source/ephemeris.py b/lib/source/ephemeris.py
new file mode 100644 (file)
index 0000000..33c0f2d
--- /dev/null
+++ b/lib/source/ephemeris.py
@@ -0,0 +1,89 @@
+from datetime import datetime, timedelta
+import time
+import calendar
+
+
+def _as_datetime(moment):
+    '''Return moment as a datetime, converting a time.struct_time if needed.'''
+    if isinstance(moment, time.struct_time):
+        return datetime(*moment[:6])
+    return moment
+
+
+def _day_in_year(date):
+    '''
+    Return the day of the year of date, counted as if February 29th did not exist.
+
+    In a leap year, one day is subtracted from February 29th onwards: the
+    29ths of February are going to be removed, so that a given calendar day
+    (July 14th, December 24th...) always gets the same number.
+    '''
+    day = date.timetuple().tm_yday
+    if calendar.isleap(date.year) and (date.month, date.day) >= (2, 29):
+        return day - 1
+    return day
+
+
+class Ephemeris:
+    '''
+    Source of calendar features (hour, day in week, month...), one set per hour.
+    '''
+
+    # How to compute each supported feature from a datetime.
+    _EXTRACTORS = {
+        'hour': lambda date: date.hour,
+        'dayInWeek': lambda date: date.weekday(),  # Monday is 0
+        'dayInMonth': lambda date: date.day,
+        'month': lambda date: date.month,
+        'year': lambda date: date.year,
+        'dayInYear': _day_in_year,
+        'weekInYear': lambda date: date.isocalendar()[1],  # ISO week number
+    }
+
+    def __init__(self, start = None, end = None, features = None):
+        '''
+        Constructor of the Ephemeris source of feature.
+
+        Parameters:
+            start (datetime): First hour to consider. Defaults to January 1st,
+                  1996 at midnight.
+            end (datetime): Last moment to consider (included). Defaults to
+                  the time at which the object is built.
+            features (list): Names of the calendar features to compute, among
+                  'hour', 'dayInWeek', 'dayInMonth', 'month', 'year',
+                  'dayInYear' and 'weekInYear'. Other names are ignored.
+        '''
+        # Defaults are resolved here rather than in the signature, where they
+        # would be evaluated only once: "now" would be frozen at import time
+        # and a single list would be shared by every instance.
+        if start is None:
+            start = datetime(1996, 1, 1)
+        if end is None:
+            end = datetime.now()
+        self._start = _as_datetime(start)
+        self._end = _as_datetime(end)
+        self._features = [] if features is None else list(features)
+
+        self._dated_features = {}
+
+    def update(self):
+        '''Nothing to collect: the features are computed by dated_features.'''
+
+    @property
+    def dated_features(self):
+        '''
+        Calendar features of each hour from start to end, computed on first
+        access and cached afterwards.
+
+        Returns:
+            dict: the dictionary of features per datestamp
+        '''
+        if not self._dated_features:
+            date = self._start
+            while date <= self._end:
+                self._dated_features[date] = {
+                    feature: self._EXTRACTORS[feature](date)
+                    for feature in self._features
+                    if feature in self._EXTRACTORS}
+                date += timedelta(hours=1)
+        return self._dated_features
\ No newline at end of file
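diff --git a/lib/source/meteofrance.py b/lib/source/meteofrance.py
index cc2eff1b96024fb6421cee1e011b491a583e2b74..c524089e6362d0ec48c6d6aab57596b9a95887bf 100644 (file)
--- a/lib/source/meteofrance.py
+++ b/lib/source/meteofrance.py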
@@ -12,12 +12,16 @@ from urllib.request import urlretrieve
 
 import gzip
 
+
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 class MeteoFrance:
 
-    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3):
+    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
+                 start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
+                 end = datetime.now(),
+                 features = []):
         '''
         Constructor of the MeteoFrance source of feature.
 
@@ -33,11 +37,16 @@ class MeteoFrance:
             latitude (float): The latitude from which we want the meteo features.
             longitude (float): The longitude from which we want the meteo features.
             nb_stations (int): Number of closest stations to consider.
+            features (list): Weather features that have to be integrated, according
+                  to their names in meteofrance_features.csv (cf. config directory)
 
         '''
         self._latitude = latitude
         self._longitude = longitude
         self._nb_stations = nb_stations
+        self._start = start
+        self._end = end
+        self._features = features
 
         self._data_directory = (Path.cwd() / 'data') / 'meteo_france'
 
@@ -123,11 +132,11 @@ class MeteoFrance:
         '''
         # List of year-months to consider
         historical = []
-        date_end = datetime.now()
-        for year in range(1996, date_end.year+1):
+        date_end = self._end
+        for year in range(self._start.year, date_end.year+1):
             for month in range(1,13):
                 date = datetime(year, month, 1)
-                if date <= date_end:
+                if date >= self._start and date <= date_end:
                     historical.append(date.strftime("%Y%m"))
 
         # We download all csv files from meteofrance that are not in
@@ -185,27 +194,28 @@ class MeteoFrance:
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
-            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance' / 'meteofrance_features.csv'
+            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
             logger.info(f'Collecting meteo feature information from {csv_file}')
             # A dictionary for the features
             with open(csv_file, "r") as f:
                 reader = DictReader(f, delimiter=',')
-                next(reader)
                 dico_features = {row["abbreviation"]:
                                    {
                                        'name': row['name'], # feature name
                                        'type': row['type']  # qualitative (2) or quantitative (1)
                                     }
-                                for row in reader}
-
+                                for row in reader if row['name'] in self._features}
             dir_data = Path.cwd() / 'data' / 'meteo_france' / 'historical'
             self._dated_features = {}
             for csv_meteo in listdir(dir_data):
-                logger.info(f'Inserting {csv_meteo} in intervention dictionary')
-                with open(dir_data / csv_meteo, "r") as f:
-                    reader = DictReader(f, delimiter=';')
-                    for row in reader:
-                        if row['numer_sta'] in self._stations:
-                            self._dated_features.setdefault(row['date'],{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+                date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
+                if date >= self._start and date <= self._end:
+                    logger.info(f'Inserting {csv_meteo} in intervention dictionary')
+                    with open(dir_data / csv_meteo, "r") as f:
+                        reader = DictReader(f, delimiter=';')
+                        for row in reader:
+                            if row['numer_sta'] in self._stations:
+                                date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
+                                self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
         return self._dated_features
 
diff --git a/lib/tools/cleaner.py b/lib/tools/cleaner.py
deleted file mode 100644 (file)
index 1ee1ba4..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-from pathlib import Path
-from shutil import rmtree
-from configparser import ConfigParser
-from os import remove
-from subprocess import Popen, PIPE
-from sys import argv
-import logging
-from logging.config import fileConfig
-
-fileConfig((Path.cwd() / 'config') / 'logging.cfg')
-logger = logging.getLogger()
-
-argument = argv[-1]
-
-if argument in ['data', 'all']:
-    logger.info("Cleaning and restoring data directory")
-    directory  = Path.cwd() / 'data'
-    if directory.is_dir():
-        rmtree(directory)
-    p = Path(Path.cwd() / 'data')
-    p.mkdir()
-
-# Cleaning the postgresql database
-if argument in ['db', 'all']:
-    config = ConfigParser()
-    config.read((Path.cwd() / 'config') / 'main.cfg')
-    
-    host   = config['postgresql']['host']
-    user   = config['postgresql']['user']
-    port   = config['postgresql']['port']
-    dbname = config['postgresql']['dbname']
-    
-    logger.info("PostgreSQL database deletion")
-    command = ['dropdb', '-h', host, '-U', user, '-p', port, dbname]
-    process = Popen(command, stdout=PIPE, stderr=PIPE)
-    stdout, stderr = process.communicate()
-    
-    logger.info("PostgreSQL database creation")
-    command = ['createdb', '-h', host, '-U', user, '-p', port, dbname]
-    process = Popen(command, stdout=PIPE, stderr=PIPE)
-    stdout, stderr = process.communicate()
diff --git a/lib/tools/connector.py b/lib/tools/connector.py
deleted file mode 100644 (file)
index a0cc0d5..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-from pathlib import Path
-import psycopg2
-import configparser
-
-class Singleton:
-
-    def __init__(self, cls):
-        self._cls = cls
-
-    def Instance(self):
-        try:
-            return self._instance
-        except AttributeError:
-            self._instance = self._cls()
-            return self._instance
-
-    def __call__(self):
-        raise TypeError('Singletons must be accessed through `Instance()`.')
-
-    def __instancecheck__(self, inst):
-        return isinstance(inst, self._cls)
-    
-@Singleton
-class PostgreSQLDBConnection(object):
-    """Postgresql database connection"""
-    
-    def __init__(self, connection_string = ''):
-        if connection_string == '':
-            # We're retrieving information related to the database in config.ini
-            config = configparser.ConfigParser()
-            config.read((Path.cwd() / 'config') / 'main.cfg')
-    
-            host   = config['postgresql']['host']
-            user   = config['postgresql']['user']
-            port   = config['postgresql']['port']
-            self.dbname = config['postgresql']['dbname']
-            
-            self.connection_string = f"host={host} port={port} dbname={self.dbname} user={user}"
-            
-        else:
-            self.connection_string = connection_string
-            self.dbname = ''
-
-            
-    def __enter__(self):
-        self.connection = psycopg2.connect(self.connection_string)
-        self.connection.autocommit = True
-        self.cursor = self.connection.cursor()
-        return self
-
-    @property   
-    def name(self):
-        return self.dbname
-    
-    def __str__(self):
-        return 'Database connection object'
-    
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        #self.connection.commit()
-        self.cursor.close()
-        self.connection.close()        
-
diff --git a/main.py b/main.py
index 9ccb687252334a014f74a9d4bdf468977947ccca..6733216b474c45a136766be52e83ed3e3a0591e7 100644 (file)
--- a/main.py
+++ b/main.py
@@ -1,21 +1,22 @@
-from lib.source import MeteoFrance
+from lib.source.ephemeris import Ephemeris
+from lib.source.meteofrance import MeteoFrance
 
+from datetime import datetime
 from logging import getLogger
 from logging.config import fileConfig
 from pathlib import Path
 from shutil import rmtree
 
-
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 
 class Engine:
-    def __init__(self, clean = False):
+    def __init__(self, start = None, end = None, time_step = None):
         logger.info("Predictops engine launched")
-        if clean:
-            self.clean()
-            print("To prevent from downloading again csv files, copy the archive in data rep")
+        self._X = {}
+        self._Y = {}
+
 
     def clean(self):
         # Cleaning the data directory
@@ -27,12 +28,42 @@ class Engine:
         p.mkdir()
 
 
-    def add_meteofrance(self):
-        self.meteofrance = MeteoFrance()
+    def add_feature(self, name, **kw):
+
+        if name == 'meteofrance':
+            meteofeature = MeteoFrance(**kw)
+            meteofeature.update()
+            dated_features = meteofeature.dated_features
+            for date in dated_features:
+                self._X.setdefault(date,{}).update(dated_features[date])
+        elif name == 'ephemeris':
+            ephemerides = Ephemeris(**kw)
+            dated_features = ephemerides.dated_features
+            for date in dated_features:
+                self._X.setdefault(date,{}).update(dated_features[date])
+
+
+    @property
+    def X(self):
+        return self._X
+
+    @X.setter
+    def X(self, x):
+        self._X = x
+
+
+start = datetime.strptime('01/01/2010 00:00:00', '%m/%d/%Y %H:%M:%S')
+end = datetime.strptime('12/31/2010 23:00:00', '%m/%d/%Y %H:%M:%S')
 
+engine = Engine()
+engine.add_feature(name = 'meteofrance',
+                   start = start, end = end,
+                   latitude = 47.25, longitude = 6.0333, nb_stations = 3,
+                   features = ['temperature', 'pressure'])
 
+engine.add_feature(name = 'ephemeris',
+                   start = start, end = end,
+                   features = ['hour', 'dayInWeek', 'dayInMonth', 'dayInYear',
+                               'weekInYear', 'month', 'year'])
 
-engine = Engine(clean = False)
-engine.add_meteofrance()
-engine.meteofrance.update()
-print(len(engine.meteofrance.dated_features))
\ No newline at end of file
+print(engine.X)
\ No newline at end of file