AND Private Git Repository - predictops.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Adding calendar features
author Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Tue, 11 Feb 2020 10:05:45 +0000 (11:05 +0100)
committer Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Tue, 11 Feb 2020 10:05:45 +0000 (11:05 +0100)
config/features/meteofrance_features.csv [moved from config/features/meteofrance/meteofrance_features.csv with 100% similarity]
lib/source/__init__.py
lib/source/ephemerides.py [deleted file]
lib/source/ephemeris.py [new file with mode: 0644]
lib/source/meteofrance.py
lib/tools/cleaner.py [deleted file]
lib/tools/connector.py [deleted file]
main.py

index 527538dc22068b2b132b65859781c166abdd8a3b..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 (file)
@@ -1 +0,0 @@
-from .meteofrance import MeteoFrance
\ No newline at end of file
diff --git a/lib/source/ephemerides.py b/lib/source/ephemerides.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/lib/source/ephemeris.py b/lib/source/ephemeris.py
new file mode 100644 (file)
index 0000000..33c0f2d
--- /dev/null
@@ -0,0 +1,52 @@
+from datetime import datetime, timedelta
+import time
+import calendar
+
+class Ephemeris:
+    '''Source of calendar features, computed for each hour of a period.'''
+
+    def __init__(self, start = datetime(1996, 1, 1), end = None, features = None):
+        '''
+        Constructor of the Ephemeris source of feature.
+
+        Parameters:
+            start (datetime): First hour of the period (included).
+            end (datetime): Upper bound (included) of the period; None means now.
+            features (list): Names among 'hour', 'dayInWeek', 'dayInMonth',
+                  'month', 'year', 'dayInYear', 'weekInYear'; others are ignored.
+        '''
+        self._start = start
+        # Resolved per call: a default in the signature is evaluated only once
+        self._end = datetime.now() if end is None else end
+        self._features = [] if features is None else features
+        self._dated_features = {}
+
+    def update(self):
+        '''No-op: the features are computed lazily by dated_features.'''
+        pass
+
+    @staticmethod
+    def _day_in_year(date):
+        '''Day of the year, minus one from February 29th on in leap years: as
+        the February 29ths are to be removed, fixed dates (July 14th, December
+        24th...) then always get the same number.'''
+        yday = date.timetuple().tm_yday  # 60 is February 29th in a leap year
+        return yday - 1 if calendar.isleap(date.year) and yday >= 60 else yday
+
+    @property
+    def dated_features(self):
+        '''Dictionary {datetime: {feature name: value}}, built on first access.'''
+        if not self._dated_features:
+            extractors = {'hour': lambda d: d.hour,
+                          'dayInWeek': lambda d: d.weekday(),
+                          'dayInMonth': lambda d: d.day,
+                          'month': lambda d: d.month,
+                          'year': lambda d: d.year,
+                          'dayInYear': self._day_in_year,
+                          'weekInYear': lambda d: d.isocalendar()[1]}
+            wanted = [name for name in self._features if name in extractors]
+            date = self._start
+            while date <= self._end:
+                self._dated_features[date] = {name: extractors[name](date) for name in wanted}
+                date += timedelta(hours=1)
+        return self._dated_features
\ No newline at end of file
index cc2eff1b96024fb6421cee1e011b491a583e2b74..c524089e6362d0ec48c6d6aab57596b9a95887bf 100644 (file)
@@ -12,12 +12,16 @@ from urllib.request import urlretrieve
 
 import gzip
 
 
 import gzip
 
+
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 class MeteoFrance:
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 class MeteoFrance:
 
-    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3):
+    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
+                 start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
+                 end = datetime.now(),
+                 features = []):
         '''
         Constructor of the MeteoFrance source of feature.
 
         '''
         Constructor of the MeteoFrance source of feature.
 
@@ -33,11 +37,16 @@ class MeteoFrance:
             latitude (float): The latitude from which we want the meteo features.
             longitude (float): The longitude from which we want the meteo features.
             nb_stations (int): Number of closest stations to consider.
             latitude (float): The latitude from which we want the meteo features.
             longitude (float): The longitude from which we want the meteo features.
             nb_stations (int): Number of closest stations to consider.
+            features (list): Weather features that have to be integrated, according
+                  to their names in meteofrance_features.csv (cf. config directory)
 
         '''
         self._latitude = latitude
         self._longitude = longitude
         self._nb_stations = nb_stations
 
         '''
         self._latitude = latitude
         self._longitude = longitude
         self._nb_stations = nb_stations
+        self._start = start
+        self._end = end
+        self._features = features
 
         self._data_directory = (Path.cwd() / 'data') / 'meteo_france'
 
 
         self._data_directory = (Path.cwd() / 'data') / 'meteo_france'
 
@@ -123,11 +132,11 @@ class MeteoFrance:
         '''
         # List of year-months to consider
         historical = []
         '''
         # List of year-months to consider
         historical = []
-        date_end = datetime.now()
-        for year in range(1996, date_end.year+1):
+        date_end = self._end
+        for year in range(self._start.year, date_end.year+1):
             for month in range(1,13):
                 date = datetime(year, month, 1)
             for month in range(1,13):
                 date = datetime(year, month, 1)
-                if date <= date_end:
+                if date >= self._start and date <= date_end:
                     historical.append(date.strftime("%Y%m"))
 
         # We download all csv files from meteofrance that are not in
                     historical.append(date.strftime("%Y%m"))
 
         # We download all csv files from meteofrance that are not in
@@ -185,27 +194,28 @@ class MeteoFrance:
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
-            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance' / 'meteofrance_features.csv'
+            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
             logger.info(f'Collecting meteo feature information from {csv_file}')
             # A dictionary for the features
             with open(csv_file, "r") as f:
                 reader = DictReader(f, delimiter=',')
             logger.info(f'Collecting meteo feature information from {csv_file}')
             # A dictionary for the features
             with open(csv_file, "r") as f:
                 reader = DictReader(f, delimiter=',')
-                next(reader)
                 dico_features = {row["abbreviation"]:
                                    {
                                        'name': row['name'], # feature name
                                        'type': row['type']  # qualitative (2) or quantitative (1)
                                     }
                 dico_features = {row["abbreviation"]:
                                    {
                                        'name': row['name'], # feature name
                                        'type': row['type']  # qualitative (2) or quantitative (1)
                                     }
-                                for row in reader}
-
+                                for row in reader if row['name'] in self._features}
             dir_data = Path.cwd() / 'data' / 'meteo_france' / 'historical'
             self._dated_features = {}
             for csv_meteo in listdir(dir_data):
             dir_data = Path.cwd() / 'data' / 'meteo_france' / 'historical'
             self._dated_features = {}
             for csv_meteo in listdir(dir_data):
-                logger.info(f'Inserting {csv_meteo} in intervention dictionary')
-                with open(dir_data / csv_meteo, "r") as f:
-                    reader = DictReader(f, delimiter=';')
-                    for row in reader:
-                        if row['numer_sta'] in self._stations:
-                            self._dated_features.setdefault(row['date'],{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+                date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
+                if date >= self._start and date <= self._end:
+                    logger.info(f'Inserting {csv_meteo} in intervention dictionary')
+                    with open(dir_data / csv_meteo, "r") as f:
+                        reader = DictReader(f, delimiter=';')
+                        for row in reader:
+                            if row['numer_sta'] in self._stations:
+                                date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
+                                self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
         return self._dated_features
 
         return self._dated_features
 
diff --git a/lib/tools/cleaner.py b/lib/tools/cleaner.py
deleted file mode 100644 (file)
index 1ee1ba4..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-from pathlib import Path
-from shutil import rmtree
-from configparser import ConfigParser
-from os import remove
-from subprocess import Popen, PIPE
-from sys import argv
-import logging
-from logging.config import fileConfig
-
-fileConfig((Path.cwd() / 'config') / 'logging.cfg')
-logger = logging.getLogger()
-
-argument = argv[-1]
-
-if argument in ['data', 'all']:
-    logger.info("Cleaning and restoring data directory")
-    directory  = Path.cwd() / 'data'
-    if directory.is_dir():
-        rmtree(directory)
-    p = Path(Path.cwd() / 'data')
-    p.mkdir()
-
-# Cleaning the postgresql database
-if argument in ['db', 'all']:
-    config = ConfigParser()
-    config.read((Path.cwd() / 'config') / 'main.cfg')
-    
-    host   = config['postgresql']['host']
-    user   = config['postgresql']['user']
-    port   = config['postgresql']['port']
-    dbname = config['postgresql']['dbname']
-    
-    logger.info("PostgreSQL database deletion")
-    command = ['dropdb', '-h', host, '-U', user, '-p', port, dbname]
-    process = Popen(command, stdout=PIPE, stderr=PIPE)
-    stdout, stderr = process.communicate()
-    
-    logger.info("PostgreSQL database creation")
-    command = ['createdb', '-h', host, '-U', user, '-p', port, dbname]
-    process = Popen(command, stdout=PIPE, stderr=PIPE)
-    stdout, stderr = process.communicate()
diff --git a/lib/tools/connector.py b/lib/tools/connector.py
deleted file mode 100644 (file)
index a0cc0d5..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-from pathlib import Path
-import psycopg2
-import configparser
-
-class Singleton:
-
-    def __init__(self, cls):
-        self._cls = cls
-
-    def Instance(self):
-        try:
-            return self._instance
-        except AttributeError:
-            self._instance = self._cls()
-            return self._instance
-
-    def __call__(self):
-        raise TypeError('Singletons must be accessed through `Instance()`.')
-
-    def __instancecheck__(self, inst):
-        return isinstance(inst, self._cls)
-    
-@Singleton
-class PostgreSQLDBConnection(object):
-    """Postgresql database connection"""
-    
-    def __init__(self, connection_string = ''):
-        if connection_string == '':
-            # We're retrieving information related to the database in config.ini
-            config = configparser.ConfigParser()
-            config.read((Path.cwd() / 'config') / 'main.cfg')
-    
-            host   = config['postgresql']['host']
-            user   = config['postgresql']['user']
-            port   = config['postgresql']['port']
-            self.dbname = config['postgresql']['dbname']
-            
-            self.connection_string = f"host={host} port={port} dbname={self.dbname} user={user}"
-            
-        else:
-            self.connection_string = connection_string
-            self.dbname = ''
-
-            
-    def __enter__(self):
-        self.connection = psycopg2.connect(self.connection_string)
-        self.connection.autocommit = True
-        self.cursor = self.connection.cursor()
-        return self
-
-    @property   
-    def name(self):
-        return self.dbname
-    
-    def __str__(self):
-        return 'Database connection object'
-    
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        #self.connection.commit()
-        self.cursor.close()
-        self.connection.close()        
-
diff --git a/main.py b/main.py
index 9ccb687252334a014f74a9d4bdf468977947ccca..6733216b474c45a136766be52e83ed3e3a0591e7 100644 (file)
--- a/main.py
+++ b/main.py
@@ -1,21 +1,22 @@
-from lib.source import MeteoFrance
+from lib.source.ephemeris import Ephemeris
+from lib.source.meteofrance import MeteoFrance
 
 
+from datetime import datetime
 from logging import getLogger
 from logging.config import fileConfig
 from pathlib import Path
 from shutil import rmtree
 
 from logging import getLogger
 from logging.config import fileConfig
 from pathlib import Path
 from shutil import rmtree
 
-
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 
 class Engine:
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 
 class Engine:
-    def __init__(self, clean = False):
+    def __init__(self, start = None, end = None, time_step = None):
         logger.info("Predictops engine launched")
         logger.info("Predictops engine launched")
-        if clean:
-            self.clean()
-            print("To prevent from downloading again csv files, copy the archive in data rep")
+        self._X = {}
+        self._Y = {}
+
 
     def clean(self):
         # Cleaning the data directory
 
     def clean(self):
         # Cleaning the data directory
@@ -27,12 +28,42 @@ class Engine:
         p.mkdir()
 
 
         p.mkdir()
 
 
-    def add_meteofrance(self):
-        self.meteofrance = MeteoFrance()
+    def add_feature(self, name, **kw):
+        '''Merge the dated features of the source called name into X; kw is
+        forwarded to the source constructor, unknown names are ignored.'''
+        if name == 'meteofrance':
+            source = MeteoFrance(**kw)
+            source.update()
+        elif name == 'ephemeris':
+            source = Ephemeris(**kw)
+        else:
+            return
+        for date, values in source.dated_features.items():
+            self._X.setdefault(date, {}).update(values)
+
+
+    @property
+    def X(self):
+        '''dict: features merged by add_feature, keyed like the sources' dated_features.'''
+        return self._X
+
+    @X.setter
+    def X(self, x):
+        '''Replace the whole dictionary of gathered features by x.'''
+        self._X = x
+
+
+start = datetime.strptime('01/01/2010 00:00:00', '%m/%d/%Y %H:%M:%S')
+end = datetime.strptime('12/31/2010 23:00:00', '%m/%d/%Y %H:%M:%S')
 
 
+engine = Engine()
+engine.add_feature(name = 'meteofrance',
+                   start = start, end = end,
+                   latitude = 47.25, longitude = 6.0333, nb_stations = 3,
+                   features = ['temperature', 'pressure'])
 
 
+engine.add_feature(name = 'ephemeris',
+                   start = start, end = end,
+                   features = ['hour', 'dayInWeek', 'dayInMonth', 'dayInYear',
+                               'weekInYear', 'month', 'year'])
 
 
-engine = Engine(clean = False)
-engine.add_meteofrance()
-engine.meteofrance.update()
-print(len(engine.meteofrance.dated_features))
\ No newline at end of file
+print(engine.X)
\ No newline at end of file