AND Private Git Repository - predictops.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Refactoring, fin du lever/coucher de soleil, et début de sentinelles master
author Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
committer Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
16 files changed:
config/features/feature_ephemeris.cfg
config/features/feature_meteo.cfg
config/learn.cfg
config/learners/lightgbm.cfg
config/learners/xgboost.cfg
config/targets/sdis25.cfg
predictops/engine.py
predictops/learn/learning.py
predictops/learn/preprocessing.py
predictops/source/ephemeris.py
predictops/source/holidays.py
predictops/source/meteofrance.py
predictops/source/ramadan.py
predictops/source/sentinelles.py [new file with mode: 0644]
predictops/target/target.py
requirements.txt

index 3ed31c76c6b3e5fa2124886b3adc5e0233d2841b..47c8f12e02b311075b85c8c159c13806cba94cdd 100644 (file)
@@ -31,4 +31,24 @@ numerical   = True
 [year]
 binary      = False
 categorical = False
 [year]
 binary      = False
 categorical = False
-numerical   = True
\ No newline at end of file
+numerical   = True
+
+[sunRised]
+binary      = True
+categorical = False
+numerical   = False
+
+[noon]
+binary      = True
+categorical = False
+numerical   = False
+
+[night]
+binary      = True
+categorical = False
+numerical   = False
+
+[daylightSavingTime]
+binary      = True
+categorical = False
+numerical   = False
index 5b694e5f021ad85d7da6c012c885fef1af514a13..a0a0eabead9422130c2440ba96560f10c23b32c6 100644 (file)
@@ -2,10 +2,6 @@
 regenerate = False
 reinsert   = True
 
 regenerate = False
 reinsert   = True
 
-[POSITION]
-latitude  = 47.25
-longitude = 6.0333
-
 [STATIONS]
 nb_stations = 3
 
 [STATIONS]
 nb_stations = 3
 
index aef0edd22f22e03a84ea12dacf04f4bcf3db6322..9081652d64e234726d3362fd36179efd124cdb22 100644 (file)
@@ -1,7 +1,17 @@
 [DATETIME]
 [DATETIME]
-start    = 01/01/2006 00:00:00
+start    = 01/01/2016 00:00:00
 end      = 12/31/2019 23:00:00
 end      = 12/31/2019 23:00:00
-hourStep = 1
+hourStep = 3
+
+
+[HISTORY_KNOWLEDGE]
+nb_lines = 24//3*7*4
+
+
+[TARGET]
+config      = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+cumulative  = True
+horizon     = 3
 
 
 [FEATURES]
 
 
 [FEATURES]
@@ -9,6 +19,7 @@ ephemeris   = True
 holidays    = True
 meteofrance = True
 ramadan     = True
 holidays    = True
 meteofrance = True
 ramadan     = True
+sentinelles = True
 
 
 [FEATURE_CONFIG]
 
 
 [FEATURE_CONFIG]
@@ -16,6 +27,7 @@ ephemeris   = (Path.cwd() / 'config') / 'features' / 'feature_ephemeris.cfg'
 holidays    = (Path.cwd() / 'config') / 'features' / 'feature_holidays.cfg'
 meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
 ramadan     = (Path.cwd() / 'config') / 'features' / 'feature_ramadan.cfg'
 holidays    = (Path.cwd() / 'config') / 'features' / 'feature_holidays.cfg'
 meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
 ramadan     = (Path.cwd() / 'config') / 'features' / 'feature_ramadan.cfg'
+sentinelles = (Path.cwd() / 'config') / 'features' / 'feature_sentinelles.cfg'
 
 
 [PREPROCESSING]
 
 
 [PREPROCESSING]
@@ -23,14 +35,5 @@ fill_method = linear
 order       = 3
 
 
 order       = 3
 
 
-[HISTORY_KNOWLEDGE]
-nb_lines = 24*7*4
-
-
-[TARGET]
-config      = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
-cumulative  = True
-horizon     = 0
-
 [LEARNER]
 config =  (Path.cwd() / 'config') / 'learners' / 'lightgbm.cfg'
\ No newline at end of file
 [LEARNER]
 config =  (Path.cwd() / 'config') / 'learners' / 'lightgbm.cfg'
\ No newline at end of file
index b5887f0497b8db84204398c21990d6df43c04559..32d19728c7fc29866135c3f9c205facc9b11b7ea 100644 (file)
@@ -4,7 +4,7 @@ method = lightgbm
 [HYPERPARAMETERS]
 learning_rate  = 0.1
 metric         = auc
 [HYPERPARAMETERS]
 learning_rate  = 0.1
 metric         = auc
-num_iterations = 1000
+num_iterations = 300
 num_round      = 10
 num_round      = 10
-num_leaves     = 31
+num_leaves     = 900
 objective      = poisson
 objective      = poisson
index f9e53298ef6586768c7c5d7d3a55e697f604670b..9a1836330d00b827edfeeede7a082e809efdd191 100644 (file)
@@ -4,7 +4,7 @@ method = xgboost
 
 [HYPERPARAMETERS]
 learning_rate = 0.01
 
 [HYPERPARAMETERS]
 learning_rate = 0.01
-max_depth     = 7
+max_depth     = 6
 random_state  = 42
 n_estimators  = 10000
 n_jobs        = -1
 random_state  = 42
 n_estimators  = 10000
 n_jobs        = -1
index 712e93b1d6f3ea78815c3c7d87e747b9a0c0d8fd..2d4f9f0d48a1ab0cd329dfadaa925f4121a618db 100644 (file)
@@ -1,3 +1,10 @@
+[POSITION]
+name      = Besançon
+country   = France
+timezone  = Europe/Paris
+latitude  = 47.237829
+longitude = -6.0240539
+
 [SPECIFICATION]
 origin      = False
 destination = False
 [SPECIFICATION]
 origin      = False
 destination = False
index a63ef2932aee9086fb23ede8902ec4ea0820adb8..bb85787ca304aa2643a5c1e2c42e53362ced5689 100644 (file)
@@ -6,13 +6,15 @@ from pathlib import Path
 from shutil import rmtree
 
 import os
 from shutil import rmtree
 
 import os
+import pytz
 
 from .learn.learning import Learning
 from .learn.preprocessing import Preprocessing
 from .source.ephemeris import Ephemeris
 from .source.holidays import Holidays
 
 from .learn.learning import Learning
 from .learn.preprocessing import Preprocessing
 from .source.ephemeris import Ephemeris
 from .source.holidays import Holidays
-from .source.ramadan import Ramadan
 from .source.meteofrance import MeteoFrance
 from .source.meteofrance import MeteoFrance
+from .source.ramadan import Ramadan
+from .source.sentinelles import Sentinelles
 from .target.target import Target
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 from .target.target import Target
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
@@ -68,10 +70,10 @@ class Engine(object):
                 self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
                 self._config_text += f.read() + '\n\n'
 
                 self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
                 self._config_text += f.read() + '\n\n'
 
-            ephemerides = Ephemeris(config_file=config_file)
+            ephemerides = Ephemeris(config_file=config_file, start=self._start, end=self._end)
 
 
-            ephemerides.start = self._start
-            ephemerides.end = self._end
+            ephemerides.start = self._start
+            ephemerides.end = self._end
 
             dated_features = ephemerides.dated_features
             for date in dated_features:
 
             dated_features = ephemerides.dated_features
             for date in dated_features:
@@ -136,6 +138,8 @@ class Engine(object):
 
     def add_preprocessing(self):
         self._preproc = Preprocessing(config_file=self._config,
 
     def add_preprocessing(self):
         self._preproc = Preprocessing(config_file=self._config,
+                                      start=self._start, end=self._end,
+                                      timestep=self._timestep,
                                       dict_features=self.X,
                                       dict_target=self.y)
 
                                       dict_features=self.X,
                                       dict_target=self.y)
 
index fb3675d56d7e007c557d93aeedd64bcc514a8df0..f13c3a6514eb2a8b552a4aa80fd13b275f590036 100644 (file)
@@ -32,10 +32,6 @@ class Learning:
         else:
             self._X = X
             self._y = y
         else:
             self._X = X
             self._y = y
-        rep = (Path.cwd() / self._file_name)
-        rep.mkdir()
-        self._filename = str(self._file_name / os.path.basename(self._file_name))
-        self._X.to_csv(self._filename + '.csv')
         self._learn()
         self._evaluate()
 
         self._learn()
         self._evaluate()
 
@@ -92,6 +88,10 @@ class Learning:
         for k in range(10):
             txt += f"Percentage of errors lower than {k}: {[abs(int(u-v))<=k for u,v in zip(self._y_test.values, y_test_pred)].count(True)/len(self._y_test)*100}\n"
 
         for k in range(10):
             txt += f"Percentage of errors lower than {k}: {[abs(int(u-v))<=k for u,v in zip(self._y_test.values, y_test_pred)].count(True)/len(self._y_test)*100}\n"
 
+        rep = (Path.cwd() / self._file_name)
+        rep.mkdir()
+        self._filename = str(self._file_name / os.path.basename(self._file_name))
+
         print(txt)
         with open(self._filename + ".result", 'w') as f:
             f.write(txt)
         print(txt)
         with open(self._filename + ".result", 'w') as f:
             f.write(txt)
index 9bc09ad2eca2759c22b6047c3ded8ab747e015de..55cffbd2a0e094a610580e1e4dbcdf8adc80de5d 100644 (file)
@@ -1,10 +1,7 @@
 from configparser import ConfigParser
 from configparser import ConfigParser
-from csv import DictReader
-from datetime import datetime, timedelta
 from itertools import chain
 from logging import getLogger
 from logging.config import fileConfig
 from itertools import chain
 from logging import getLogger
 from logging.config import fileConfig
-from os import listdir
 from pathlib import Path
 from sklearn import preprocessing
 
 from pathlib import Path
 from sklearn import preprocessing
 
@@ -27,17 +24,16 @@ class Preprocessing:
     '''
 
     def __init__(self, config_file=None,
     '''
 
     def __init__(self, config_file=None,
+                 start=None, end=None, timestep=None,
                  dict_features=None, dict_target=None):
         '''
         Constructor that defines all needed attributes and collects features.
         '''
         self._config = config_file
 
                  dict_features=None, dict_target=None):
         '''
         Constructor that defines all needed attributes and collects features.
         '''
         self._config = config_file
 
-        self._start = datetime.strptime(self._config['DATETIME']['start'],
-                                        '%m/%d/%Y %H:%M:%S')
-        self._end = datetime.strptime(self._config['DATETIME']['end'],
-                                      '%m/%d/%Y %H:%M:%S')
-        self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
+        self._start = start
+        self._end = end
+        self._timestep = timestep
         self._dict_features = dict_features
         self._dict_target = dict_target
 
         self._dict_features = dict_features
         self._dict_target = dict_target
 
@@ -196,6 +192,7 @@ class Preprocessing:
         '''
         logger.info("One hot encoding for categorical feature")
         # We store numerical columns
         '''
         logger.info("One hot encoding for categorical feature")
         # We store numerical columns
+
         df_out = pd.DataFrame()
         for col in self._numerical_columns:
             df_out[col] = self._dataframe[col]
         df_out = pd.DataFrame()
         for col in self._numerical_columns:
             df_out[col] = self._dataframe[col]
@@ -222,6 +219,8 @@ class Preprocessing:
             self._fill_nan()
             # Adding previous (historical) nb_interventions as features
             self._add_history()
             self._fill_nan()
             # Adding previous (historical) nb_interventions as features
             self._add_history()
+            # self._dataframe.to_csv('toto.csv')
+            # exit()
             # Normalizing numerical values
             self._standardize()
             # Dealing with categorical features
             # Normalizing numerical values
             self._standardize()
             # Dealing with categorical features
index e1c07ecfe604de8012715c25a773c2a66fed7bb7..e04a41a46470d7981db259b42c0ba833c8c53639 100644 (file)
@@ -1,33 +1,40 @@
+from astral import LocationInfo
+from astral.sun import sun
 from configparser import ConfigParser
 from datetime import datetime, timedelta
 from logging import getLogger
 from logging.config import fileConfig
 from pathlib import Path
 
 from configparser import ConfigParser
 from datetime import datetime, timedelta
 from logging import getLogger
 from logging.config import fileConfig
 from pathlib import Path
 
-import time
 import calendar
 import calendar
+import pytz
+import time
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
+
 class Ephemeris:
 
     _start = None
 class Ephemeris:
 
     _start = None
-    _end   = None
+    _end = None
 
 
-    def __init__(self, config_file):
+    def __init__(self, config_file, start, end):
 
         self._config = ConfigParser()
         self._config.read(config_file)
 
 
         self._config = ConfigParser()
         self._config.read(config_file)
 
+        self._city = LocationInfo("Besançon", "France", "Europe/Paris", 47.237829, -6.0240539)
+        self._start = start
+        self._end = end
         # Collecting ephemeris features
         self._features = [section for section in self._config
         # Collecting ephemeris features
         self._features = [section for section in self._config
-                              if self._config[section].getboolean('numerical')
-                              or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
 
         self._dated_features = {}
 
 
         self._dated_features = {}
 
-
     @property
     def start(self):
         return self._start
     @property
     def start(self):
         return self._start
@@ -36,7 +43,6 @@ class Ephemeris:
     def start(self, x):
         self._start = x
 
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
     @property
     def end(self):
         return self._end
@@ -45,15 +51,18 @@ class Ephemeris:
     def end(self, x):
         self._end = x
 
     def end(self, x):
         self._end = x
 
-
     @property
     def dated_features(self):
         if self._dated_features == {}:
             logger.info("Adding ephemeris features")
     @property
     def dated_features(self):
         if self._dated_features == {}:
             logger.info("Adding ephemeris features")
+            paris = pytz.timezone('Europe/Paris')
             date = self._start
             while date <= self._end:
             date = self._start
             while date <= self._end:
+                datel = paris.localize(date)
                 dict_hour = {}
                 Date = time.strptime(datetime.strftime(date, '%m/%d/%Y %H:%M:%S'), '%m/%d/%Y %H:%M:%S')
                 dict_hour = {}
                 Date = time.strptime(datetime.strftime(date, '%m/%d/%Y %H:%M:%S'), '%m/%d/%Y %H:%M:%S')
+                s = sun(self._city.observer, date=date,
+                        tzinfo=pytz.timezone('Europe/Paris'))
                 for feature in self._features:
                     if feature == 'hour':
                         dict_hour['hour'] = Date.tm_hour
                 for feature in self._features:
                     if feature == 'hour':
                         dict_hour['hour'] = Date.tm_hour
@@ -69,12 +78,23 @@ class Ephemeris:
                         # Si c'est une année bissextile et qu'on est après le 29 février, on compte une journée
                         # dans l'année de moins, car on va supprimer les 29 févriers, de sorte que les 14 juillets,
                         # les 24 décembre... tombent toujours
                         # Si c'est une année bissextile et qu'on est après le 29 février, on compte une journée
                         # dans l'année de moins, car on va supprimer les 29 févriers, de sorte que les 14 juillets,
                         # les 24 décembre... tombent toujours
-                        if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/"+str(Date.tm_year), "%d/%m/%Y"):
-                            dict_hour['dayInYear'] = Date.tm_yday -1
+                        if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/" + str(Date.tm_year), "%d/%m/%Y"):
+                            dict_hour['dayInYear'] = Date.tm_yday - 1
                         else:
                             dict_hour['dayInYear'] = Date.tm_yday
                     elif feature == 'weekInYear':
                         dict_hour['weekInYear'] = date.isocalendar()[1]
                         else:
                             dict_hour['dayInYear'] = Date.tm_yday
                     elif feature == 'weekInYear':
                         dict_hour['weekInYear'] = date.isocalendar()[1]
+                    elif feature == 'sunRised':
+                        dict_hour['sunRised'] = (datel >= s["sunrise"] - timedelta(minutes=30)
+                                                 and datel <= s["sunset"] - timedelta(minutes=30))
+                    elif feature == 'noon':
+                        dict_hour['noon'] = (datel.hour == s["noon"].hour)
+                    elif feature == 'night':
+                        dict_hour['night'] = (datel <= s["dawn"] - timedelta(minutes=30)
+                                              or datel >= s["dusk"] - timedelta(minutes=30))
+                    elif feature == 'daylightSavingTime':
+                        dict_hour['daylightSavingTime'] = (datel.dst() == timedelta(0))
+
                 self._dated_features[date] = dict_hour
                 date += timedelta(hours=1)
                 self._dated_features[date] = dict_hour
                 date += timedelta(hours=1)
-        return self._dated_features
\ No newline at end of file
+        return self._dated_features
index 6893db03ef5ef7eb804d3f1d9ec391cf18a2dcaf..db61f0151d74fbfd04f68aece531ddc6d03bc44c 100644 (file)
@@ -11,10 +11,11 @@ import itertools
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
+
 class Holidays:
 
     _start = None
 class Holidays:
 
     _start = None
-    _end   = None
+    _end = None
 
     def __init__(self, config_file):
 
 
     def __init__(self, config_file):
 
@@ -23,12 +24,12 @@ class Holidays:
 
         # Collecting holidays features
         self._features = [section for section in self._config
 
         # Collecting holidays features
         self._features = [section for section in self._config
-                              if self._config[section].getboolean('numerical')
-                              or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
 
         self._dated_features = {}
 
 
         self._dated_features = {}
 
-
     @property
     def start(self):
         return self._start
     @property
     def start(self):
         return self._start
@@ -37,7 +38,6 @@ class Holidays:
     def start(self, x):
         self._start = x
 
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
     @property
     def end(self):
         return self._end
@@ -46,49 +46,46 @@ class Holidays:
     def end(self, x):
         self._end = x
 
     def end(self, x):
         self._end = x
 
-
-
     def _get_academic_zone(self, name, date):
         dict_zones = {
     def _get_academic_zone(self, name, date):
         dict_zones = {
-        'Caen' : ('A', 'B'),
-        'Clermont-Ferrand' : ('A', 'A'),
-        'Grenoble' : ('A', 'A'),
-        'Lyon' : ('A', 'A'),
-        'Montpellier' : ('A', 'C'),
-        'Nancy-Metz' : ('A', 'B'),
-        'Nantes' : ('A', 'B'),
-        'Rennes' : ('A', 'B'),
-        'Toulouse' : ('A', 'C'),
-        'Aix-Marseille' : ('B', 'B'),
-        'Amiens' : ('B', 'B'),
-        'Besançon' : ('B', 'A'),
-        'Dijon' : ('B', 'A'),
-        'Lille' : ('B', 'B'),
-        'Limoges' : ('B', 'A'),
-        'Nice' : ('B', 'B'),
-        'Orléans-Tours' : ('B', 'B'),
-        'Poitiers' : ('B', 'A'),
-        'Reims' : ('B', 'B'),
-        'Rouen ' : ('B', 'B'),
-        'Strasbourg' : ('B', 'B'),
-        'Bordeaux' : ('C', 'A'),
-        'Créteil' : ('C', 'C'),
-        'Paris' : ('C', 'C'),
-        'Versailles' : ('C', 'C')
+            'Caen': ('A', 'B'),
+            'Clermont-Ferrand': ('A', 'A'),
+            'Grenoble': ('A', 'A'),
+            'Lyon': ('A', 'A'),
+            'Montpellier': ('A', 'C'),
+            'Nancy-Metz': ('A', 'B'),
+            'Nantes': ('A', 'B'),
+            'Rennes': ('A', 'B'),
+            'Toulouse': ('A', 'C'),
+            'Aix-Marseille': ('B', 'B'),
+            'Amiens': ('B', 'B'),
+            'Besançon': ('B', 'A'),
+            'Dijon': ('B', 'A'),
+            'Lille': ('B', 'B'),
+            'Limoges': ('B', 'A'),
+            'Nice': ('B', 'B'),
+            'Orléans-Tours': ('B', 'B'),
+            'Poitiers': ('B', 'A'),
+            'Reims': ('B', 'B'),
+            'Rouen ': ('B', 'B'),
+            'Strasbourg': ('B', 'B'),
+            'Bordeaux': ('C', 'A'),
+            'Créteil': ('C', 'C'),
+            'Paris': ('C', 'C'),
+            'Versailles': ('C', 'C')
         }
         if date < datetime(2016, 1, 1):
             return dict_zones[name][0]
         else:
             return dict_zones[name][1]
 
         }
         if date < datetime(2016, 1, 1):
             return dict_zones[name][0]
         else:
             return dict_zones[name][1]
 
-
     @property
     def dated_features(self):
         if self._dated_features == {}:
             logger.info("Adding holidays features")
             bankHolidays = tuple(itertools.chain.from_iterable(list(JoursFeries.for_year(k).values())
     @property
     def dated_features(self):
         if self._dated_features == {}:
             logger.info("Adding holidays features")
             bankHolidays = tuple(itertools.chain.from_iterable(list(JoursFeries.for_year(k).values())
-                                           for k in range(self.start.year, self.end.year+1)))
-            bankHolidaysEve = tuple(u-timedelta(days=1) for u in bankHolidays)
+                                                               for k in range(self.start.year, self.end.year + 1)))
+            bankHolidaysEve = tuple(u - timedelta(days=1) for u in bankHolidays)
             name = self._config['ZONE']['name']
             date = self._start
             Date = datetime.date(date)
             name = self._config['ZONE']['name']
             date = self._start
             Date = datetime.date(date)
@@ -96,7 +93,7 @@ class Holidays:
             Tomorrow = datetime.date(tomorrow)
             d = SchoolHolidayDates()
             dict_hour = {
             Tomorrow = datetime.date(tomorrow)
             d = SchoolHolidayDates()
             dict_hour = {
-                'bankHolidays' : Date in bankHolidays,
+                'bankHolidays': Date in bankHolidays,
                 'bankHolidaysEve': Date in bankHolidaysEve,
                 'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
                 'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
                 'bankHolidaysEve': Date in bankHolidaysEve,
                 'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
                 'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
@@ -110,9 +107,9 @@ class Holidays:
                     tomorrow = date + timedelta(days=1)
                     Tomorrow = datetime.date(tomorrow)
                     dict_hour = {
                     tomorrow = date + timedelta(days=1)
                     Tomorrow = datetime.date(tomorrow)
                     dict_hour = {
-                        'bankHolidays' : Date in bankHolidays,
+                        'bankHolidays': Date in bankHolidays,
                         'bankHolidaysEve': Date in bankHolidaysEve,
                         'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
                         'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
                     }
                         'bankHolidaysEve': Date in bankHolidaysEve,
                         'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
                         'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
                     }
-        return self._dated_features
\ No newline at end of file
+        return self._dated_features
index 0edd49f544f33ed07b17c93dc7b1e493a6d0e6e5..05eb7104563ba6ce7a2b2cab653ee3463b88642e 100644 (file)
@@ -19,12 +19,12 @@ logger = getLogger()
 
 class MeteoFrance:
 
 
 class MeteoFrance:
 
-    _latitude    = None
-    _longitude   = None
+    _latitude = None
+    _longitude = None
     _nb_stations = None
     _nb_stations = None
-    _start       = None
-    _end         = None
-    _features    = None
+    _start = None
+    _end = None
+    _features = None
 
     def __init__(self, config_file):
         '''
 
     def __init__(self, config_file):
         '''
@@ -51,10 +51,9 @@ class MeteoFrance:
         # Collecting meteofrance features
         self._features = [section for section in self._config
                           if self._config.has_option(section, 'numerical')
         # Collecting meteofrance features
         self._features = [section for section in self._config
                           if self._config.has_option(section, 'numerical')
-                          and (self._config[section]['numerical'] or
-                               self._config[section]['categorical'])]
-
-
+                          and (self._config[section]['binary'] or
+                               self._config[section]['categorical'] or
+                               self._config[section]['numerical'])]
 
     @property
     def start(self):
 
     @property
     def start(self):
@@ -64,7 +63,6 @@ class MeteoFrance:
     def start(self, x):
         self._start = x
 
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
     @property
     def end(self):
         return self._end
@@ -73,7 +71,6 @@ class MeteoFrance:
     def end(self, x):
         self._end = x
 
     def end(self, x):
         self._end = x
 
-
     @property
     def latitude(self):
         return self._latitude
     @property
     def latitude(self):
         return self._latitude
@@ -82,7 +79,6 @@ class MeteoFrance:
     def latitude(self, x):
         self._latitude = x
 
     def latitude(self, x):
         self._latitude = x
 
-
     @property
     def longitude(self):
         return self._longitude
     @property
     def longitude(self):
         return self._longitude
@@ -91,7 +87,6 @@ class MeteoFrance:
     def longitude(self, x):
         self._longitude = x
 
     def longitude(self, x):
         self._longitude = x
 
-
     @property
     def nb_stations(self):
         return self._nb_stations
     @property
     def nb_stations(self):
         return self._nb_stations
@@ -100,7 +95,6 @@ class MeteoFrance:
     def nb_stations(self, x):
         self._nb_stations = x
 
     def nb_stations(self, x):
         self._nb_stations = x
 
-
     def _regenerate_directory(self):
         '''
         Re-creating data directory architecture for MeteoFrance
     def _regenerate_directory(self):
         '''
         Re-creating data directory architecture for MeteoFrance
@@ -115,8 +109,6 @@ class MeteoFrance:
         p = Path(self._data_directory / 'config')
         p.mkdir(exist_ok=True, parents=True)
 
         p = Path(self._data_directory / 'config')
         p.mkdir(exist_ok=True, parents=True)
 
-
-
     def _get_stations(self):
         '''
         Collect (after downloading them, if needed) the stations and their
     def _get_stations(self):
         '''
         Collect (after downloading them, if needed) the stations and their
@@ -129,7 +121,7 @@ class MeteoFrance:
         # The csv file of meteo stations (names, ids and locations) if downloaded,
         # if not available in the config directory within data / meteo_france
         link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
         # The csv file of meteo stations (names, ids and locations) if downloaded,
         # if not available in the config directory within data / meteo_france
         link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
-        p = Path(self._data_directory / 'config' )
+        p = Path(self._data_directory / 'config')
         csv_file = p / basename(link)
         if not isfile(csv_file):
             logger.info('Downloading location stations from MeteoFrance')
         csv_file = p / basename(link)
         if not isfile(csv_file):
             logger.info('Downloading location stations from MeteoFrance')
@@ -142,11 +134,11 @@ class MeteoFrance:
             reader = DictReader(f, delimiter=';')
             for row in reader:
                 latitude, longitude = eval(row['Latitude']), eval(row['Longitude'])
             reader = DictReader(f, delimiter=';')
             for row in reader:
                 latitude, longitude = eval(row['Latitude']), eval(row['Longitude'])
-                self._dict_stations[row['Nom'].replace("'",'’')] = {
-                    'id' : row['ID'],
-                    'longitude' : longitude,
-                    'latitude' : latitude,
-                    'distance' : vincenty(
+                self._dict_stations[row['Nom'].replace("'", '’')] = {
+                    'id': row['ID'],
+                    'longitude': longitude,
+                    'latitude': latitude,
+                    'distance': vincenty(
                         (self._latitude, self._longitude),
                         (latitude, longitude)).km
                 }
                         (self._latitude, self._longitude),
                         (latitude, longitude)).km
                 }
@@ -154,13 +146,11 @@ class MeteoFrance:
         # Find the closest stations
         logger.info('Finding the closest stations')
         stations_by_distance = sorted(self._dict_stations.keys(),
         # Find the closest stations
         logger.info('Finding the closest stations')
         stations_by_distance = sorted(self._dict_stations.keys(),
-                                      key = lambda x: self._dict_stations[x]['distance'])
+                                      key=lambda x: self._dict_stations[x]['distance'])
         logger.info(f'The {self._nb_stations} closest stations are: '
                     f'{", ".join(stations_by_distance[:self._nb_stations])}.')
         return [self._dict_stations[sta]['id'] for sta in stations_by_distance][:self._nb_stations]
 
         logger.info(f'The {self._nb_stations} closest stations are: '
                     f'{", ".join(stations_by_distance[:self._nb_stations])}.')
         return [self._dict_stations[sta]['id'] for sta in stations_by_distance][:self._nb_stations]
 
-
-
     def _collect_historical_data(self):
         '''
         We collect all csv files from January 1996 until the month
     def _collect_historical_data(self):
         '''
         We collect all csv files from January 1996 until the month
@@ -171,8 +161,8 @@ class MeteoFrance:
         # List of year-months to consider
         historical = []
         date_end = self._end
         # List of year-months to consider
         historical = []
         date_end = self._end
-        for year in range(self._start.year, date_end.year+1):
-            for month in range(1,13):
+        for year in range(self._start.year, date_end.year + 1):
+            for month in range(1, 13):
                 date = datetime(year, month, 1)
                 if date >= self._start and date <= date_end:
                     historical.append(date.strftime("%Y%m"))
                 date = datetime(year, month, 1)
                 if date >= self._start and date <= date_end:
                     historical.append(date.strftime("%Y%m"))
@@ -183,7 +173,7 @@ class MeteoFrance:
         p = Path(meteo_data)
         p.mkdir(exist_ok=True, parents=True)
         for date in historical:
         p = Path(meteo_data)
         p.mkdir(exist_ok=True, parents=True)
         for date in historical:
-            if not isfile(meteo_data / ('synop.'+date+'.csv')):
+            if not isfile(meteo_data / ('synop.' + date + '.csv')):
                 link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'
                 link += date + '.csv.gz'
                 download_path = meteo_data / basename(link)
                 link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'
                 link += date + '.csv.gz'
                 download_path = meteo_data / basename(link)
@@ -194,8 +184,6 @@ class MeteoFrance:
                         g.write(f.read().decode())
                         remove(meteo_data / basename(link))
 
                         g.write(f.read().decode())
                         remove(meteo_data / basename(link))
 
-
-
     def update(self):
         '''
         Update the MeteoFrance features with the last available data
     def update(self):
         '''
         Update the MeteoFrance features with the last available data
@@ -209,15 +197,13 @@ class MeteoFrance:
 
         logger.info('Update historical csv files from MeteoFrance, if needed')
         today = datetime.now()
 
         logger.info('Update historical csv files from MeteoFrance, if needed')
         today = datetime.now()
-        todel = 'synop.'+today.strftime("%Y%m")+".csv"
+        todel = 'synop.' + today.strftime("%Y%m") + ".csv"
         try:
             remove(self._data_directory / 'historical' / todel)
         except:
             logger.warning(f"{self._data_directory / 'historical' / todel} not found")
         self._collect_historical_data()
 
         try:
             remove(self._data_directory / 'historical' / todel)
         except:
             logger.warning(f"{self._data_directory / 'historical' / todel} not found")
         self._collect_historical_data()
 
-
-
     @property
     def dated_features(self):
         '''
     @property
     def dated_features(self):
         '''
@@ -234,26 +220,27 @@ class MeteoFrance:
             logger.info('Collecting meteofrance feature information')
             # A dictionary for the features
             dico_features = {self._config[section]["abbreviation"]:
             logger.info('Collecting meteofrance feature information')
             # A dictionary for the features
             dico_features = {self._config[section]["abbreviation"]:
-                               {
-                                   'name': section, # feature name
-                                   'numerical': self._config[section]['numerical'],
-                                   'categorical': self._config[section]['categorical']
-                                }
-                            for section in self._features}
+                             {
+                'name': section,  # feature name
+                'numerical': self._config[section]['numerical'],
+                'categorical': self._config[section]['categorical']
+            }
+                for section in self._features}
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
+
             for csv_meteo in sorted(listdir(dir_data)):
                 date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
                 if (date >= self._start and date <= self._end)\
             for csv_meteo in sorted(listdir(dir_data)):
                 date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
                 if (date >= self._start and date <= self._end)\
-                or (date.year == self._start.year and date.month == self._start.month)\
-                or (date.year == self._end.year and date.month == self._end.month):
+                        or (date.year == self._start.year and date.month == self._start.month)\
+                        or (date.year == self._end.year and date.month == self._end.month):
                     logger.info(f'Adding meteofrance features from {csv_meteo}')
                     with open(dir_data / csv_meteo, "r") as f:
                         reader = DictReader(f, delimiter=';')
                         for row in reader:
                             if row['numer_sta'] in self._stations:
                                 date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
                     logger.info(f'Adding meteofrance features from {csv_meteo}')
                     with open(dir_data / csv_meteo, "r") as f:
                         reader = DictReader(f, delimiter=';')
                         for row in reader:
                             if row['numer_sta'] in self._stations:
                                 date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
-                                if date  >= self._start and date <= self._end:
-                                    self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+                                if date >= self._start and date <= self._end:
+                                    self._dated_features.setdefault(date, {}).update({dico_features[feat]['name'] + '_' + str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq', 'None')) for feat in dico_features})
         return self._dated_features
 
         return self._dated_features
 
index 6836df15a474016645d6665c163767b78577975c..e0875d2ce47ce2875ad7e0532aafbcf36fa94b9b 100644 (file)
@@ -22,8 +22,9 @@ class Ramadan:
 
         # Collecting holidays features
         self._features = [section for section in self._config
 
         # Collecting holidays features
         self._features = [section for section in self._config
-                          if self._config[section].getboolean('numerical')
-                          or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
 
         self._dated_features = {}
 
 
         self._dated_features = {}
 
diff --git a/predictops/source/sentinelles.py b/predictops/source/sentinelles.py
new file mode 100644 (file)
index 0000000..38e7171
--- /dev/null
@@ -0,0 +1,17 @@
+from configparser import ConfigParser
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
+class Sentinelles:
+    def __init__(self, config_file):
+        '''
+        Constructor of the Sentinelles source of features.
+        '''
+        self._config = ConfigParser()
+        self._config.read(config_file)
index 9e3d86d31de82f24ad8a8c2bd6abf6e742f446fe..fdff54ba2259aef28f13964f71b8e115f826622b 100644 (file)
@@ -11,8 +11,8 @@ logger = getLogger()
 
 class Target:
 
 
 class Target:
 
-    def __init__(self, config_file = None,
-                 start = None, end = None, timestep = None, cumulative = None):
+    def __init__(self, config_file=None,
+                 start=None, end=None, timestep=None, cumulative=None):
 
         self._config = ConfigParser()
         self._config.read(config_file)
 
         self._config = ConfigParser()
         self._config.read(config_file)
@@ -33,8 +33,6 @@ class Target:
         self._stream_file = eval(self._config['DATA']['csv_file'])
         self._get_located_interventions()
 
         self._stream_file = eval(self._config['DATA']['csv_file'])
         self._get_located_interventions()
 
-
-
     @property
     def start(self):
         return self._start
     @property
     def start(self):
         return self._start
@@ -43,7 +41,6 @@ class Target:
     def start(self, x):
         self._start = x
 
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
     @property
     def end(self):
         return self._end
@@ -52,7 +49,6 @@ class Target:
     def end(self, x):
         self._end = x
 
     def end(self, x):
         self._end = x
 
-
     @property
     def y(self):
         return self._y
     @property
     def y(self):
         return self._y
@@ -61,8 +57,6 @@ class Target:
     def end(self, y):
         self._y = y
 
     def end(self, y):
         self._y = y
 
-
-
     def _get_located_interventions(self):
         if not self._config['SPECIFICATION'].getboolean('origin')\
            and not self._config['SPECIFICATION'].getboolean('destination'):
     def _get_located_interventions(self):
         if not self._config['SPECIFICATION'].getboolean('origin')\
            and not self._config['SPECIFICATION'].getboolean('destination'):
index 6615f1fc4c5a72ac226089c20e20c6f9babcaef1..bd50026191e5c3fd31a93f009fab3e6eae9c98e8 100644 (file)
@@ -1,3 +1,4 @@
+astral==2.1
 attrs==19.3.0
 Click==7.0
 click-plugins==1.1.1
 attrs==19.3.0
 Click==7.0
 click-plugins==1.1.1