AND Private Git Repository - predictops.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Refactoring, fin du lever/coucher de soleil, et début de sentinelles master
author Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
committer Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
16 files changed:
config/features/feature_ephemeris.cfg
config/features/feature_meteo.cfg
config/learn.cfg
config/learners/lightgbm.cfg
config/learners/xgboost.cfg
config/targets/sdis25.cfg
predictops/engine.py
predictops/learn/learning.py
predictops/learn/preprocessing.py
predictops/source/ephemeris.py
predictops/source/holidays.py
predictops/source/meteofrance.py
predictops/source/ramadan.py
predictops/source/sentinelles.py [new file with mode: 0644]
predictops/target/target.py
requirements.txt

index 3ed31c76c6b3e5fa2124886b3adc5e0233d2841b..47c8f12e02b311075b85c8c159c13806cba94cdd 100644 (file)
@@ -31,4 +31,24 @@ numerical   = True
 [year]
 binary      = False
 categorical = False
-numerical   = True
\ No newline at end of file
+numerical   = True
+
+[sunRised]
+binary      = True
+categorical = False
+numerical   = False
+
+[noon]
+binary      = True
+categorical = False
+numerical   = False
+
+[night]
+binary      = True
+categorical = False
+numerical   = False
+
+[daylightSavingTime]
+binary      = True
+categorical = False
+numerical   = False
index 5b694e5f021ad85d7da6c012c885fef1af514a13..a0a0eabead9422130c2440ba96560f10c23b32c6 100644 (file)
@@ -2,10 +2,6 @@
 regenerate = False
 reinsert   = True
 
-[POSITION]
-latitude  = 47.25
-longitude = 6.0333
-
 [STATIONS]
 nb_stations = 3
 
index aef0edd22f22e03a84ea12dacf04f4bcf3db6322..9081652d64e234726d3362fd36179efd124cdb22 100644 (file)
@@ -1,7 +1,17 @@
 [DATETIME]
-start    = 01/01/2006 00:00:00
+start    = 01/01/2016 00:00:00
 end      = 12/31/2019 23:00:00
-hourStep = 1
+hourStep = 3
+
+
+[HISTORY_KNOWLEDGE]
+nb_lines = 24//3*7*4
+
+
+[TARGET]
+config      = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+cumulative  = True
+horizon     = 3
 
 
 [FEATURES]
@@ -9,6 +19,7 @@ ephemeris   = True
 holidays    = True
 meteofrance = True
 ramadan     = True
+sentinelles = True
 
 
 [FEATURE_CONFIG]
@@ -16,6 +27,7 @@ ephemeris   = (Path.cwd() / 'config') / 'features' / 'feature_ephemeris.cfg'
 holidays    = (Path.cwd() / 'config') / 'features' / 'feature_holidays.cfg'
 meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
 ramadan     = (Path.cwd() / 'config') / 'features' / 'feature_ramadan.cfg'
+sentinelles = (Path.cwd() / 'config') / 'features' / 'feature_sentinelles.cfg'
 
 
 [PREPROCESSING]
@@ -23,14 +35,5 @@ fill_method = linear
 order       = 3
 
 
-[HISTORY_KNOWLEDGE]
-nb_lines = 24*7*4
-
-
-[TARGET]
-config      = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
-cumulative  = True
-horizon     = 0
-
 [LEARNER]
 config =  (Path.cwd() / 'config') / 'learners' / 'lightgbm.cfg'
\ No newline at end of file
index b5887f0497b8db84204398c21990d6df43c04559..32d19728c7fc29866135c3f9c205facc9b11b7ea 100644 (file)
@@ -4,7 +4,7 @@ method = lightgbm
 [HYPERPARAMETERS]
 learning_rate  = 0.1
 metric         = auc
-num_iterations = 1000
+num_iterations = 300
 num_round      = 10
-num_leaves     = 31
+num_leaves     = 900
 objective      = poisson
index f9e53298ef6586768c7c5d7d3a55e697f604670b..9a1836330d00b827edfeeede7a082e809efdd191 100644 (file)
@@ -4,7 +4,7 @@ method = xgboost
 
 [HYPERPARAMETERS]
 learning_rate = 0.01
-max_depth     = 7
+max_depth     = 6
 random_state  = 42
 n_estimators  = 10000
 n_jobs        = -1
index 712e93b1d6f3ea78815c3c7d87e747b9a0c0d8fd..2d4f9f0d48a1ab0cd329dfadaa925f4121a618db 100644 (file)
@@ -1,3 +1,10 @@
+[POSITION]
+name      = Besançon
+country   = France
+timezone  = Europe/Paris
+latitude  = 47.237829
+longitude = -6.0240539
+
 [SPECIFICATION]
 origin      = False
 destination = False
index a63ef2932aee9086fb23ede8902ec4ea0820adb8..bb85787ca304aa2643a5c1e2c42e53362ced5689 100644 (file)
@@ -6,13 +6,15 @@ from pathlib import Path
 from shutil import rmtree
 
 import os
+import pytz
 
 from .learn.learning import Learning
 from .learn.preprocessing import Preprocessing
 from .source.ephemeris import Ephemeris
 from .source.holidays import Holidays
-from .source.ramadan import Ramadan
 from .source.meteofrance import MeteoFrance
+from .source.ramadan import Ramadan
+from .source.sentinelles import Sentinelles
 from .target.target import Target
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
@@ -68,10 +70,10 @@ class Engine(object):
                 self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
                 self._config_text += f.read() + '\n\n'
 
-            ephemerides = Ephemeris(config_file=config_file)
+            ephemerides = Ephemeris(config_file=config_file, start=self._start, end=self._end)
 
-            ephemerides.start = self._start
-            ephemerides.end = self._end
+            ephemerides.start = self._start
+            ephemerides.end = self._end
 
             dated_features = ephemerides.dated_features
             for date in dated_features:
@@ -136,6 +138,8 @@ class Engine(object):
 
     def add_preprocessing(self):
         self._preproc = Preprocessing(config_file=self._config,
+                                      start=self._start, end=self._end,
+                                      timestep=self._timestep,
                                       dict_features=self.X,
                                       dict_target=self.y)
 
index fb3675d56d7e007c557d93aeedd64bcc514a8df0..f13c3a6514eb2a8b552a4aa80fd13b275f590036 100644 (file)
@@ -32,10 +32,6 @@ class Learning:
         else:
             self._X = X
             self._y = y
-        rep = (Path.cwd() / self._file_name)
-        rep.mkdir()
-        self._filename = str(self._file_name / os.path.basename(self._file_name))
-        self._X.to_csv(self._filename + '.csv')
         self._learn()
         self._evaluate()
 
@@ -92,6 +88,10 @@ class Learning:
         for k in range(10):
             txt += f"Percentage of errors lower than {k}: {[abs(int(u-v))<=k for u,v in zip(self._y_test.values, y_test_pred)].count(True)/len(self._y_test)*100}\n"
 
+        rep = (Path.cwd() / self._file_name)
+        rep.mkdir()
+        self._filename = str(self._file_name / os.path.basename(self._file_name))
+
         print(txt)
         with open(self._filename + ".result", 'w') as f:
             f.write(txt)
index 9bc09ad2eca2759c22b6047c3ded8ab747e015de..55cffbd2a0e094a610580e1e4dbcdf8adc80de5d 100644 (file)
@@ -1,10 +1,7 @@
 from configparser import ConfigParser
-from csv import DictReader
-from datetime import datetime, timedelta
 from itertools import chain
 from logging import getLogger
 from logging.config import fileConfig
-from os import listdir
 from pathlib import Path
 from sklearn import preprocessing
 
@@ -27,17 +24,16 @@ class Preprocessing:
     '''
 
     def __init__(self, config_file=None,
+                 start=None, end=None, timestep=None,
                  dict_features=None, dict_target=None):
         '''
         Constructor that defines all needed attributes and collects features.
         '''
         self._config = config_file
 
-        self._start = datetime.strptime(self._config['DATETIME']['start'],
-                                        '%m/%d/%Y %H:%M:%S')
-        self._end = datetime.strptime(self._config['DATETIME']['end'],
-                                      '%m/%d/%Y %H:%M:%S')
-        self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
+        self._start = start
+        self._end = end
+        self._timestep = timestep
         self._dict_features = dict_features
         self._dict_target = dict_target
 
@@ -196,6 +192,7 @@ class Preprocessing:
         '''
         logger.info("One hot encoding for categorical feature")
         # We store numerical columns
+
         df_out = pd.DataFrame()
         for col in self._numerical_columns:
             df_out[col] = self._dataframe[col]
@@ -222,6 +219,8 @@ class Preprocessing:
             self._fill_nan()
             # Adding previous (historical) nb_interventions as features
             self._add_history()
+            # self._dataframe.to_csv('toto.csv')
+            # exit()
             # Normalizing numerical values
             self._standardize()
             # Dealing with categorical features
index e1c07ecfe604de8012715c25a773c2a66fed7bb7..e04a41a46470d7981db259b42c0ba833c8c53639 100644 (file)
@@ -1,33 +1,40 @@
+from astral import LocationInfo
+from astral.sun import sun
 from configparser import ConfigParser
 from datetime import datetime, timedelta
 from logging import getLogger
 from logging.config import fileConfig
 from pathlib import Path
 
-import time
 import calendar
+import pytz
+import time
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
+
 class Ephemeris:
 
     _start = None
-    _end   = None
+    _end = None
 
-    def __init__(self, config_file):
+    def __init__(self, config_file, start, end):
 
         self._config = ConfigParser()
         self._config.read(config_file)
 
+        self._city = LocationInfo("Besançon", "France", "Europe/Paris", 47.237829, -6.0240539)
+        self._start = start
+        self._end = end
         # Collecting ephemeris features
         self._features = [section for section in self._config
-                              if self._config[section].getboolean('numerical')
-                              or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
 
         self._dated_features = {}
 
-
     @property
     def start(self):
         return self._start
@@ -36,7 +43,6 @@ class Ephemeris:
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
@@ -45,15 +51,18 @@ class Ephemeris:
     def end(self, x):
         self._end = x
 
-
     @property
     def dated_features(self):
         if self._dated_features == {}:
             logger.info("Adding ephemeris features")
+            paris = pytz.timezone('Europe/Paris')
             date = self._start
             while date <= self._end:
+                datel = paris.localize(date)
                 dict_hour = {}
                 Date = time.strptime(datetime.strftime(date, '%m/%d/%Y %H:%M:%S'), '%m/%d/%Y %H:%M:%S')
+                s = sun(self._city.observer, date=date,
+                        tzinfo=pytz.timezone('Europe/Paris'))
                 for feature in self._features:
                     if feature == 'hour':
                         dict_hour['hour'] = Date.tm_hour
@@ -69,12 +78,23 @@ class Ephemeris:
                         # Si c'est une année bissextile et qu'on est après le 29 février, on compte une journée
                         # dans l'année de moins, car on va supprimer les 29 févriers, de sorte que les 14 juillets,
                         # les 24 décembre... tombent toujours
-                        if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/"+str(Date.tm_year), "%d/%m/%Y"):
-                            dict_hour['dayInYear'] = Date.tm_yday -1
+                        if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/" + str(Date.tm_year), "%d/%m/%Y"):
+                            dict_hour['dayInYear'] = Date.tm_yday - 1
                         else:
                             dict_hour['dayInYear'] = Date.tm_yday
                     elif feature == 'weekInYear':
                         dict_hour['weekInYear'] = date.isocalendar()[1]
+                    elif feature == 'sunRised':
+                        dict_hour['sunRised'] = (datel >= s["sunrise"] - timedelta(minutes=30)
+                                                 and datel <= s["sunset"] - timedelta(minutes=30))
+                    elif feature == 'noon':
+                        dict_hour['noon'] = (datel.hour == s["noon"].hour)
+                    elif feature == 'night':
+                        dict_hour['night'] = (datel <= s["dawn"] - timedelta(minutes=30)
+                                              or datel >= s["dusk"] - timedelta(minutes=30))
+                    elif feature == 'daylightSavingTime':
+                        dict_hour['daylightSavingTime'] = (datel.dst() == timedelta(0))
+
                 self._dated_features[date] = dict_hour
                 date += timedelta(hours=1)
-        return self._dated_features
\ No newline at end of file
+        return self._dated_features
index 6893db03ef5ef7eb804d3f1d9ec391cf18a2dcaf..db61f0151d74fbfd04f68aece531ddc6d03bc44c 100644 (file)
@@ -11,10 +11,11 @@ import itertools
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
+
 class Holidays:
 
     _start = None
-    _end   = None
+    _end = None
 
     def __init__(self, config_file):
 
@@ -23,12 +24,12 @@ class Holidays:
 
         # Collecting holidays features
         self._features = [section for section in self._config
-                              if self._config[section].getboolean('numerical')
-                              or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
 
         self._dated_features = {}
 
-
     @property
     def start(self):
         return self._start
@@ -37,7 +38,6 @@ class Holidays:
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
@@ -46,49 +46,46 @@ class Holidays:
     def end(self, x):
         self._end = x
 
-
-
     def _get_academic_zone(self, name, date):
         dict_zones = {
-        'Caen' : ('A', 'B'),
-        'Clermont-Ferrand' : ('A', 'A'),
-        'Grenoble' : ('A', 'A'),
-        'Lyon' : ('A', 'A'),
-        'Montpellier' : ('A', 'C'),
-        'Nancy-Metz' : ('A', 'B'),
-        'Nantes' : ('A', 'B'),
-        'Rennes' : ('A', 'B'),
-        'Toulouse' : ('A', 'C'),
-        'Aix-Marseille' : ('B', 'B'),
-        'Amiens' : ('B', 'B'),
-        'Besançon' : ('B', 'A'),
-        'Dijon' : ('B', 'A'),
-        'Lille' : ('B', 'B'),
-        'Limoges' : ('B', 'A'),
-        'Nice' : ('B', 'B'),
-        'Orléans-Tours' : ('B', 'B'),
-        'Poitiers' : ('B', 'A'),
-        'Reims' : ('B', 'B'),
-        'Rouen ' : ('B', 'B'),
-        'Strasbourg' : ('B', 'B'),
-        'Bordeaux' : ('C', 'A'),
-        'Créteil' : ('C', 'C'),
-        'Paris' : ('C', 'C'),
-        'Versailles' : ('C', 'C')
+            'Caen': ('A', 'B'),
+            'Clermont-Ferrand': ('A', 'A'),
+            'Grenoble': ('A', 'A'),
+            'Lyon': ('A', 'A'),
+            'Montpellier': ('A', 'C'),
+            'Nancy-Metz': ('A', 'B'),
+            'Nantes': ('A', 'B'),
+            'Rennes': ('A', 'B'),
+            'Toulouse': ('A', 'C'),
+            'Aix-Marseille': ('B', 'B'),
+            'Amiens': ('B', 'B'),
+            'Besançon': ('B', 'A'),
+            'Dijon': ('B', 'A'),
+            'Lille': ('B', 'B'),
+            'Limoges': ('B', 'A'),
+            'Nice': ('B', 'B'),
+            'Orléans-Tours': ('B', 'B'),
+            'Poitiers': ('B', 'A'),
+            'Reims': ('B', 'B'),
+            'Rouen ': ('B', 'B'),
+            'Strasbourg': ('B', 'B'),
+            'Bordeaux': ('C', 'A'),
+            'Créteil': ('C', 'C'),
+            'Paris': ('C', 'C'),
+            'Versailles': ('C', 'C')
         }
         if date < datetime(2016, 1, 1):
             return dict_zones[name][0]
         else:
             return dict_zones[name][1]
 
-
     @property
     def dated_features(self):
         if self._dated_features == {}:
             logger.info("Adding holidays features")
             bankHolidays = tuple(itertools.chain.from_iterable(list(JoursFeries.for_year(k).values())
-                                           for k in range(self.start.year, self.end.year+1)))
-            bankHolidaysEve = tuple(u-timedelta(days=1) for u in bankHolidays)
+                                                               for k in range(self.start.year, self.end.year + 1)))
+            bankHolidaysEve = tuple(u - timedelta(days=1) for u in bankHolidays)
             name = self._config['ZONE']['name']
             date = self._start
             Date = datetime.date(date)
@@ -96,7 +93,7 @@ class Holidays:
             Tomorrow = datetime.date(tomorrow)
             d = SchoolHolidayDates()
             dict_hour = {
-                'bankHolidays' : Date in bankHolidays,
+                'bankHolidays': Date in bankHolidays,
                 'bankHolidaysEve': Date in bankHolidaysEve,
                 'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
                 'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
@@ -110,9 +107,9 @@ class Holidays:
                     tomorrow = date + timedelta(days=1)
                     Tomorrow = datetime.date(tomorrow)
                     dict_hour = {
-                        'bankHolidays' : Date in bankHolidays,
+                        'bankHolidays': Date in bankHolidays,
                         'bankHolidaysEve': Date in bankHolidaysEve,
                         'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
                         'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
                     }
-        return self._dated_features
\ No newline at end of file
+        return self._dated_features
index 0edd49f544f33ed07b17c93dc7b1e493a6d0e6e5..05eb7104563ba6ce7a2b2cab653ee3463b88642e 100644 (file)
@@ -19,12 +19,12 @@ logger = getLogger()
 
 class MeteoFrance:
 
-    _latitude    = None
-    _longitude   = None
+    _latitude = None
+    _longitude = None
     _nb_stations = None
-    _start       = None
-    _end         = None
-    _features    = None
+    _start = None
+    _end = None
+    _features = None
 
     def __init__(self, config_file):
         '''
@@ -51,10 +51,9 @@ class MeteoFrance:
         # Collecting meteofrance features
         self._features = [section for section in self._config
                           if self._config.has_option(section, 'numerical')
-                          and (self._config[section]['numerical'] or
-                               self._config[section]['categorical'])]
-
-
+                          and (self._config[section]['binary'] or
+                               self._config[section]['categorical'] or
+                               self._config[section]['numerical'])]
 
     @property
     def start(self):
@@ -64,7 +63,6 @@ class MeteoFrance:
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
@@ -73,7 +71,6 @@ class MeteoFrance:
     def end(self, x):
         self._end = x
 
-
     @property
     def latitude(self):
         return self._latitude
@@ -82,7 +79,6 @@ class MeteoFrance:
     def latitude(self, x):
         self._latitude = x
 
-
     @property
     def longitude(self):
         return self._longitude
@@ -91,7 +87,6 @@ class MeteoFrance:
     def longitude(self, x):
         self._longitude = x
 
-
     @property
     def nb_stations(self):
         return self._nb_stations
@@ -100,7 +95,6 @@ class MeteoFrance:
     def nb_stations(self, x):
         self._nb_stations = x
 
-
     def _regenerate_directory(self):
         '''
         Re-creating data directory architecture for MeteoFrance
@@ -115,8 +109,6 @@ class MeteoFrance:
         p = Path(self._data_directory / 'config')
         p.mkdir(exist_ok=True, parents=True)
 
-
-
     def _get_stations(self):
         '''
         Collect (after downloading them, if needed) the stations and their
@@ -129,7 +121,7 @@ class MeteoFrance:
         # The csv file of meteo stations (names, ids and locations) if downloaded,
         # if not available in the config directory within data / meteo_france
         link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
-        p = Path(self._data_directory / 'config' )
+        p = Path(self._data_directory / 'config')
         csv_file = p / basename(link)
         if not isfile(csv_file):
             logger.info('Downloading location stations from MeteoFrance')
@@ -142,11 +134,11 @@ class MeteoFrance:
             reader = DictReader(f, delimiter=';')
             for row in reader:
                 latitude, longitude = eval(row['Latitude']), eval(row['Longitude'])
-                self._dict_stations[row['Nom'].replace("'",'’')] = {
-                    'id' : row['ID'],
-                    'longitude' : longitude,
-                    'latitude' : latitude,
-                    'distance' : vincenty(
+                self._dict_stations[row['Nom'].replace("'", '’')] = {
+                    'id': row['ID'],
+                    'longitude': longitude,
+                    'latitude': latitude,
+                    'distance': vincenty(
                         (self._latitude, self._longitude),
                         (latitude, longitude)).km
                 }
@@ -154,13 +146,11 @@ class MeteoFrance:
         # Find the closest stations
         logger.info('Finding the closest stations')
         stations_by_distance = sorted(self._dict_stations.keys(),
-                                      key = lambda x: self._dict_stations[x]['distance'])
+                                      key=lambda x: self._dict_stations[x]['distance'])
         logger.info(f'The {self._nb_stations} closest stations are: '
                     f'{", ".join(stations_by_distance[:self._nb_stations])}.')
         return [self._dict_stations[sta]['id'] for sta in stations_by_distance][:self._nb_stations]
 
-
-
     def _collect_historical_data(self):
         '''
         We collect all csv files from January 1996 until the month
@@ -171,8 +161,8 @@ class MeteoFrance:
         # List of year-months to consider
         historical = []
         date_end = self._end
-        for year in range(self._start.year, date_end.year+1):
-            for month in range(1,13):
+        for year in range(self._start.year, date_end.year + 1):
+            for month in range(1, 13):
                 date = datetime(year, month, 1)
                 if date >= self._start and date <= date_end:
                     historical.append(date.strftime("%Y%m"))
@@ -183,7 +173,7 @@ class MeteoFrance:
         p = Path(meteo_data)
         p.mkdir(exist_ok=True, parents=True)
         for date in historical:
-            if not isfile(meteo_data / ('synop.'+date+'.csv')):
+            if not isfile(meteo_data / ('synop.' + date + '.csv')):
                 link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'
                 link += date + '.csv.gz'
                 download_path = meteo_data / basename(link)
@@ -194,8 +184,6 @@ class MeteoFrance:
                         g.write(f.read().decode())
                         remove(meteo_data / basename(link))
 
-
-
     def update(self):
         '''
         Update the MeteoFrance features with the last available data
@@ -209,15 +197,13 @@ class MeteoFrance:
 
         logger.info('Update historical csv files from MeteoFrance, if needed')
         today = datetime.now()
-        todel = 'synop.'+today.strftime("%Y%m")+".csv"
+        todel = 'synop.' + today.strftime("%Y%m") + ".csv"
         try:
             remove(self._data_directory / 'historical' / todel)
         except:
             logger.warning(f"{self._data_directory / 'historical' / todel} not found")
         self._collect_historical_data()
 
-
-
     @property
     def dated_features(self):
         '''
@@ -234,26 +220,27 @@ class MeteoFrance:
             logger.info('Collecting meteofrance feature information')
             # A dictionary for the features
             dico_features = {self._config[section]["abbreviation"]:
-                               {
-                                   'name': section, # feature name
-                                   'numerical': self._config[section]['numerical'],
-                                   'categorical': self._config[section]['categorical']
-                                }
-                            for section in self._features}
+                             {
+                'name': section,  # feature name
+                'numerical': self._config[section]['numerical'],
+                'categorical': self._config[section]['categorical']
+            }
+                for section in self._features}
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
+
             for csv_meteo in sorted(listdir(dir_data)):
                 date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
                 if (date >= self._start and date <= self._end)\
-                or (date.year == self._start.year and date.month == self._start.month)\
-                or (date.year == self._end.year and date.month == self._end.month):
+                        or (date.year == self._start.year and date.month == self._start.month)\
+                        or (date.year == self._end.year and date.month == self._end.month):
                     logger.info(f'Adding meteofrance features from {csv_meteo}')
                     with open(dir_data / csv_meteo, "r") as f:
                         reader = DictReader(f, delimiter=';')
                         for row in reader:
                             if row['numer_sta'] in self._stations:
                                 date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
-                                if date  >= self._start and date <= self._end:
-                                    self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+                                if date >= self._start and date <= self._end:
+                                    self._dated_features.setdefault(date, {}).update({dico_features[feat]['name'] + '_' + str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq', 'None')) for feat in dico_features})
         return self._dated_features
 
index 6836df15a474016645d6665c163767b78577975c..e0875d2ce47ce2875ad7e0532aafbcf36fa94b9b 100644 (file)
@@ -22,8 +22,9 @@ class Ramadan:
 
         # Collecting holidays features
         self._features = [section for section in self._config
-                          if self._config[section].getboolean('numerical')
-                          or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
 
         self._dated_features = {}
 
diff --git a/predictops/source/sentinelles.py b/predictops/source/sentinelles.py
new file mode 100644 (file)
index 0000000..38e7171
--- /dev/null
@@ -0,0 +1,17 @@
+from configparser import ConfigParser
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
+class Sentinelles:
+    def __init__(self, config_file):
+        '''
+        Constructor of the Sentinelles source of feature.
+        '''
+        self._config = ConfigParser()
+        self._config.read(config_file)
index 9e3d86d31de82f24ad8a8c2bd6abf6e742f446fe..fdff54ba2259aef28f13964f71b8e115f826622b 100644 (file)
@@ -11,8 +11,8 @@ logger = getLogger()
 
 class Target:
 
-    def __init__(self, config_file = None,
-                 start = None, end = None, timestep = None, cumulative = None):
+    def __init__(self, config_file=None,
+                 start=None, end=None, timestep=None, cumulative=None):
 
         self._config = ConfigParser()
         self._config.read(config_file)
@@ -33,8 +33,6 @@ class Target:
         self._stream_file = eval(self._config['DATA']['csv_file'])
         self._get_located_interventions()
 
-
-
     @property
     def start(self):
         return self._start
@@ -43,7 +41,6 @@ class Target:
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
@@ -52,7 +49,6 @@ class Target:
     def end(self, x):
         self._end = x
 
-
     @property
     def y(self):
         return self._y
@@ -61,8 +57,6 @@ class Target:
     def end(self, y):
         self._y = y
 
-
-
     def _get_located_interventions(self):
         if not self._config['SPECIFICATION'].getboolean('origin')\
            and not self._config['SPECIFICATION'].getboolean('destination'):
index 6615f1fc4c5a72ac226089c20e20c6f9babcaef1..bd50026191e5c3fd31a93f009fab3e6eae9c98e8 100644 (file)
@@ -1,3 +1,4 @@
+astral==2.1
 attrs==19.3.0
 Click==7.0
 click-plugins==1.1.1