Refactoring, fin du lever/coucher de soleil, et début de sentinelles

author Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>

Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)

committer Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>

Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
author Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
committer Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
diff --git a/config/features/feature_ephemeris.cfg b/config/features/feature_ephemeris.cfg

index 3ed31c76c6b3e5fa2124886b3adc5e0233d2841b..47c8f12e02b311075b85c8c159c13806cba94cdd 100644 (file)
--- a/config/features/feature_ephemeris.cfg
+++ b/config/features/feature_ephemeris.cfg
@@ -31,4 +31,24 @@ numerical   = True
  [year]
  binary      = False
  categorical = False
-numerical   = True
-\ No newline at end of file
+numerical   = True
+
+[sunRised]
+binary      = True
+categorical = False
+numerical   = False
+
+[noon]
+binary      = True
+categorical = False
+numerical   = False
+
+[night]
+binary      = True
+categorical = False
+numerical   = False
+
+[daylightSavingTime]
+binary      = True
+categorical = False
+numerical   = False
diff --git a/config/features/feature_meteo.cfg b/config/features/feature_meteo.cfg

index 5b694e5f021ad85d7da6c012c885fef1af514a13..a0a0eabead9422130c2440ba96560f10c23b32c6 100644 (file)
--- a/config/features/feature_meteo.cfg
+++ b/config/features/feature_meteo.cfg
@@ -2,10 +2,6 @@
  regenerate = False
  reinsert   = True
  
-[POSITION]
-latitude  = 47.25
-longitude = 6.0333
-
  [STATIONS]
  nb_stations = 3
  
diff --git a/config/learn.cfg b/config/learn.cfg

index aef0edd22f22e03a84ea12dacf04f4bcf3db6322..9081652d64e234726d3362fd36179efd124cdb22 100644 (file)
--- a/config/learn.cfg
+++ b/config/learn.cfg
@@ -1,7 +1,17 @@
  [DATETIME]
-start    = 01/01/2006 00:00:00
+start    = 01/01/2016 00:00:00
  end      = 12/31/2019 23:00:00
-hourStep = 1
+hourStep = 3
+
+
+[HISTORY_KNOWLEDGE]
+nb_lines = 24//3*7*4
+
+
+[TARGET]
+config      = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+cumulative  = True
+horizon     = 3
  
  
  [FEATURES]
@@ -9,6 +19,7 @@ ephemeris   = True
  holidays    = True
  meteofrance = True
  ramadan     = True
+sentinelles = True
  
  
  [FEATURE_CONFIG]
@@ -16,6 +27,7 @@ ephemeris   = (Path.cwd() / 'config') / 'features' / 'feature_ephemeris.cfg'
  holidays    = (Path.cwd() / 'config') / 'features' / 'feature_holidays.cfg'
  meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
  ramadan     = (Path.cwd() / 'config') / 'features' / 'feature_ramadan.cfg'
+sentinelles = (Path.cwd() / 'config') / 'features' / 'feature_sentinelles.cfg'
  
  
  [PREPROCESSING]
@@ -23,14 +35,5 @@ fill_method = linear
  order       = 3
  
  
-[HISTORY_KNOWLEDGE]
-nb_lines = 24*7*4
-
-
-[TARGET]
-config      = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
-cumulative  = True
-horizon     = 0
-
  [LEARNER]
  config =  (Path.cwd() / 'config') / 'learners' / 'lightgbm.cfg'
 \ No newline at end of file
diff --git a/config/learners/lightgbm.cfg b/config/learners/lightgbm.cfg

index b5887f0497b8db84204398c21990d6df43c04559..32d19728c7fc29866135c3f9c205facc9b11b7ea 100644 (file)
--- a/config/learners/lightgbm.cfg
+++ b/config/learners/lightgbm.cfg
@@ -4,7 +4,7 @@ method = lightgbm
  [HYPERPARAMETERS]
  learning_rate  = 0.1
  metric         = auc
-num_iterations = 1000
+num_iterations = 300
  num_round      = 10
-num_leaves     = 31
+num_leaves     = 900
  objective      = poisson
diff --git a/config/learners/xgboost.cfg b/config/learners/xgboost.cfg

index f9e53298ef6586768c7c5d7d3a55e697f604670b..9a1836330d00b827edfeeede7a082e809efdd191 100644 (file)
--- a/config/learners/xgboost.cfg
+++ b/config/learners/xgboost.cfg
@@ -4,7 +4,7 @@ method = xgboost
  
  [HYPERPARAMETERS]
  learning_rate = 0.01
-max_depth     = 7
+max_depth     = 6
  random_state  = 42
  n_estimators  = 10000
  n_jobs        = -1
diff --git a/config/targets/sdis25.cfg b/config/targets/sdis25.cfg

index 712e93b1d6f3ea78815c3c7d87e747b9a0c0d8fd..2d4f9f0d48a1ab0cd329dfadaa925f4121a618db 100644 (file)
--- a/config/targets/sdis25.cfg
+++ b/config/targets/sdis25.cfg
@@ -1,3 +1,10 @@
+[POSITION]
+name      = Besançon
+country   = France
+timezone  = Europe/Paris
+latitude  = 47.237829
+longitude = -6.0240539
+
  [SPECIFICATION]
  origin      = False
  destination = False
diff --git a/predictops/engine.py b/predictops/engine.py

index a63ef2932aee9086fb23ede8902ec4ea0820adb8..bb85787ca304aa2643a5c1e2c42e53362ced5689 100644 (file)
--- a/predictops/engine.py
+++ b/predictops/engine.py
@@ -6,13 +6,15 @@ from pathlib import Path
  from shutil import rmtree
  
  import os
+import pytz
  
  from .learn.learning import Learning
  from .learn.preprocessing import Preprocessing
  from .source.ephemeris import Ephemeris
  from .source.holidays import Holidays
-from .source.ramadan import Ramadan
  from .source.meteofrance import MeteoFrance
+from .source.ramadan import Ramadan
+from .source.sentinelles import Sentinelles
  from .target.target import Target
  
  fileConfig((Path.cwd() / 'config') / 'logging.cfg')
@@ -68,10 +70,10 @@ class Engine(object):
                  self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
                  self._config_text += f.read() + '\n\n'
  
-            ephemerides = Ephemeris(config_file=config_file)
+            ephemerides = Ephemeris(config_file=config_file, start=self._start, end=self._end)
  
-            ephemerides.start = self._start
-            ephemerides.end = self._end
+            # ephemerides.start = self._start
+            # ephemerides.end = self._end
  
              dated_features = ephemerides.dated_features
              for date in dated_features:
@@ -136,6 +138,8 @@ class Engine(object):
  
      def add_preprocessing(self):
          self._preproc = Preprocessing(config_file=self._config,
+                                      start=self._start, end=self._end,
+                                      timestep=self._timestep,
                                        dict_features=self.X,
                                        dict_target=self.y)
  
diff --git a/predictops/learn/learning.py b/predictops/learn/learning.py

index fb3675d56d7e007c557d93aeedd64bcc514a8df0..f13c3a6514eb2a8b552a4aa80fd13b275f590036 100644 (file)
--- a/predictops/learn/learning.py
+++ b/predictops/learn/learning.py
@@ -32,10 +32,6 @@ class Learning:
          else:
              self._X = X
              self._y = y
-        rep = (Path.cwd() / self._file_name)
-        rep.mkdir()
-        self._filename = str(self._file_name / os.path.basename(self._file_name))
-        self._X.to_csv(self._filename + '.csv')
          self._learn()
          self._evaluate()
  
@@ -92,6 +88,10 @@ class Learning:
          for k in range(10):
              txt += f"Percentage of errors lower than {k}: {[abs(int(u-v))<=k for u,v in zip(self._y_test.values, y_test_pred)].count(True)/len(self._y_test)*100}\n"
  
+        rep = (Path.cwd() / self._file_name)
+        rep.mkdir()
+        self._filename = str(self._file_name / os.path.basename(self._file_name))
+
          print(txt)
          with open(self._filename + ".result", 'w') as f:
              f.write(txt)
diff --git a/predictops/learn/preprocessing.py b/predictops/learn/preprocessing.py

index 9bc09ad2eca2759c22b6047c3ded8ab747e015de..55cffbd2a0e094a610580e1e4dbcdf8adc80de5d 100644 (file)
--- a/predictops/learn/preprocessing.py
+++ b/predictops/learn/preprocessing.py
@@ -1,10 +1,7 @@
  from configparser import ConfigParser
-from csv import DictReader
-from datetime import datetime, timedelta
  from itertools import chain
  from logging import getLogger
  from logging.config import fileConfig
-from os import listdir
  from pathlib import Path
  from sklearn import preprocessing
  
@@ -27,17 +24,16 @@ class Preprocessing:
      '''
  
      def __init__(self, config_file=None,
+                 start=None, end=None, timestep=None,
                   dict_features=None, dict_target=None):
          '''
          Constructor that defines all needed attributes and collects features.
          '''
          self._config = config_file
  
-        self._start = datetime.strptime(self._config['DATETIME']['start'],
-                                        '%m/%d/%Y %H:%M:%S')
-        self._end = datetime.strptime(self._config['DATETIME']['end'],
-                                      '%m/%d/%Y %H:%M:%S')
-        self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
+        self._start = start
+        self._end = end
+        self._timestep = timestep
          self._dict_features = dict_features
          self._dict_target = dict_target
  
@@ -196,6 +192,7 @@ class Preprocessing:
          '''
          logger.info("One hot encoding for categorical feature")
          # We store numerical columns
+
          df_out = pd.DataFrame()
          for col in self._numerical_columns:
              df_out[col] = self._dataframe[col]
@@ -222,6 +219,8 @@ class Preprocessing:
              self._fill_nan()
              # Adding previous (historical) nb_interventions as features
              self._add_history()
+            # self._dataframe.to_csv('toto.csv')
+            # exit()
              # Normalizing numerical values
              self._standardize()
              # Dealing with categorical features
diff --git a/predictops/source/ephemeris.py b/predictops/source/ephemeris.py

index e1c07ecfe604de8012715c25a773c2a66fed7bb7..e04a41a46470d7981db259b42c0ba833c8c53639 100644 (file)
--- a/predictops/source/ephemeris.py
+++ b/predictops/source/ephemeris.py
@@ -1,33 +1,40 @@
+from astral import LocationInfo
+from astral.sun import sun
  from configparser import ConfigParser
  from datetime import datetime, timedelta
  from logging import getLogger
  from logging.config import fileConfig
  from pathlib import Path
  
-import time
  import calendar
+import pytz
+import time
  
  fileConfig((Path.cwd() / 'config') / 'logging.cfg')
  logger = getLogger()
  
+
  class Ephemeris:
  
      _start = None
-    _end   = None
+    _end = None
  
-    def __init__(self, config_file):
+    def __init__(self, config_file, start, end):
  
          self._config = ConfigParser()
          self._config.read(config_file)
  
+        self._city = LocationInfo("Besançon", "France", "Europe/Paris", 47.237829, -6.0240539)
+        self._start = start
+        self._end = end
          # Collecting ephemeris features
          self._features = [section for section in self._config
-                              if self._config[section].getboolean('numerical')
-                              or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
  
          self._dated_features = {}
  
-
      @property
      def start(self):
          return self._start
@@ -36,7 +43,6 @@ class Ephemeris:
      def start(self, x):
          self._start = x
  
-
      @property
      def end(self):
          return self._end
@@ -45,15 +51,18 @@ class Ephemeris:
      def end(self, x):
          self._end = x
  
-
      @property
      def dated_features(self):
          if self._dated_features == {}:
              logger.info("Adding ephemeris features")
+            paris = pytz.timezone('Europe/Paris')
              date = self._start
              while date <= self._end:
+                datel = paris.localize(date)
                  dict_hour = {}
                  Date = time.strptime(datetime.strftime(date, '%m/%d/%Y %H:%M:%S'), '%m/%d/%Y %H:%M:%S')
+                s = sun(self._city.observer, date=date,
+                        tzinfo=pytz.timezone('Europe/Paris'))
                  for feature in self._features:
                      if feature == 'hour':
                          dict_hour['hour'] = Date.tm_hour
@@ -69,12 +78,23 @@ class Ephemeris:
                          # Si c'est une année bissextile et qu'on est après le 29 février, on compte une journée
                          # dans l'année de moins, car on va supprimer les 29 févriers, de sorte que les 14 juillets,
                          # les 24 décembre... tombent toujours
-                        if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/"+str(Date.tm_year), "%d/%m/%Y"):
-                            dict_hour['dayInYear'] = Date.tm_yday -1
+                        if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/" + str(Date.tm_year), "%d/%m/%Y"):
+                            dict_hour['dayInYear'] = Date.tm_yday - 1
                          else:
                              dict_hour['dayInYear'] = Date.tm_yday
                      elif feature == 'weekInYear':
                          dict_hour['weekInYear'] = date.isocalendar()[1]
+                    elif feature == 'sunRised':
+                        dict_hour['sunRised'] = (datel >= s["sunrise"] - timedelta(minutes=30)
+                                                 and datel <= s["sunset"] - timedelta(minutes=30))
+                    elif feature == 'noon':
+                        dict_hour['noon'] = (datel.hour == s["noon"].hour)
+                    elif feature == 'night':
+                        dict_hour['night'] = (datel <= s["dawn"] - timedelta(minutes=30)
+                                              or datel >= s["dusk"] - timedelta(minutes=30))
+                    elif feature == 'daylightSavingTime':
+                        dict_hour['daylightSavingTime'] = (datel.dst() == timedelta(0))
+
                  self._dated_features[date] = dict_hour
                  date += timedelta(hours=1)
-        return self._dated_features
-\ No newline at end of file
+        return self._dated_features
diff --git a/predictops/source/holidays.py b/predictops/source/holidays.py

index 6893db03ef5ef7eb804d3f1d9ec391cf18a2dcaf..db61f0151d74fbfd04f68aece531ddc6d03bc44c 100644 (file)
--- a/predictops/source/holidays.py
+++ b/predictops/source/holidays.py
@@ -11,10 +11,11 @@ import itertools
  fileConfig((Path.cwd() / 'config') / 'logging.cfg')
  logger = getLogger()
  
+
  class Holidays:
  
      _start = None
-    _end   = None
+    _end = None
  
      def __init__(self, config_file):
  
@@ -23,12 +24,12 @@ class Holidays:
  
          # Collecting holidays features
          self._features = [section for section in self._config
-                              if self._config[section].getboolean('numerical')
-                              or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
  
          self._dated_features = {}
  
-
      @property
      def start(self):
          return self._start
@@ -37,7 +38,6 @@ class Holidays:
      def start(self, x):
          self._start = x
  
-
      @property
      def end(self):
          return self._end
@@ -46,49 +46,46 @@ class Holidays:
      def end(self, x):
          self._end = x
  
-
-
      def _get_academic_zone(self, name, date):
          dict_zones = {
-        'Caen' : ('A', 'B'),
-        'Clermont-Ferrand' : ('A', 'A'),
-        'Grenoble' : ('A', 'A'),
-        'Lyon' : ('A', 'A'),
-        'Montpellier' : ('A', 'C'),
-        'Nancy-Metz' : ('A', 'B'),
-        'Nantes' : ('A', 'B'),
-        'Rennes' : ('A', 'B'),
-        'Toulouse' : ('A', 'C'),
-        'Aix-Marseille' : ('B', 'B'),
-        'Amiens' : ('B', 'B'),
-        'Besançon' : ('B', 'A'),
-        'Dijon' : ('B', 'A'),
-        'Lille' : ('B', 'B'),
-        'Limoges' : ('B', 'A'),
-        'Nice' : ('B', 'B'),
-        'Orléans-Tours' : ('B', 'B'),
-        'Poitiers' : ('B', 'A'),
-        'Reims' : ('B', 'B'),
-        'Rouen ' : ('B', 'B'),
-        'Strasbourg' : ('B', 'B'),
-        'Bordeaux' : ('C', 'A'),
-        'Créteil' : ('C', 'C'),
-        'Paris' : ('C', 'C'),
-        'Versailles' : ('C', 'C')
+            'Caen': ('A', 'B'),
+            'Clermont-Ferrand': ('A', 'A'),
+            'Grenoble': ('A', 'A'),
+            'Lyon': ('A', 'A'),
+            'Montpellier': ('A', 'C'),
+            'Nancy-Metz': ('A', 'B'),
+            'Nantes': ('A', 'B'),
+            'Rennes': ('A', 'B'),
+            'Toulouse': ('A', 'C'),
+            'Aix-Marseille': ('B', 'B'),
+            'Amiens': ('B', 'B'),
+            'Besançon': ('B', 'A'),
+            'Dijon': ('B', 'A'),
+            'Lille': ('B', 'B'),
+            'Limoges': ('B', 'A'),
+            'Nice': ('B', 'B'),
+            'Orléans-Tours': ('B', 'B'),
+            'Poitiers': ('B', 'A'),
+            'Reims': ('B', 'B'),
+            'Rouen ': ('B', 'B'),
+            'Strasbourg': ('B', 'B'),
+            'Bordeaux': ('C', 'A'),
+            'Créteil': ('C', 'C'),
+            'Paris': ('C', 'C'),
+            'Versailles': ('C', 'C')
          }
          if date < datetime(2016, 1, 1):
              return dict_zones[name][0]
          else:
              return dict_zones[name][1]
  
-
      @property
      def dated_features(self):
          if self._dated_features == {}:
              logger.info("Adding holidays features")
              bankHolidays = tuple(itertools.chain.from_iterable(list(JoursFeries.for_year(k).values())
-                                           for k in range(self.start.year, self.end.year+1)))
-            bankHolidaysEve = tuple(u-timedelta(days=1) for u in bankHolidays)
+                                                               for k in range(self.start.year, self.end.year + 1)))
+            bankHolidaysEve = tuple(u - timedelta(days=1) for u in bankHolidays)
              name = self._config['ZONE']['name']
              date = self._start
              Date = datetime.date(date)
@@ -96,7 +93,7 @@ class Holidays:
              Tomorrow = datetime.date(tomorrow)
              d = SchoolHolidayDates()
              dict_hour = {
-                'bankHolidays' : Date in bankHolidays,
+                'bankHolidays': Date in bankHolidays,
                  'bankHolidaysEve': Date in bankHolidaysEve,
                  'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
                  'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
@@ -110,9 +107,9 @@ class Holidays:
                      tomorrow = date + timedelta(days=1)
                      Tomorrow = datetime.date(tomorrow)
                      dict_hour = {
-                        'bankHolidays' : Date in bankHolidays,
+                        'bankHolidays': Date in bankHolidays,
                          'bankHolidaysEve': Date in bankHolidaysEve,
                          'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
                          'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
                      }
-        return self._dated_features
-\ No newline at end of file
+        return self._dated_features
diff --git a/predictops/source/meteofrance.py b/predictops/source/meteofrance.py

index 0edd49f544f33ed07b17c93dc7b1e493a6d0e6e5..05eb7104563ba6ce7a2b2cab653ee3463b88642e 100644 (file)
--- a/predictops/source/meteofrance.py
+++ b/predictops/source/meteofrance.py
@@ -19,12 +19,12 @@ logger = getLogger()
  
  class MeteoFrance:
  
-    _latitude    = None
-    _longitude   = None
+    _latitude = None
+    _longitude = None
      _nb_stations = None
-    _start       = None
-    _end         = None
-    _features    = None
+    _start = None
+    _end = None
+    _features = None
  
      def __init__(self, config_file):
          '''
@@ -51,10 +51,9 @@ class MeteoFrance:
          # Collecting meteofrance features
          self._features = [section for section in self._config
                            if self._config.has_option(section, 'numerical')
-                          and (self._config[section]['numerical'] or
-                               self._config[section]['categorical'])]
-
-
+                          and (self._config[section]['binary'] or
+                               self._config[section]['categorical'] or
+                               self._config[section]['numerical'])]
  
      @property
      def start(self):
@@ -64,7 +63,6 @@ class MeteoFrance:
      def start(self, x):
          self._start = x
  
-
      @property
      def end(self):
          return self._end
@@ -73,7 +71,6 @@ class MeteoFrance:
      def end(self, x):
          self._end = x
  
-
      @property
      def latitude(self):
          return self._latitude
@@ -82,7 +79,6 @@ class MeteoFrance:
      def latitude(self, x):
          self._latitude = x
  
-
      @property
      def longitude(self):
          return self._longitude
@@ -91,7 +87,6 @@ class MeteoFrance:
      def longitude(self, x):
          self._longitude = x
  
-
      @property
      def nb_stations(self):
          return self._nb_stations
@@ -100,7 +95,6 @@ class MeteoFrance:
      def nb_stations(self, x):
          self._nb_stations = x
  
-
      def _regenerate_directory(self):
          '''
          Re-creating data directory architecture for MeteoFrance
@@ -115,8 +109,6 @@ class MeteoFrance:
          p = Path(self._data_directory / 'config')
          p.mkdir(exist_ok=True, parents=True)
  
-
-
      def _get_stations(self):
          '''
          Collect (after downloading them, if needed) the stations and their
@@ -129,7 +121,7 @@ class MeteoFrance:
          # The csv file of meteo stations (names, ids and locations) if downloaded,
          # if not available in the config directory within data / meteo_france
          link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
-        p = Path(self._data_directory / 'config' )
+        p = Path(self._data_directory / 'config')
          csv_file = p / basename(link)
          if not isfile(csv_file):
              logger.info('Downloading location stations from MeteoFrance')
@@ -142,11 +134,11 @@ class MeteoFrance:
              reader = DictReader(f, delimiter=';')
              for row in reader:
                  latitude, longitude = eval(row['Latitude']), eval(row['Longitude'])
-                self._dict_stations[row['Nom'].replace("'",'’')] = {
-                    'id' : row['ID'],
-                    'longitude' : longitude,
-                    'latitude' : latitude,
-                    'distance' : vincenty(
+                self._dict_stations[row['Nom'].replace("'", '’')] = {
+                    'id': row['ID'],
+                    'longitude': longitude,
+                    'latitude': latitude,
+                    'distance': vincenty(
                          (self._latitude, self._longitude),
                          (latitude, longitude)).km
                  }
@@ -154,13 +146,11 @@ class MeteoFrance:
          # Find the closest stations
          logger.info('Finding the closest stations')
          stations_by_distance = sorted(self._dict_stations.keys(),
-                                      key = lambda x: self._dict_stations[x]['distance'])
+                                      key=lambda x: self._dict_stations[x]['distance'])
          logger.info(f'The {self._nb_stations} closest stations are: '
                      f'{", ".join(stations_by_distance[:self._nb_stations])}.')
          return [self._dict_stations[sta]['id'] for sta in stations_by_distance][:self._nb_stations]
  
-
-
      def _collect_historical_data(self):
          '''
          We collect all csv files from January 1996 until the month
@@ -171,8 +161,8 @@ class MeteoFrance:
          # List of year-months to consider
          historical = []
          date_end = self._end
-        for year in range(self._start.year, date_end.year+1):
-            for month in range(1,13):
+        for year in range(self._start.year, date_end.year + 1):
+            for month in range(1, 13):
                  date = datetime(year, month, 1)
                  if date >= self._start and date <= date_end:
                      historical.append(date.strftime("%Y%m"))
@@ -183,7 +173,7 @@ class MeteoFrance:
          p = Path(meteo_data)
          p.mkdir(exist_ok=True, parents=True)
          for date in historical:
-            if not isfile(meteo_data / ('synop.'+date+'.csv')):
+            if not isfile(meteo_data / ('synop.' + date + '.csv')):
                  link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'
                  link += date + '.csv.gz'
                  download_path = meteo_data / basename(link)
@@ -194,8 +184,6 @@ class MeteoFrance:
                          g.write(f.read().decode())
                          remove(meteo_data / basename(link))
  
-
-
      def update(self):
          '''
          Update the MeteoFrance features with the last available data
@@ -209,15 +197,13 @@ class MeteoFrance:
  
          logger.info('Update historical csv files from MeteoFrance, if needed')
          today = datetime.now()
-        todel = 'synop.'+today.strftime("%Y%m")+".csv"
+        todel = 'synop.' + today.strftime("%Y%m") + ".csv"
          try:
              remove(self._data_directory / 'historical' / todel)
          except:
              logger.warning(f"{self._data_directory / 'historical' / todel} not found")
          self._collect_historical_data()
  
-
-
      @property
      def dated_features(self):
          '''
@@ -234,26 +220,27 @@ class MeteoFrance:
              logger.info('Collecting meteofrance feature information')
              # A dictionary for the features
              dico_features = {self._config[section]["abbreviation"]:
-                               {
-                                   'name': section, # feature name
-                                   'numerical': self._config[section]['numerical'],
-                                   'categorical': self._config[section]['categorical']
-                                }
-                            for section in self._features}
+                             {
+                'name': section,  # feature name
+                'numerical': self._config[section]['numerical'],
+                'categorical': self._config[section]['categorical']
+            }
+                for section in self._features}
              dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
              self._dated_features = {}
+
              for csv_meteo in sorted(listdir(dir_data)):
                  date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
                  if (date >= self._start and date <= self._end)\
-                or (date.year == self._start.year and date.month == self._start.month)\
-                or (date.year == self._end.year and date.month == self._end.month):
+                        or (date.year == self._start.year and date.month == self._start.month)\
+                        or (date.year == self._end.year and date.month == self._end.month):
                      logger.info(f'Adding meteofrance features from {csv_meteo}')
                      with open(dir_data / csv_meteo, "r") as f:
                          reader = DictReader(f, delimiter=';')
                          for row in reader:
                              if row['numer_sta'] in self._stations:
                                  date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
-                                if date  >= self._start and date <= self._end:
-                                    self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+                                if date >= self._start and date <= self._end:
+                                    self._dated_features.setdefault(date, {}).update({dico_features[feat]['name'] + '_' + str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq', 'None')) for feat in dico_features})
          return self._dated_features
  
diff --git a/predictops/source/ramadan.py b/predictops/source/ramadan.py

index 6836df15a474016645d6665c163767b78577975c..e0875d2ce47ce2875ad7e0532aafbcf36fa94b9b 100644 (file)
--- a/predictops/source/ramadan.py
+++ b/predictops/source/ramadan.py
@@ -22,8 +22,9 @@ class Ramadan:
  
          # Collecting holidays features
          self._features = [section for section in self._config
-                          if self._config[section].getboolean('numerical')
-                          or self._config[section].getboolean('categorical')]
+                          if self._config[section].getboolean('binary')
+                          or self._config[section].getboolean('categorical')
+                          or self._config[section].getboolean('numerical')]
  
          self._dated_features = {}
  
diff --git a/predictops/source/sentinelles.py b/predictops/source/sentinelles.py

new file mode 100644 (file)

index 0000000..38e7171
--- /dev/null
+++ b/predictops/source/sentinelles.py
@@ -0,0 +1,17 @@
+from configparser import ConfigParser
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
+class Sentinelles:
+    def __init__(self, config_file):
+        '''
+        Constructor of the Sentinelles source of features.
+        '''
+        self._config = ConfigParser()
+        self._config.read(config_file)
diff --git a/predictops/target/target.py b/predictops/target/target.py

index 9e3d86d31de82f24ad8a8c2bd6abf6e742f446fe..fdff54ba2259aef28f13964f71b8e115f826622b 100644 (file)
--- a/predictops/target/target.py
+++ b/predictops/target/target.py
@@ -11,8 +11,8 @@ logger = getLogger()
  
  class Target:
  
-    def __init__(self, config_file = None,
-                 start = None, end = None, timestep = None, cumulative = None):
+    def __init__(self, config_file=None,
+                 start=None, end=None, timestep=None, cumulative=None):
  
          self._config = ConfigParser()
          self._config.read(config_file)
@@ -33,8 +33,6 @@ class Target:
          self._stream_file = eval(self._config['DATA']['csv_file'])
          self._get_located_interventions()
  
-
-
      @property
      def start(self):
          return self._start
@@ -43,7 +41,6 @@ class Target:
      def start(self, x):
          self._start = x
  
-
      @property
      def end(self):
          return self._end
@@ -52,7 +49,6 @@ class Target:
      def end(self, x):
          self._end = x
  
-
      @property
      def y(self):
          return self._y
@@ -61,8 +57,6 @@ class Target:
      def end(self, y):
          self._y = y
  
-
-
      def _get_located_interventions(self):
          if not self._config['SPECIFICATION'].getboolean('origin')\
             and not self._config['SPECIFICATION'].getboolean('destination'):
diff --git a/requirements.txt b/requirements.txt

index 6615f1fc4c5a72ac226089c20e20c6f9babcaef1..bd50026191e5c3fd31a93f009fab3e6eae9c98e8 100644 (file)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+astral==2.1
  attrs==19.3.0
  Click==7.0
  click-plugins==1.1.1
author	Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
	Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
committer	Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
	Mon, 24 Feb 2020 07:48:31 +0000 (08:48 +0100)
config/features/feature_ephemeris.cfg		patch \| blob \| history
config/features/feature_meteo.cfg		patch \| blob \| history
config/learn.cfg		patch \| blob \| history
config/learners/lightgbm.cfg		patch \| blob \| history
config/learners/xgboost.cfg		patch \| blob \| history
config/targets/sdis25.cfg		patch \| blob \| history
predictops/engine.py		patch \| blob \| history
predictops/learn/learning.py		patch \| blob \| history
predictops/learn/preprocessing.py		patch \| blob \| history
predictops/source/ephemeris.py		patch \| blob \| history
predictops/source/holidays.py		patch \| blob \| history
predictops/source/meteofrance.py		patch \| blob \| history
predictops/source/ramadan.py		patch \| blob \| history
predictops/source/sentinelles.py	[new file with mode: 0644]	patch \| blob
predictops/target/target.py		patch \| blob \| history
requirements.txt		patch \| blob \| history