predictops.git / commitdiff

Adding Ramadan features and a binary category of features.
author    Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Sun, 23 Feb 2020 08:39:21 +0000 (09:39 +0100)
committer Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Sun, 23 Feb 2020 08:39:21 +0000 (09:39 +0100)
16 files changed:
config/features/feature_ephemeris.cfg
config/features/feature_holidays.cfg
config/features/feature_meteo.cfg
config/features/feature_ramadan.cfg [new file with mode: 0644]
config/learn.cfg
config/learners/lightgbm.cfg [new file with mode: 0644]
config/learners/xgboost.cfg
main.py
predictops/engine.py
predictops/learn/learning.py
predictops/learn/preprocessing.py
predictops/source/holidays.py
predictops/source/meteofrance.py
predictops/source/ramadan.py [new file with mode: 0644]
predictops/target/target.py
requirements.txt

diff --git a/config/features/feature_ephemeris.cfg b/config/features/feature_ephemeris.cfg
index decc7871efc82357e6b9469d787f07c25fbd5474..3ed31c76c6b3e5fa2124886b3adc5e0233d2841b 100644 (file)
@@ -1,27 +1,34 @@
 [hour]
+binary      = False
 categorical = True
 numerical   = False
 
 [dayInWeek]
+binary      = False
 categorical = True
 numerical   = False
 
 [dayInMonth]
+binary      = False
 categorical = True
 numerical   = False
 
 [dayInYear]
+binary      = False
 categorical = True
 numerical   = False
 
 [weekInYear]
+binary      = False
 categorical = True
 numerical   = False
 
 [month]
+binary      = False
 categorical = False
 numerical   = True
 
 [year]
+binary      = False
 categorical = False
 numerical   = True
\ No newline at end of file
diff --git a/config/features/feature_holidays.cfg b/config/features/feature_holidays.cfg
index c3b3063c99e6ab7283423965f8626f007c88ff10..ccccbd729182ac9c3f0ebf6c68ebbf44a091dc45 100644 (file)
@@ -2,17 +2,21 @@
 name = Besançon
 
 [bankHolidays]
-categorical = True
+binary      = True
+categorical = False
 numerical   = False
 
 [bankHolidaysEve]
-categorical = True
+binary      = True
+categorical = False
 numerical   = False
 
 [holidays]
-categorical = True
+binary      = True
+categorical = False
 numerical   = False
 
 [holidaysEve]
-categorical = True
+binary      = True
+categorical = False
 numerical   = False
diff --git a/config/features/feature_meteo.cfg b/config/features/feature_meteo.cfg
index 04f6c620eb65d7f621c5fd22f90588d6be4beee4..5b694e5f021ad85d7da6c012c885fef1af514a13 100644 (file)
@@ -11,65 +11,78 @@ nb_stations = 3
 
 [temperature]
 abbreviation = t
+binary       = False
 categorical  = False
 numerical    = True
 
 [pressure]
 abbreviation = pres
+binary       = False
 categorical  = False
 numerical    = True
 
 [pressureVariation]
 abbreviation = tend
+binary       = False
 categorical  = False
 numerical    = True
 
 [barometricTrend]
 abbreviation = cod_tend
+binary       = False
 categorical  = True
 numerical    = False
 
 [humidity]
 abbreviation = u
+binary       = False
 categorical  = False
 numerical    = True
 
 [dewPoint]
 abbreviation = td
+binary       = False
 categorical  = False
 numerical    = True
 
 [lastHourRainfall]
 abbreviation = rr1
+binary       = False
 categorical  = False
 numerical    = True
 
 [last3hHourRainfall]
 abbreviation = rr3
+binary       = False
 categorical  = False
 numerical    = True
 
 [meanWindSpeed10min]
 abbreviation = ff
+binary       = False
 categorical  = False
 numerical    = True
 
 [meanWindDirection10min]
 abbreviation = dd
+binary       = False
 categorical  = False
 numerical    = True
 
 [gustsOverAPeriod]
 abbreviation = rafper
+binary       = False
 categorical  = False
 numerical    = True
 
 [horizontalVisibility]
 abbreviation = vv
+binary       = False
 categorical  = False
 numerical    = True
 
 [currentWeather]
 abbreviation = ww
+binary       = False
 categorical  = True
 numerical    = False
\ No newline at end of file
diff --git a/config/features/feature_ramadan.cfg b/config/features/feature_ramadan.cfg
new file mode 100644 (file)
index 0000000..b9dc2c8
--- /dev/null
@@ -0,0 +1,14 @@
+[ramadanEve]
+binary      = True
+categorical = False
+numerical   = False
+
+[ramadan]
+binary      = True
+categorical = False
+numerical   = False
+
+[ramadanDayAfter]
+binary      = True
+categorical = False
+numerical   = False
\ No newline at end of file
diff --git a/config/learn.cfg b/config/learn.cfg
index 82c67ddf5cc81797dc8edcb5ffe1178966565387..73379cf605405d0a8bccf95089daf343064ad5ee 100644 (file)
@@ -1,33 +1,36 @@
 [DATETIME]
-start    = 01/01/2016 00:00:00
-end      = 12/31/2018 23:00:00
+start    = 01/01/2006 00:00:00
+end      = 12/31/2019 23:00:00
 hourStep = 3
 
 
 [FEATURES]
-meteofrance = True
 ephemeris   = True
 holidays    = True
+meteofrance = True
+ramadan     = True
 
 
 [FEATURE_CONFIG]
-meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
 ephemeris   = (Path.cwd() / 'config') / 'features' / 'feature_ephemeris.cfg'
 holidays    = (Path.cwd() / 'config') / 'features' / 'feature_holidays.cfg'
+meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
+ramadan     = (Path.cwd() / 'config') / 'features' / 'feature_ramadan.cfg'
 
 
 [PREPROCESSING]
-fill_method = spline
+fill_method = linear
 order       = 3
 
 
 [HISTORY_KNOWLEDGE]
-nb_lines = 5
+nb_lines = 24//3*7*4
 
 
 [TARGET]
-config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
-
+config      = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+cumulative  = True
+horizon     = 1
 
 [LEARNER]
 config =  (Path.cwd() / 'config') / 'learners' / 'xgboost.cfg'
\ No newline at end of file
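Note on the new HISTORY_KNOWLEDGE value: nb_lines is now a Python expression rather than an integer literal, which is why engine.py and preprocessing.py below switch from getint() to eval() for this option. A rough sketch of what the expression works out to, given the hourStep of 3 configured above:

    # sketch: evaluating the new nb_lines expression
    nb_lines = 24 // 3 * 7 * 4   # 8 timesteps per day * 7 days * 4 weeks
    print(nb_lines)              # 224 past timesteps kept as history features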
diff --git a/config/learners/lightgbm.cfg b/config/learners/lightgbm.cfg
new file mode 100644 (file)
index 0000000..ef062b3
--- /dev/null
@@ -0,0 +1,10 @@
+[MODEL]
+method = lightgbm
+
+[HYPERPARAMETERS]
+learning_rate  = 0.1
+metric         = auc
+num_iterations = 100
+num_round      = 10
+num_leaves     = 31
+objective      = poisson
diff --git a/config/learners/xgboost.cfg b/config/learners/xgboost.cfg
index d099aa417039d0fa98fb53e9591e582fa6bae86a..f9e53298ef6586768c7c5d7d3a55e697f604670b 100644 (file)
@@ -6,6 +6,6 @@ method = xgboost
 learning_rate = 0.01
 max_depth     = 7
 random_state  = 42
-n_estimators  = 1000
+n_estimators  = 10000
 n_jobs        = -1
 objective     = 'count:poisson'
\ No newline at end of file
diff --git a/main.py b/main.py
index 27f502aeaa676fd0a0500e2ca4f20a295f5d0ce9..d451534d130211ace744abc93379b9be5c7de0ef 100644 (file)
--- a/main.py
+++ b/main.py
@@ -11,14 +11,11 @@ logger = getLogger()
 if __name__ == '__main__':
 
     config = (Path.cwd() / 'config') / 'learn.cfg'
-    engine = Engine(config_file = config)
-
-    engine.add_features()
-    engine.add_target()
-
-    engine.add_preprocessing()
-
-    engine.learn()
+    with Engine(config_file = config) as e:
+        e.add_features()
+        e.add_target()
+        e.add_preprocessing()
+        e.learn()
 
     '''target = toarea(stream_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
 
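main.py now drives the run through a with block, relying on the __enter__/__exit__ methods added to Engine below: when the block ends, __exit__ writes the accumulated configuration text next to the results of the run. A minimal sketch of that protocol, with the class and file names purely illustrative:

    # minimal sketch of the context-manager protocol Engine now implements (illustrative names)
    class ConfigSnapshot:
        def __enter__(self):
            self.text = ''
            return self
        def __exit__(self, exc_type, exc_value, traceback):
            # runs when the with block exits, even after an exception
            with open('run_snapshot.cfg', 'w') as f:   # hypothetical output path
                f.write(self.text)

    with ConfigSnapshot() as snap:
        snap.text += '[DATETIME]\nhourStep = 3\n'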
diff --git a/predictops/engine.py b/predictops/engine.py
index e7bbf1c5aa58221da7a8aaa71788cf0339258cbc..a63ef2932aee9086fb23ede8902ec4ea0820adb8 100644 (file)
@@ -5,97 +5,150 @@ from logging.config import fileConfig
 from pathlib import Path
 from shutil import rmtree
 
+import os
+
+from .learn.learning import Learning
+from .learn.preprocessing import Preprocessing
 from .source.ephemeris import Ephemeris
 from .source.holidays import Holidays
+from .source.ramadan import Ramadan
 from .source.meteofrance import MeteoFrance
-from .learn.learning import Learning
-from .learn.preprocessing import Preprocessing
 from .target.target import Target
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 
-class Engine:
+class Engine(object):
 
-    def __init__(self, config_file = (Path.cwd() / 'config') / 'learn.cfg'):
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        with open(str(self._file_name / os.path.basename(self._file_name)) + '.cfg', 'w') as f:
+            f.write(self._config_text)
+
+    def __init__(self, config_file=(Path.cwd() / 'config') / 'learn.cfg'):
         self._config = ConfigParser()
         self._config.read(config_file)
+        launching_time = datetime.strftime(datetime.now(), '%Y_%m_%d_%H_%M')
+        self._name = os.path.splitext(os.path.basename(eval(self._config['TARGET']['config'])))[0]
+        self._file_name = f"{self._name}-{launching_time}"
+        p = Path.cwd() / 'results' / self._name
+        p.mkdir(exist_ok=True, parents=True)
+        self._file_name = p / self._file_name
+
+        self._config_text = ''
+        with open(config_file) as f:
+            self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+            self._config_text += f.read() + '\n\n'
+
         self._start = datetime.strptime(self._config['DATETIME']['start'],
                                         '%m/%d/%Y %H:%M:%S')
         self._end = datetime.strptime(self._config['DATETIME']['end'],
-                                        '%m/%d/%Y %H:%M:%S')
+                                      '%m/%d/%Y %H:%M:%S')
 
-        self._timestep = timedelta(hours =
-                                   self._config['DATETIME'].getfloat('hourStep'))
+        self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
 
         self._X = {}
 
-
-
     def clean(self):
         # Cleaning the data directory
         logger.info("Cleaning and restoring data directory")
-        directory  = Path.cwd() / 'data'
+        directory = Path.cwd() / 'data'
         if directory.is_dir():
             rmtree(directory)
         p = Path(Path.cwd() / 'data')
         p.mkdir()
 
-
     def add_features(self):
-        if self._config['FEATURES'].getboolean('meteofrance'):
-            meteofeature = MeteoFrance(config_file =
-                                       eval(self._config['FEATURE_CONFIG']['meteofrance']))
-
-            meteofeature.start = self._start
-            meteofeature.end = self._end
-
-            meteofeature.update()
-            dated_features = meteofeature.dated_features
-            for date in dated_features:
-                self._X.setdefault(date,{}).update(dated_features[date])
-
         if self._config['FEATURES'].getboolean('ephemeris'):
-            ephemerides = Ephemeris(config_file =
-                                    eval(self._config['FEATURE_CONFIG']['ephemeris']))
+            config_file = eval(self._config['FEATURE_CONFIG']['ephemeris'])
+            with open(config_file) as f:
+                self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+                self._config_text += f.read() + '\n\n'
+
+            ephemerides = Ephemeris(config_file=config_file)
 
             ephemerides.start = self._start
             ephemerides.end = self._end
 
             dated_features = ephemerides.dated_features
             for date in dated_features:
-                self._X.setdefault(date,{}).update(dated_features[date])
+                self._X.setdefault(date, {}).update(dated_features[date])
 
         if self._config['FEATURES'].getboolean('holidays'):
-            holidays = Holidays(config_file =
-                                eval(self._config['FEATURE_CONFIG']['holidays']))
+            config_file = eval(self._config['FEATURE_CONFIG']['holidays'])
+            with open(config_file) as f:
+                self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+                self._config_text += f.read() + '\n\n'
+
+            holidays = Holidays(config_file=config_file)
 
             holidays.start = self._start
             holidays.end = self._end
 
             dated_features = holidays.dated_features
             for date in dated_features:
-                self._X.setdefault(date,{}).update(dated_features[date])
+                self._X.setdefault(date, {}).update(dated_features[date])
+
+        if self._config['FEATURES'].getboolean('meteofrance'):
+            config_file = eval(self._config['FEATURE_CONFIG']['meteofrance'])
+            with open(config_file) as f:
+                self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+                self._config_text += f.read() + '\n\n'
 
+            meteofeature = MeteoFrance(config_file=config_file)
+
+            meteofeature.start = self._start
+            meteofeature.end = self._end
+
+            meteofeature.update()
+            dated_features = meteofeature.dated_features
+            for date in dated_features:
+                self._X.setdefault(date, {}).update(dated_features[date])
+
+        if self._config['FEATURES'].getboolean('ramadan'):
+            config_file = eval(self._config['FEATURE_CONFIG']['ramadan'])
+            with open(config_file) as f:
+                self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+                self._config_text += f.read() + '\n\n'
+
+            ramadan = Ramadan(config_file=config_file)
+
+            ramadan.start = self._start
+            ramadan.end = self._end
+
+            dated_features = ramadan.dated_features
+            for date in dated_features:
+                self._X.setdefault(date, {}).update(dated_features[date])
 
     def add_target(self):
-        self._target = Target(config_file = eval(self._config['TARGET']['config']),
-                              start = self._start, end = self._end,
-                              timestep = self._timestep)
+        config_file = eval(self._config['TARGET']['config'])
+        cumulative = self._config['TARGET'].getboolean('cumulative')
+        with open(config_file) as f:
+            self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+            self._config_text += f.read() + '\n\n'
 
+        self._target = Target(config_file=config_file,
+                              start=self._start, end=self._end,
+                              timestep=self._timestep, cumulative=cumulative)
 
     def add_preprocessing(self):
-        self._preproc = Preprocessing(config_file = self._config,
-                                      dict_features = self.X,
-                                      dict_target = self.y)
-
+        self._preproc = Preprocessing(config_file=self._config,
+                                      dict_features=self.X,
+                                      dict_target=self.y)
 
     def learn(self):
-        history = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
-        self._learner = Learning(config_file = eval(self._config['LEARNER']['config']),
-                                 X = self._preproc.dataframe, y = list(self.y.values())[history:])
+        config_file = eval(self._config['LEARNER']['config'])
+        with open(config_file) as f:
+            self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+            self._config_text += f.read() + '\n\n'
 
+        history = eval(self._config['HISTORY_KNOWLEDGE']['nb_lines'])
+        self._learner = Learning(config_file=config_file, file_name=self._file_name,
+                                 X=self._preproc.dataframe, y=list(self.y.values())[history:],
+                                 horizon=self._config['TARGET'].getint('horizon'))
 
     @property
     def X(self):
@@ -105,7 +158,6 @@ class Engine:
     def X(self, x):
         self._X = x
 
-
     @property
     def y(self):
         return self._target.y
diff --git a/predictops/learn/learning.py b/predictops/learn/learning.py
index 9a5860afaed8657890140c6dbffa79073bbe6787..959271ded77ad22aea436d98632ab7e97cf272d4 100644 (file)
 from configparser import ConfigParser
+from logging import getLogger
+from logging.config import fileConfig
 from math import sqrt
+from pathlib import Path
 from sklearn.metrics import mean_squared_error, mean_absolute_error
 from sklearn.model_selection import train_test_split
+from statistics import mean, stdev
 
+import lightgbm as lgb
+import matplotlib
+import os
+import pylab as P
 import xgboost
 
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
 class Learning:
 
-    def __init__(self, config_file = None,
-                 X = None, y = None):
+    def __init__(self, config_file=None, file_name=None,
+                 X=None, y=None, horizon=0):
         self._config = ConfigParser()
         self._config.read(config_file)
+        self._file_name = file_name
+        logger.info("Dealing with the horizon of prediction")
+        self._X = X[:-horizon]
+        self._y = y[horizon:]
+        self._learn()
+        self._evaluate()
 
-        df = X
-        df['cible'] = y
-
-        train_val_set, test_set = train_test_split(df, test_size = 0.2, random_state = 42)
-        train_set, val_set = train_test_split(train_val_set, test_size = 0.2, random_state = 42)
+    def _learn(self):
+        logger.info("Generation of learning sets")
+        self._df = self._X
+        self._df['cible'] = self._y
+        train_val_set, test_set = train_test_split(self._df, test_size=0.2, random_state=42)
+        train_set, val_set = train_test_split(train_val_set, test_size=0.2, random_state=42)
 
-        X_test = test_set.drop('cible', axis = 1)
-        y_test = test_set['cible'].copy()
+        self._X_test = test_set.drop('cible', axis=1)
+        self._y_test = test_set['cible'].copy()
 
         X_train = train_set.drop('cible', axis=1)
         y_train = train_set['cible'].copy()
         X_val = val_set.drop('cible', axis=1)
         y_val = val_set['cible'].copy()
 
-
+        logger.info("Start learning")
         if self._config['MODEL']['method'] == 'xgboost':
+            logger.info("Using xgboost regressor")
+            self._reg = xgboost.XGBRegressor(learning_rate=self._config['HYPERPARAMETERS'].getfloat('learning_rate'),
+                                             max_depth=self._config['HYPERPARAMETERS'].getint('max_depth'),
+                                             random_state=self._config['HYPERPARAMETERS'].getint('random_state'),
+                                             n_estimators=self._config['HYPERPARAMETERS'].getint('n_estimators'),
+                                             n_jobs=self._config['HYPERPARAMETERS'].getint('n_jobs'),
+                                             objective='count:poisson')
+
+            self._reg.fit(X_train, y_train,
+                          eval_set=[(X_val, y_val)],
+                          early_stopping_rounds=10)
+        elif self._config['MODEL']['method'] == 'lightgbm':
+            train_data = lgb.Dataset(X_train, label=y_train)
+            val_data = lgb.Dataset(X_val, label=y_val)
+            num_round = self._config['HYPERPARAMETERS'].getint('num_round')
+            param = {
+                'learning_rate': self._config['HYPERPARAMETERS'].getfloat('learning_rate'),
+                'metric': self._config['HYPERPARAMETERS'].get('metric'),
+                'num_iterations': self._config['HYPERPARAMETERS'].getint('num_iterations'),
+                'num_leaves': self._config['HYPERPARAMETERS'].getint('num_leaves'),
+                'objective': self._config['HYPERPARAMETERS'].get('objective')
+            }
+            self._reg = lgb.train(param, train_data, num_round, valid_sets=[val_data])
+
+    def _evaluate(self):
+        logger.info("Evaluation of the learner")
+        y_test_pred = self._reg.predict(self._X_test)
+        txt = f"Average interventions per time unit: {mean(self._df.cible)}\n"
+        txt += f"Standard deviation: {stdev(self._df.cible)}\n\n"
 
-            xgb_reg = xgboost.XGBRegressor(learning_rate = self._config['HYPERPARAMETERS'].getfloat('learning_rate'),
-                                           max_depth     = self._config['HYPERPARAMETERS'].getint('max_depth'),
-                                           random_state  = self._config['HYPERPARAMETERS'].getint('random_state'),
-                                           n_estimators  = self._config['HYPERPARAMETERS'].getint('n_estimators'),
-                                           n_jobs        = self._config['HYPERPARAMETERS'].getint('n_jobs'),
-                                           objective     = 'count:poisson')
+        txt += f"Mean absolute error: {mean_absolute_error(y_test_pred, self._y_test)}\n"
+        txt += f"Root mean squared error: {sqrt(mean_squared_error(y_test_pred, self._y_test))}\n\n"
 
-            xgb_reg.fit(X_train, y_train,
-                        eval_set=[(X_val, y_val)],
-                        early_stopping_rounds=10)
+        for k in range(10):
+            txt += f"Percentage of errors lower than {k}: {[abs(int(u-v))<=k for u,v in zip(self._y_test.values, y_test_pred)].count(True)/len(self._y_test)*100}\n"
 
-            y_test_pred = xgb_reg.predict(X_test)
-            print(sqrt(mean_squared_error(y_test_pred, y_test)), mean_absolute_error(y_test_pred,y_test))
\ No newline at end of file
+        print(txt)
+        rep = (Path.cwd() / self._file_name)
+        rep.mkdir()
+        filename = str(self._file_name / os.path.basename(self._file_name))
+        with open(filename + ".result", 'w') as f:
+            f.write(txt)
+
+        y_true = self._df[self._df.year == self._df.year.max()].cible
+        x_true = self._df[self._df.year == self._df.year.max()].drop('cible', axis=1)
+
+        yy_test_pred = self._reg.predict(x_true)
+        P.figure(figsize=(36, 16))
+        P.plot(list(y_true)[:300], color='blue', label='actual')
+        P.plot(yy_test_pred[:300], color='red', label='predicted')
+        P.title('Predictions for 2018')
+        P.xlabel('Hour in the year')
+        P.ylabel('Number of cumulated interventions')
+        P.legend()
+        P.savefig(filename + ".png")
+
+        yy_test_pred = self._reg.predict(self._X_test)
+        P.figure(figsize=(36, 16))
+        P.plot(list(self._y_test)[:300], color='blue', label='actual')
+        P.plot(yy_test_pred[:300], color='red', label='predicted')
+        P.title('Predictions for test set')
+        P.xlabel('Hour in the year')
+        P.ylabel('Number of cumulated interventions')
+        P.legend()
+        P.savefig(filename + "-test.png")
+
+        if self._config['MODEL']['method'] == 'xgboost':
+            xgboost.plot_importance(self._reg)
+            fig = matplotlib.pyplot.gcf()
+            fig.set_size_inches(15, 130)
+            fig.savefig(filename + '-feat_importance.pdf')
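The new horizon parameter shifts features and target against each other before any split: X[:-horizon] is paired with y[horizon:], so features observed at one timestep are used to predict the target horizon steps later. A small sketch of the alignment, with made-up values:

    # sketch of the horizon alignment done in Learning.__init__
    X = ['feat_t0', 'feat_t1', 'feat_t2', 'feat_t3']
    y = [10, 12, 9, 11]              # target per timestep
    horizon = 1
    pairs = list(zip(X[:-horizon], y[horizon:]))
    # [('feat_t0', 12), ('feat_t1', 9), ('feat_t2', 11)]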
diff --git a/predictops/learn/preprocessing.py b/predictops/learn/preprocessing.py
index 885aad3393979b897e3e0d8c40f3378dbba08e5a..9bc09ad2eca2759c22b6047c3ded8ab747e015de 100644 (file)
@@ -14,6 +14,7 @@ import pandas as pd
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
+
 class Preprocessing:
     '''
     Generate a pandas dataframe from a dictionary of features per datetime, which
@@ -25,8 +26,8 @@ class Preprocessing:
      - NaN values are then filled with last known values.
     '''
 
-    def __init__(self, config_file = None,
-                 dict_features = None, dict_target = None):
+    def __init__(self, config_file=None,
+                 dict_features=None, dict_target=None):
         '''
         Constructor that defines all needed attributes and collects features.
         '''
@@ -35,9 +36,8 @@ class Preprocessing:
         self._start = datetime.strptime(self._config['DATETIME']['start'],
                                         '%m/%d/%Y %H:%M:%S')
         self._end = datetime.strptime(self._config['DATETIME']['end'],
-                                        '%m/%d/%Y %H:%M:%S')
-        self._timestep = timedelta(hours =
-                                   self._config['DATETIME'].getfloat('hourStep'))
+                                      '%m/%d/%Y %H:%M:%S')
+        self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
         self._dict_features = dict_features
         self._dict_target = dict_target
 
@@ -46,26 +46,28 @@ class Preprocessing:
         self._datetimes = []
 
         self._features = set(chain.from_iterable([tuple(u.keys())
-                                                      for u in [*dict_features.values()]]))
+                                                  for u in [*dict_features.values()]]))
 
         #feature_files = Path.cwd() / 'config' / 'features'
-        self._features = {feat : {'numerical': False, 'categorical': False}
+        self._features = {feat: {'numerical': False, 'categorical': False}
                           for feat in self._features}
 
         for feature in self._config['FEATURES']:
             if self._config['FEATURES'][feature]:
                 feature_file = self._config['FEATURE_CONFIG'][feature]
                 config = ConfigParser()
-                config.read(feature_file)
+                config.read(eval(feature_file))
                 for section in config:
                     if config.has_option(section, 'numerical'):
-                        self._features[section]['numerical'] = config[section].getboolean('numerical')
-                        self._features[section]['categorical'] = config[section].getboolean('categorical')
+                        for feature in self._features:
+                            if feature.split('_')[0] == section:
+                                self._features[feature]['binary'] = config[section].getboolean('binary')
+                                self._features[feature]['categorical'] = config[section].getboolean('categorical')
+                                self._features[feature]['numerical'] = config[section].getboolean('numerical')
 
-        self._numerical_columns = [k for k in self._features if self._features[k]['numerical']]
+        self._binary_columns = [k for k in self._features if self._features[k]['binary']]
         self._categorical_columns = [k for k in self._features if self._features[k]['categorical']]
-
-
+        self._numerical_columns = [k for k in self._features if self._features[k]['numerical']]
 
     @property
     def start(self):
@@ -75,7 +77,6 @@ class Preprocessing:
     def start(self, x):
         self._start = x
 
-
     @property
     def end(self):
         return self._end
@@ -84,7 +85,6 @@ class Preprocessing:
     def end(self, x):
         self._end = x
 
-
     @property
     def timestep(self):
         return self._timestep
@@ -93,7 +93,6 @@ class Preprocessing:
     def timestep(self, x):
         self._timestep = x
 
-
     def _fill_dict(self):
         '''
         Add datetime keys in the dated feature dictionary that are missing. The
@@ -105,16 +104,16 @@ class Preprocessing:
         while current <= self._end:
             self._datetimes.append(current)
             if current not in self._dict_features:
-                self._dict_features[current] = {feature:np.NaN
+                self._dict_features[current] = {feature: np.NaN
                                                 for feature in self._features}
             else:
-                null_dict = {feature:np.NaN
+                null_dict = {feature: np.NaN
                              for feature in self._features}
                 null_dict.update(self._dict_features[current])
                 self._dict_features[current] = null_dict
             current += self._timestep
         for k in self._dict_features:
-            null_dict = {feature:np.NaN
+            null_dict = {feature: np.NaN
                          for feature in self._features}
             null_dict.update(self._dict_features[k])
             self._dict_features[k] = null_dict
@@ -122,8 +121,6 @@ class Preprocessing:
         self._full_dict = {k: self._dict_features[k]
                            for k in sorted(self._dict_features.keys())}
 
-
-
     @property
     def full_dict(self):
         '''
@@ -133,7 +130,6 @@ class Preprocessing:
             self._fill_dict()
         return self._full_dict
 
-
     def _fill_nan(self):
         '''
         Fill NaN values, either by propagation or by interpolation (linear or splines)
@@ -150,7 +146,7 @@ class Preprocessing:
         elif self._config['PREPROCESSING']['fill_method'] == 'spline':
             self._dataframe[self._numerical_columns] =\
                 self._dataframe[self._numerical_columns].interpolate(method='spline',
-                     order=self._config['PREPROCESSING'].getint('order'))
+                                                                     order=self._config['PREPROCESSING'].getint('order'))
 
         # For the categorical columns, NaN values are filled by duplicating
         # the last known value (forward fill method)
 
         # For the categorical columns, NaN values are filled by duplicating
         # the last known value (forward fill method)
@@ -173,21 +169,18 @@ class Preprocessing:
         self._dataframe = self._dataframe.drop(['row_ok'], axis=1)
         logger.info("Rows dropped")
 
-
     def _add_history(self):
         '''
         Integrating previous nb of interventions as features
         '''
         logger.info("Integrating previous nb of interventions as features")
-        nb_lines = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
-        for k in range(1,nb_lines+1):
-            name = 'history_'+str(nb_lines-k+1)
-            self._dataframe[name] = [np.NaN]*k + list(self._dict_target.values())[:-k]
+        nb_lines = eval(self._config['HISTORY_KNOWLEDGE']['nb_lines'])
+        for k in range(1, nb_lines + 1):
+            name = 'history_' + str(nb_lines - k + 1)
+            self._dataframe[name] = [np.NaN] * k + list(self._dict_target.values())[:-k]
             self._numerical_columns.append(name)
         self._dataframe = self._dataframe[nb_lines:]
 
-
-
     def _standardize(self):
         '''
         Normalizing numerical features
@@ -197,26 +190,25 @@ class Preprocessing:
         self._dataframe[self._numerical_columns] =\
             preprocessing.scale(self._dataframe[self._numerical_columns])
 
-
-
     def _one_hot_encoding(self):
         '''
         Apply a one hot encoding for category features
         '''
         logger.info("One hot encoding for categorical feature")
-
         # We store numerical columns
         df_out = pd.DataFrame()
-        for col in  self._numerical_columns:
+        for col in self._numerical_columns:
+            df_out[col] = self._dataframe[col]
+        # Idem for binary features
+        for col in self._binary_columns:
             df_out[col] = self._dataframe[col]
         # The one hot encoding
         for col in self._categorical_columns:
-            pd1 = pd.get_dummies(self._dataframe[col],prefix=col)
+            pd1 = pd.get_dummies(self._dataframe[col], prefix=col)
             for col1 in pd1.columns:
                 df_out[col1] = pd1[col1]
         self._dataframe = df_out
 
-
     @property
     def dataframe(self):
         '''
@@ -236,7 +228,6 @@ class Preprocessing:
             self._one_hot_encoding()
         return self._dataframe
 
-
     @dataframe.setter
     def dataframe(self, df):
         self._dataframe = df
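With the new binary category, _one_hot_encoding distinguishes three kinds of columns: numerical columns (which _standardize scales) and the new binary columns are copied straight into the output frame, and only categorical columns go through pd.get_dummies. A small sketch of the resulting frame, with column names chosen for the example:

    import pandas as pd

    # sketch: binary columns bypass the one hot encoding, categorical ones are expanded
    df = pd.DataFrame({'ramadan': [False, True], 'barometricTrend': [1, 3]})
    out = pd.DataFrame()
    out['ramadan'] = df['ramadan']                    # binary: kept as a single column
    dummies = pd.get_dummies(df['barometricTrend'], prefix='barometricTrend')
    for col in dummies.columns:                       # categorical: one column per value
        out[col] = dummies[col]
    # out columns: ramadan, barometricTrend_1, barometricTrend_3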
diff --git a/predictops/source/holidays.py b/predictops/source/holidays.py
index 1a536fe102e52b930314105006b8f3950fbd9f64..6893db03ef5ef7eb804d3f1d9ec391cf18a2dcaf 100644 (file)
@@ -3,6 +3,7 @@ from datetime import datetime, timedelta
 from jours_feries_france.compute import JoursFeries
 from logging import getLogger
 from logging.config import fileConfig
+from pathlib import Path
 from vacances_scolaires_france import SchoolHolidayDates
 
 import itertools
@@ -90,17 +91,28 @@ class Holidays:
             bankHolidaysEve = tuple(u-timedelta(days=1) for u in bankHolidays)
             name = self._config['ZONE']['name']
             date = self._start
+            Date = datetime.date(date)
+            tomorrow = date + timedelta(days=1)
+            Tomorrow = datetime.date(tomorrow)
             d = SchoolHolidayDates()
+            dict_hour = {
+                'bankHolidays' : Date in bankHolidays,
+                'bankHolidaysEve': Date in bankHolidaysEve,
+                'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
+                'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
+            }
             while date <= self._end:
-                Date = datetime.date(date)
-                tomorrow = date + timedelta(days=1)
-                Tomorrow = datetime.date(tomorrow)
-                dict_hour = {
-                    'bankHolidays' : Date in bankHolidays,
-                    'bankHolidaysEve': Date in bankHolidaysEve,
-                    'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
-                    'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
-                }
                 self._dated_features[date] = dict_hour
                 self._dated_features[date] = dict_hour
+                current = date
                 date += timedelta(hours=1)
                 date += timedelta(hours=1)
+                if date.day != current.day:
+                    Date = datetime.date(date)
+                    tomorrow = date + timedelta(days=1)
+                    Tomorrow = datetime.date(tomorrow)
+                    dict_hour = {
+                        'bankHolidays' : Date in bankHolidays,
+                        'bankHolidaysEve': Date in bankHolidaysEve,
+                        'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
+                        'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
+                    }
         return self._dated_features
\ No newline at end of file
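The rewritten loop in holidays.py computes dict_hour once per day instead of once per hour: the flags are built before the loop, reused for every hourly timestamp, and rebuilt only when date.day changes. A compressed sketch of the pattern, with a stand-in for the holiday lookups:

    from datetime import datetime, timedelta

    def day_flags(d):
        return {'weekend': d.weekday() >= 5}      # stand-in for the bank/school holiday lookups

    date, end = datetime(2020, 2, 22), datetime(2020, 2, 24)
    flags, features = day_flags(date), {}
    while date <= end:
        features[date] = flags
        previous = date
        date += timedelta(hours=1)
        if date.day != previous.day:              # recompute only when the day rolls over
            flags = day_flags(date)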
diff --git a/predictops/source/meteofrance.py b/predictops/source/meteofrance.py
index ff6a238c534a1d4aa5ab807e0b85a43554722c86..0edd49f544f33ed07b17c93dc7b1e493a6d0e6e5 100644 (file)
@@ -247,7 +247,7 @@ class MeteoFrance:
                 if (date >= self._start and date <= self._end)\
                 or (date.year == self._start.year and date.month == self._start.month)\
                 or (date.year == self._end.year and date.month == self._end.month):
-                    logger.info(f'Inserting {csv_meteo} in intervention dictionary')
+                    logger.info(f'Adding meteofrance features from {csv_meteo}')
                     with open(dir_data / csv_meteo, "r") as f:
                         reader = DictReader(f, delimiter=';')
                         for row in reader:
diff --git a/predictops/source/ramadan.py b/predictops/source/ramadan.py
new file mode 100644 (file)
index 0000000..6836df1
--- /dev/null
@@ -0,0 +1,71 @@
+from configparser import ConfigParser
+from convertdate import islamic
+from datetime import datetime, timedelta
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
+class Ramadan:
+
+    _start = None
+    _end = None
+
+    def __init__(self, config_file):
+
+        self._config = ConfigParser()
+        self._config.read(config_file)
+
+        # Collecting holidays features
+        self._features = [section for section in self._config
+                          if self._config[section].getboolean('numerical')
+                          or self._config[section].getboolean('categorical')]
+
+        self._dated_features = {}
+
+    @property
+    def start(self):
+        return self._start
+
+    @start.setter
+    def start(self, x):
+        self._start = x
+
+    @property
+    def end(self):
+        return self._end
+
+    @end.setter
+    def end(self, x):
+        self._end = x
+
+    @property
+    def dated_features(self):
+        if self._dated_features == {}:
+            logger.info("Adding Ramadan features")
+            date = self._start
+            while date <= self._end:
+                year, month, day = date.year, date.month, date.day
+                eve = datetime(year, month, day) - timedelta(days=1)
+                tomorrow = datetime(year, month, day) + timedelta(days=1)
+                Hegirian_month = islamic.from_gregorian(year, month, day)[1]
+                dict_hour = {
+                    'ramadanEve': False,
+                    'ramadan': False,
+                    'ramadanDayAfter': False
+                }
+                if Hegirian_month == 8 and\
+                   islamic.from_gregorian(tomorrow.year, tomorrow.month, tomorrow.day)[1] == 9:
+                    dict_hour['ramadanEve'] = True
+                elif Hegirian_month == 9:
+                    dict_hour['ramadan'] = True
+                elif Hegirian_month == 10 and\
+                        islamic.from_gregorian(eve.year, eve.month, eve.day)[1] == 9:
+                    dict_hour['ramadanDayAfter'] = True
+                self._dated_features[date] = dict_hour
+                date += timedelta(hours=1)
+        return self._dated_features
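ramadan.py relies on convertdate's islamic module: islamic.from_gregorian returns a (year, month, day) triple in the Hijri calendar, and month 9 is Ramadan, so the three flags are set from the month of the current day, its eve, and the following day. A quick sketch of the check (the conversion is arithmetic, so it can differ by a day or so from observed dates):

    from convertdate import islamic

    # index 1 of the (year, month, day) triple is the Hijri month; 9 means Ramadan
    hijri_year, hijri_month, hijri_day = islamic.from_gregorian(2019, 5, 15)
    is_ramadan = (hijri_month == 9)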
diff --git a/predictops/target/target.py b/predictops/target/target.py
index b998120b1be6b3b5524e8263c155370b6edceb98..9e3d86d31de82f24ad8a8c2bd6abf6e742f446fe 100644 (file)
@@ -12,7 +12,7 @@ logger = getLogger()
 class Target:
 
     def __init__(self, config_file = None,
-                 start = None, end = None, timestep = None):
+                 start = None, end = None, timestep = None, cumulative = None):
 
         self._config = ConfigParser()
         self._config.read(config_file)
@@ -20,6 +20,7 @@ class Target:
         self._start = start
         self._end = end
         self._timestep = timestep
+        self._cumulative = cumulative
 
         logger.info('Initialization of target variable')
         self._y = {}
@@ -68,36 +69,39 @@ class Target:
             logger.info('Integrating interventions for the whole area')
             with open(self._stream_file) as f:
                 reader = DictReader(f, delimiter=',')
-                for row in reader:
-                    if row['start'] != '':
-                        start_interv = datetime.strptime(row['start'], '%d/%m/%Y %H:%M:%S')
-                        start_interv = start_interv.replace(minute=0)
-                        end_interv = datetime.strptime(row['end'], '%d/%m/%Y %H:%M:%S')
-                        end_interv = end_interv.replace(minute=0)
-                        if not (start_interv > self._end or end_interv < self._start):
-                            if start_interv < self._start and end_interv <= self._end:
-                                current = self._start
-                                while current <= end_interv:
-                                    self._y[current] += 1
-                                    current += self._timestep
-                            elif start_interv >= self._start and end_interv > self._end:
-                                current = start_interv
-                                while current not in self._y:
-                                    current -= timedelta(hours=1)
-                                while current <= self._end:
-                                    self._y[current] += 1
-                                    current += self._timestep
-                            elif start_interv >= self._start and end_interv <= self._end:
-                                current = start_interv
-                                while current not in self._y:
-                                    current -= timedelta(hours=1)
-                                while current <= end_interv:
-                                    self._y[current] += 1
-                                    current += self._timestep
-
-
-
-
-
+                if self._cumulative:
+                    for row in reader:
+                        if row['start'] != '':
+                            start_interv = datetime.strptime(row['start'], '%d/%m/%Y %H:%M:%S')
+                            start_interv = start_interv.replace(minute=0)
+                            end_interv = datetime.strptime(row['end'], '%d/%m/%Y %H:%M:%S')
+                            end_interv = end_interv.replace(minute=0)
+                            if not (start_interv > self._end or end_interv < self._start):
+                                if start_interv < self._start and end_interv <= self._end:
+                                    current = self._start
+                                    while current <= end_interv:
+                                        self._y[current] += 1
+                                        current += self._timestep
+                                elif start_interv >= self._start and end_interv > self._end:
+                                    current = start_interv
+                                    while current not in self._y:
+                                        current -= timedelta(hours=1)
+                                    while current <= self._end:
+                                        self._y[current] += 1
+                                        current += self._timestep
+                                elif start_interv >= self._start and end_interv <= self._end:
+                                    current = start_interv
+                                    while current not in self._y:
+                                        current -= timedelta(hours=1)
+                                    while current <= end_interv:
+                                        self._y[current] += 1
+                                        current += self._timestep
+                else:
+                    for row in reader:
+                        if row['start'] != '':
+                            start_interv = datetime.strptime(row['start'], '%d/%m/%Y %H:%M:%S')
+                            start_interv = start_interv.replace(minute=0)
+                            if start_interv in self._y:
+                                self._y[start_interv] += 1
 
 
diff --git a/requirements.txt b/requirements.txt
index 3e40e81e4f54c2c996a71f4146c13a9ff589b8f8..6615f1fc4c5a72ac226089c20e20c6f9babcaef1 100644 (file)
@@ -2,14 +2,22 @@ attrs==19.3.0
 Click==7.0
 click-plugins==1.1.1
 cligj==0.5.0
+convertdate==2.2.0
+cycler==0.10.0
 Fiona==1.8.13
 geographiclib==1.50
 geopandas==0.6.3
 geopy==1.21.0
 joblib==0.14.1
+jours-feries-france==0.5.1
+kiwisolver==1.1.0
+lightgbm==2.3.1
+matplotlib==3.1.3
 munch==2.5.0
 numpy==1.18.1
 pandas==1.0.1
+PyMeeus==0.3.6
+pyparsing==2.4.6
 pyproj==2.4.2.post1
 python-dateutil==2.8.1
 pytz==2019.3
@@ -17,5 +25,6 @@ scikit-learn==0.22.1
 scipy==1.4.1
 Shapely==1.7.0
 six==1.14.0
+vacances-scolaires-france==0.7.0
 xgboost==0.90
 xlrd==1.2.0