AND Private Git Repository - predictops.git/blobdiff - predictops/source/meteofrance.py
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Reducing the computation time and adding holidays features
[predictops.git] / predictops / source / meteofrance.py
index b26c6bf6525f0a87d1cba73d251e7937c89beac6..ff6a238c534a1d4aa5ab807e0b85a43554722c86 100644 (file)
@@ -1,5 +1,3 @@
-from .source import Source
-
 from configparser import ConfigParser
 from csv import DictReader
 from datetime import datetime
@@ -18,10 +16,8 @@ import gzip
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
-CSV_FILE = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
-
 
-class MeteoFrance(Source):
+class MeteoFrance:
 
     _latitude    = None
     _longitude   = None
@@ -33,28 +29,7 @@ class MeteoFrance(Source):
     def __init__(self, config_file):
         '''
         Constructor of the MeteoFrance source of feature.
-
-        - It will reinitiate the data directory, if asked in the config
-          features.cfg file.
-        - It searches for the nb_stations meteo stations closest to the provided
-          point (longitude and latitude)
-
-        For more information about this source of feature, see:
-    https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32
-
-        Parameters:
-          - in config file:
-            latitude (float): The latitude from which we want the meteo features.
-            longitude (float): The longitude from which we want the meteo features.
-            nb_stations (int): Number of closest stations to consider.
-          - provided to the constructor
-            features (list): Weather features that have to be integrated, according
-                  to their names in meteofrance_features.csv (cf. config directory)
-
         '''
-        # Check for the integrity of feature names
-        Source.__init__(self)
-
         self._config = ConfigParser()
         self._config.read(config_file)
 
@@ -74,10 +49,11 @@ class MeteoFrance(Source):
         self._stations = self._get_stations()
 
         # Collecting meteofrance features
-        with open(CSV_FILE, "r") as f:
-            reader = DictReader(f, delimiter=',')
-            self._features = [row['name'] for row in reader
-                              if self._config['FEATURES'].getboolean(row['name'])]
+        self._features = [section for section in self._config
+                          if self._config.has_option(section, 'numerical')
+                          and (self._config[section]['numerical'] or
+                               self._config[section]['categorical'])]
+
 
 
     @property
@@ -255,21 +231,18 @@ class MeteoFrance(Source):
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
-            logger.info(f'Collecting meteo feature information from {CSV_FILE}')
+            logger.info('Collecting meteofrance feature information')
             # A dictionary for the features
-            with open(CSV_FILE, "r") as f:
-                reader = DictReader(f, delimiter=',')
-                dico_features = {row["abbreviation"]:
-                                   {
-                                       'name': row['name'], # feature name
-                                       'type': row['type']  # qualitative (2) or quantitative (1)
-                                    }
-                                for row in reader if row['name'] in self._features}
-                #print([row for row in reader])
-                #print([row for row in reader if row['name'] in self._features])
+            dico_features = {self._config[section]["abbreviation"]:
+                               {
+                                   'name': section, # feature name
+                                   'numerical': self._config[section]['numerical'],
+                                   'categorical': self._config[section]['categorical']
+                                }
+                            for section in self._features}
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
-            for csv_meteo in listdir(dir_data):
+            for csv_meteo in sorted(listdir(dir_data)):
                 date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
                 if (date >= self._start and date <= self._end)\
                 or (date.year == self._start.year and date.month == self._start.month)\