lightgbm is now working

[predictops.git] / predictops / source / meteofrance.py
diff --git a/predictops/source/meteofrance.py b/predictops/source/meteofrance.py

index 6bd23edc1435b857c5a2a00150778870ad30ebd7..0edd49f544f33ed07b17c93dc7b1e493a6d0e6e5 100644 (file)
--- a/predictops/source/meteofrance.py
+++ b/predictops/source/meteofrance.py
@@ -1,5 +1,3 @@
-from .source import Source
-
  from configparser import ConfigParser
  from csv import DictReader
  from datetime import datetime
  from configparser import ConfigParser
  from csv import DictReader
  from datetime import datetime
@@ -18,10 +16,8 @@ import gzip
  fileConfig((Path.cwd() / 'config') / 'logging.cfg')
  logger = getLogger()
  
  fileConfig((Path.cwd() / 'config') / 'logging.cfg')
  logger = getLogger()
  
-CSV_FILE = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
-
  
  
-class MeteoFrance(Source):
+class MeteoFrance:
  
      _latitude    = None
      _longitude   = None
  
      _latitude    = None
      _longitude   = None
@@ -33,31 +29,13 @@ class MeteoFrance(Source):
      def __init__(self, config_file):
          '''
          Constructor of the MeteoFrance source of feature.
      def __init__(self, config_file):
          '''
          Constructor of the MeteoFrance source of feature.
-
-        - It will reinitiate the data directory, if asked in the config
-          features.cfg file.
-        - It searches for the nb_stations meteo stations closest to the provided
-          point (longitude and latitude)
-
-        For more information about this source of feature, see:
-    https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32
-
-        Parameters:
-          - in config file:
-            latitude (float): The latitude from which we want the meteo features.
-            longitude (float): The longitude from which we want the meteo features.
-            nb_stations (int): Number of closest stations to consider.
-          - provided to the constructor
-            features (list): Weather features that have to be integrated, according
-                  to their names in meteofrance_features.csv (cf. config directory)
-
          '''
          '''
-        # Check for the integrity of feature names
-        Source.__init__(self)
-
          self._config = ConfigParser()
          self._config.read(config_file)
  
          self._config = ConfigParser()
          self._config.read(config_file)
  
+        self._latitude = self._config['POSITION'].getfloat('latitude')
+        self._longitude = self._config['POSITION'].getfloat('longitude')
+
          self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
  
          self._dated_features = None
          self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
  
          self._dated_features = None
@@ -71,10 +49,11 @@ class MeteoFrance(Source):
          self._stations = self._get_stations()
  
          # Collecting meteofrance features
          self._stations = self._get_stations()
  
          # Collecting meteofrance features
-        with open(CSV_FILE, "r") as f:
-            reader = DictReader(f, delimiter=',')
-            self._features = [row['name'] for row in reader
-                              if self._config['FEATURES'].getboolean(row['name'])]
+        self._features = [section for section in self._config
+                          if self._config.has_option(section, 'numerical')
+                          and (self._config[section]['numerical'] or
+                               self._config[section]['categorical'])]
+
  
  
      @property
  
  
      @property
@@ -252,26 +231,23 @@ class MeteoFrance(Source):
              dict: the dictionary of features per datestamp
          '''
          if self._dated_features == None:
              dict: the dictionary of features per datestamp
          '''
          if self._dated_features == None:
-            logger.info(f'Collecting meteo feature information from {CSV_FILE}')
+            logger.info('Collecting meteofrance feature information')
              # A dictionary for the features
              # A dictionary for the features
-            with open(CSV_FILE, "r") as f:
-                reader = DictReader(f, delimiter=',')
-                dico_features = {row["abbreviation"]:
-                                   {
-                                       'name': row['name'], # feature name
-                                       'type': row['type']  # qualitative (2) or quantitative (1)
-                                    }
-                                for row in reader if row['name'] in self._features}
-                #print([row for row in reader])
-                #print([row for row in reader if row['name'] in self._features])
+            dico_features = {self._config[section]["abbreviation"]:
+                               {
+                                   'name': section, # feature name
+                                   'numerical': self._config[section]['numerical'],
+                                   'categorical': self._config[section]['categorical']
+                                }
+                            for section in self._features}
              dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
              self._dated_features = {}
              dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
              self._dated_features = {}
-            for csv_meteo in listdir(dir_data):
+            for csv_meteo in sorted(listdir(dir_data)):
                  date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
                  if (date >= self._start and date <= self._end)\
                  or (date.year == self._start.year and date.month == self._start.month)\
                  or (date.year == self._end.year and date.month == self._end.month):
                  date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
                  if (date >= self._start and date <= self._end)\
                  or (date.year == self._start.year and date.month == self._start.month)\
                  or (date.year == self._end.year and date.month == self._end.month):
-                    logger.info(f'Inserting {csv_meteo} in intervention dictionary')
+                    logger.info(f'Adding meteofrance features from {csv_meteo}')
                      with open(dir_data / csv_meteo, "r") as f:
                          reader = DictReader(f, delimiter=';')
                          for row in reader:
                      with open(dir_data / csv_meteo, "r") as f:
                          reader = DictReader(f, delimiter=';')
                          for row in reader: