AND Private Git Repository - predictops.git/blobdiff - predictops/source/meteofrance.py
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
XGBoost integrated
[predictops.git] / predictops / source / meteofrance.py
index 2326e16c92e2f097a7e4853221e7006d5008becd..b26c6bf6525f0a87d1cba73d251e7937c89beac6 100644 (file)
@@ -1,3 +1,5 @@
+from .source import Source
+
 from configparser import ConfigParser
 from csv import DictReader
 from datetime import datetime
@@ -16,12 +18,19 @@ import gzip
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
-class MeteoFrance:
+CSV_FILE = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
+
+
+class MeteoFrance(Source):
+
+    _latitude    = None
+    _longitude   = None
+    _nb_stations = None
+    _start       = None
+    _end         = None
+    _features    = None
 
-    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
-                 start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
-                 end = datetime.now(),
-                 features = []):
+    def __init__(self, config_file):
         '''
         Constructor of the MeteoFrance source of feature.
 
@@ -34,33 +43,86 @@ class MeteoFrance:
     https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32
 
         Parameters:
+          - in config file:
             latitude (float): The latitude from which we want the meteo features.
             longitude (float): The longitude from which we want the meteo features.
             nb_stations (int): Number of closest stations to consider.
+          - in config file, FEATURES section (one boolean per feature name):
             features (list): Weather features that have to be integrated, according
                   to their names in meteofrance_features.csv (cf. config directory)
 
         '''
-        self._latitude = latitude
-        self._longitude = longitude
-        self._nb_stations = nb_stations
-        self._start = start
-        self._end = end
-        self._features = features
+        # Check for the integrity of feature names
+        Source.__init__(self)
+
+        self._config = ConfigParser()
+        self._config.read(config_file)
+
+        self._latitude = self._config['POSITION'].getfloat('latitude')
+        self._longitude = self._config['POSITION'].getfloat('longitude')
 
         self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
 
         self._dated_features = None
 
         # Re-creating data directory architecture for MeteoFrance, if asked
-        config = ConfigParser()
-        config.read((Path.cwd() / 'config') / 'features.cfg')
-        if eval(config['meteofrance']['regenerate']):
+        if self._config['GENERAL'].getboolean('regenerate'):
             self._regenerate_directory()
 
         # Collecting the closest meteo station
+        self._nb_stations = self._config['STATIONS'].getint('nb_stations')
         self._stations = self._get_stations()
 
+        # Collecting meteofrance features
+        with open(CSV_FILE, "r") as f:
+            reader = DictReader(f, delimiter=',')
+            self._features = [row['name'] for row in reader
+                              if self._config['FEATURES'].getboolean(row['name'])]
+
+
+    @property
+    def start(self):
+        return self._start
+
+    @start.setter
+    def start(self, x):
+        self._start = x
+
+
+    @property
+    def end(self):
+        return self._end
+
+    @end.setter
+    def end(self, x):
+        self._end = x
+
+
+    @property
+    def latitude(self):
+        return self._latitude
+
+    @latitude.setter
+    def latitude(self, x):
+        self._latitude = x
+
+
+    @property
+    def longitude(self):
+        return self._longitude
+
+    @longitude.setter
+    def longitude(self, x):
+        self._longitude = x
+
+
+    @property
+    def nb_stations(self):
+        return self._nb_stations
+
+    @nb_stations.setter
+    def nb_stations(self, x):
+        self._nb_stations = x
 
 
     def _regenerate_directory(self):
@@ -193,10 +255,9 @@ class MeteoFrance:
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
-            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
-            logger.info(f'Collecting meteo feature information from {csv_file}')
+            logger.info(f'Collecting meteo feature information from {CSV_FILE}')
             # A dictionary for the features
-            with open(csv_file, "r") as f:
+            with open(CSV_FILE, "r") as f:
                 reader = DictReader(f, delimiter=',')
                 dico_features = {row["abbreviation"]:
                                    {
@@ -204,17 +265,22 @@ class MeteoFrance:
                                        'type': row['type']  # qualitative (2) or quantitative (1)
                                     }
                                 for row in reader if row['name'] in self._features}
+                #print([row for row in reader])
+                #print([row for row in reader if row['name'] in self._features])
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
             for csv_meteo in listdir(dir_data):
                 date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
-                if date >= self._start and date <= self._end:
+                if (date >= self._start and date <= self._end)\
+                or (date.year == self._start.year and date.month == self._start.month)\
+                or (date.year == self._end.year and date.month == self._end.month):
                     logger.info(f'Inserting {csv_meteo} in intervention dictionary')
                     with open(dir_data / csv_meteo, "r") as f:
                         reader = DictReader(f, delimiter=';')
                         for row in reader:
                             if row['numer_sta'] in self._stations:
                                 date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
-                                self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+                                if date  >= self._start and date <= self._end:
+                                    self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
         return self._dated_features