]> AND Private Git Repository - predictops.git/blobdiff - predictops/source/meteofrance.py
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Refactoring, fin du lever/coucher de soleil, et début de sentinelles
[predictops.git] / predictops / source / meteofrance.py
index 2326e16c92e2f097a7e4853221e7006d5008becd..05eb7104563ba6ce7a2b2cab653ee3463b88642e 100644 (file)
@@ -16,52 +16,84 @@ import gzip
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
+
 class MeteoFrance:
 
 class MeteoFrance:
 
-    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
-                 start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
-                 end = datetime.now(),
-                 features = []):
+    _latitude = None
+    _longitude = None
+    _nb_stations = None
+    _start = None
+    _end = None
+    _features = None
+
+    def __init__(self, config_file):
         '''
         Constructor of the MeteoFrance source of feature.
         '''
         Constructor of the MeteoFrance source of feature.
-
-        - It will reinitiate the data directory, if asked in the config
-          features.cfg file.
-        - It searches for the nb_stations meteo stations closest to the provided
-          point (longitude and latitude)
-
-        For more information about this source of feature, see:
-    https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32
-
-        Parameters:
-            latitude (float): The latitude from which we want the meteo features.
-            longitude (float): The longitude from which we want the meteo features.
-            nb_stations (int): Number of closest stations to consider.
-            features (list): Weather features that have to be integrated, according
-                  to their names in meteofrance_features.csv (cf. config directory)
-
         '''
         '''
-        self._latitude = latitude
-        self._longitude = longitude
-        self._nb_stations = nb_stations
-        self._start = start
-        self._end = end
-        self._features = features
+        self._config = ConfigParser()
+        self._config.read(config_file)
+
+        self._latitude = self._config['POSITION'].getfloat('latitude')
+        self._longitude = self._config['POSITION'].getfloat('longitude')
 
         self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
 
         self._dated_features = None
 
         # Re-creating data directory architecture for MeteoFrance, if asked
 
         self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
 
         self._dated_features = None
 
         # Re-creating data directory architecture for MeteoFrance, if asked
-        config = ConfigParser()
-        config.read((Path.cwd() / 'config') / 'features.cfg')
-        if eval(config['meteofrance']['regenerate']):
+        if self._config['GENERAL'].getboolean('regenerate'):
             self._regenerate_directory()
 
         # Collecting the closest meteo station
             self._regenerate_directory()
 
         # Collecting the closest meteo station
+        self._nb_stations = self._config['STATIONS'].getint('nb_stations')
         self._stations = self._get_stations()
 
         self._stations = self._get_stations()
 
+        # Collecting meteofrance features
+        self._features = [section for section in self._config
+                          if self._config.has_option(section, 'numerical')
+                          and (self._config[section]['binary'] or
+                               self._config[section]['categorical'] or
+                               self._config[section]['numerical'])]
+
+    @property
+    def start(self):
+        return self._start
+
+    @start.setter
+    def start(self, x):
+        self._start = x
+
+    @property
+    def end(self):
+        return self._end
 
 
+    @end.setter
+    def end(self, x):
+        self._end = x
+
+    @property
+    def latitude(self):
+        return self._latitude
+
+    @latitude.setter
+    def latitude(self, x):
+        self._latitude = x
+
+    @property
+    def longitude(self):
+        return self._longitude
+
+    @longitude.setter
+    def longitude(self, x):
+        self._longitude = x
+
+    @property
+    def nb_stations(self):
+        return self._nb_stations
+
+    @nb_stations.setter
+    def nb_stations(self, x):
+        self._nb_stations = x
 
     def _regenerate_directory(self):
         '''
 
     def _regenerate_directory(self):
         '''
@@ -77,8 +109,6 @@ class MeteoFrance:
         p = Path(self._data_directory / 'config')
         p.mkdir(exist_ok=True, parents=True)
 
         p = Path(self._data_directory / 'config')
         p.mkdir(exist_ok=True, parents=True)
 
-
-
     def _get_stations(self):
         '''
         Collect (after downloading them, if needed) the stations and their
     def _get_stations(self):
         '''
         Collect (after downloading them, if needed) the stations and their
@@ -91,7 +121,7 @@ class MeteoFrance:
         # The csv file of meteo stations (names, ids and locations) if downloaded,
         # if not available in the config directory within data / meteo_france
         link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
         # The csv file of meteo stations (names, ids and locations) if downloaded,
         # if not available in the config directory within data / meteo_france
         link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
-        p = Path(self._data_directory / 'config' )
+        p = Path(self._data_directory / 'config')
         csv_file = p / basename(link)
         if not isfile(csv_file):
             logger.info('Downloading location stations from MeteoFrance')
         csv_file = p / basename(link)
         if not isfile(csv_file):
             logger.info('Downloading location stations from MeteoFrance')
@@ -104,11 +134,11 @@ class MeteoFrance:
             reader = DictReader(f, delimiter=';')
             for row in reader:
                 latitude, longitude = eval(row['Latitude']), eval(row['Longitude'])
             reader = DictReader(f, delimiter=';')
             for row in reader:
                 latitude, longitude = eval(row['Latitude']), eval(row['Longitude'])
-                self._dict_stations[row['Nom'].replace("'",'’')] = {
-                    'id' : row['ID'],
-                    'longitude' : longitude,
-                    'latitude' : latitude,
-                    'distance' : vincenty(
+                self._dict_stations[row['Nom'].replace("'", '’')] = {
+                    'id': row['ID'],
+                    'longitude': longitude,
+                    'latitude': latitude,
+                    'distance': vincenty(
                         (self._latitude, self._longitude),
                         (latitude, longitude)).km
                 }
                         (self._latitude, self._longitude),
                         (latitude, longitude)).km
                 }
@@ -116,13 +146,11 @@ class MeteoFrance:
         # Find the closest stations
         logger.info('Finding the closest stations')
         stations_by_distance = sorted(self._dict_stations.keys(),
         # Find the closest stations
         logger.info('Finding the closest stations')
         stations_by_distance = sorted(self._dict_stations.keys(),
-                                      key = lambda x: self._dict_stations[x]['distance'])
+                                      key=lambda x: self._dict_stations[x]['distance'])
         logger.info(f'The {self._nb_stations} closest stations are: '
                     f'{", ".join(stations_by_distance[:self._nb_stations])}.')
         return [self._dict_stations[sta]['id'] for sta in stations_by_distance][:self._nb_stations]
 
         logger.info(f'The {self._nb_stations} closest stations are: '
                     f'{", ".join(stations_by_distance[:self._nb_stations])}.')
         return [self._dict_stations[sta]['id'] for sta in stations_by_distance][:self._nb_stations]
 
-
-
     def _collect_historical_data(self):
         '''
         We collect all csv files from January 1996 until the month
     def _collect_historical_data(self):
         '''
         We collect all csv files from January 1996 until the month
@@ -133,8 +161,8 @@ class MeteoFrance:
         # List of year-months to consider
         historical = []
         date_end = self._end
         # List of year-months to consider
         historical = []
         date_end = self._end
-        for year in range(self._start.year, date_end.year+1):
-            for month in range(1,13):
+        for year in range(self._start.year, date_end.year + 1):
+            for month in range(1, 13):
                 date = datetime(year, month, 1)
                 if date >= self._start and date <= date_end:
                     historical.append(date.strftime("%Y%m"))
                 date = datetime(year, month, 1)
                 if date >= self._start and date <= date_end:
                     historical.append(date.strftime("%Y%m"))
@@ -145,7 +173,7 @@ class MeteoFrance:
         p = Path(meteo_data)
         p.mkdir(exist_ok=True, parents=True)
         for date in historical:
         p = Path(meteo_data)
         p.mkdir(exist_ok=True, parents=True)
         for date in historical:
-            if not isfile(meteo_data / ('synop.'+date+'.csv')):
+            if not isfile(meteo_data / ('synop.' + date + '.csv')):
                 link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'
                 link += date + '.csv.gz'
                 download_path = meteo_data / basename(link)
                 link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'
                 link += date + '.csv.gz'
                 download_path = meteo_data / basename(link)
@@ -156,8 +184,6 @@ class MeteoFrance:
                         g.write(f.read().decode())
                         remove(meteo_data / basename(link))
 
                         g.write(f.read().decode())
                         remove(meteo_data / basename(link))
 
-
-
     def update(self):
         '''
         Update the MeteoFrance features with the last available data
     def update(self):
         '''
         Update the MeteoFrance features with the last available data
@@ -171,15 +197,13 @@ class MeteoFrance:
 
         logger.info('Update historical csv files from MeteoFrance, if needed')
         today = datetime.now()
 
         logger.info('Update historical csv files from MeteoFrance, if needed')
         today = datetime.now()
-        todel = 'synop.'+today.strftime("%Y%m")+".csv"
+        todel = 'synop.' + today.strftime("%Y%m") + ".csv"
         try:
             remove(self._data_directory / 'historical' / todel)
         except:
             logger.warning(f"{self._data_directory / 'historical' / todel} not found")
         self._collect_historical_data()
 
         try:
             remove(self._data_directory / 'historical' / todel)
         except:
             logger.warning(f"{self._data_directory / 'historical' / todel} not found")
         self._collect_historical_data()
 
-
-
     @property
     def dated_features(self):
         '''
     @property
     def dated_features(self):
         '''
@@ -193,28 +217,30 @@ class MeteoFrance:
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
-            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
-            logger.info(f'Collecting meteo feature information from {csv_file}')
+            logger.info('Collecting meteofrance feature information')
             # A dictionary for the features
             # A dictionary for the features
-            with open(csv_file, "r") as f:
-                reader = DictReader(f, delimiter=',')
-                dico_features = {row["abbreviation"]:
-                                   {
-                                       'name': row['name'], # feature name
-                                       'type': row['type']  # qualitative (2) or quantitative (1)
-                                    }
-                                for row in reader if row['name'] in self._features}
+            dico_features = {self._config[section]["abbreviation"]:
+                             {
+                'name': section,  # feature name
+                'numerical': self._config[section]['numerical'],
+                'categorical': self._config[section]['categorical']
+            }
+                for section in self._features}
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
-            for csv_meteo in listdir(dir_data):
+
+            for csv_meteo in sorted(listdir(dir_data)):
                 date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
                 date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
-                if date >= self._start and date <= self._end:
-                    logger.info(f'Inserting {csv_meteo} in intervention dictionary')
+                if (date >= self._start and date <= self._end)\
+                        or (date.year == self._start.year and date.month == self._start.month)\
+                        or (date.year == self._end.year and date.month == self._end.month):
+                    logger.info(f'Adding meteofrance features from {csv_meteo}')
                     with open(dir_data / csv_meteo, "r") as f:
                         reader = DictReader(f, delimiter=';')
                         for row in reader:
                             if row['numer_sta'] in self._stations:
                                 date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
                     with open(dir_data / csv_meteo, "r") as f:
                         reader = DictReader(f, delimiter=';')
                         for row in reader:
                             if row['numer_sta'] in self._stations:
                                 date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
-                                self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+                                if date >= self._start and date <= self._end:
+                                    self._dated_features.setdefault(date, {}).update({dico_features[feat]['name'] + '_' + str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq', 'None')) for feat in dico_features})
         return self._dated_features
 
         return self._dated_features