AND Private Git Repository - predictops.git/blobdiff - lib/source/meteofrance.py
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Bug fixed and starting to work on the target variable
[predictops.git] / lib / source / meteofrance.py
index 3f1c3eb1069f9862841362991cf68192b85df8d6..88706ead406777ef0e119f0798cd1d169b6daf7f 100644 (file)
@@ -12,12 +12,16 @@ from urllib.request import urlretrieve
 
 import gzip
 
+
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 class MeteoFrance:
 
-    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3):
+    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
+                 start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
+                 end = datetime.now(),
+                 features = []):
         '''
         Constructor of the MeteoFrance source of feature.
 
@@ -33,11 +37,16 @@ class MeteoFrance:
             latitude (float): The latitude from which we want the meteo features.
             longitude (float): The longitude from which we want the meteo features.
             nb_stations (int): Number of closest stations to consider.
+            features (list): Weather features that have to be integrated, according
+                  to their names in meteofrance_features.csv (cf. config directory)
 
         '''
         self._latitude = latitude
         self._longitude = longitude
         self._nb_stations = nb_stations
+        self._start = start
+        self._end = end
+        self._features = features
 
         self._data_directory = (Path.cwd() / 'data') / 'meteo_france'
 
@@ -123,11 +132,11 @@ class MeteoFrance:
         '''
         # List of year-months to consider
         historical = []
-        date_end = datetime.now()
-        for year in range(1996, date_end.year+1):
+        date_end = self._end
+        for year in range(self._start.year, date_end.year+1):
             for month in range(1,13):
                 date = datetime(year, month, 1)
-                if date <= date_end:
+                if date >= self._start and date <= date_end:
                     historical.append(date.strftime("%Y%m"))
 
         # We download all csv files from meteofrance that are not in
@@ -167,7 +176,6 @@ class MeteoFrance:
             remove(self._data_directory / 'historical' / todel)
         except:
             logger.warning(f"{self._data_directory / 'historical' / todel} not found")
-        system("touch "+todel)
         self._collect_historical_data()
 
 
@@ -175,29 +183,38 @@ class MeteoFrance:
     @property
     def dated_features(self):
         '''
+        If the attribute dated_features is None, then we create it: a dictionary
+        with datestamps as keys, and {features: values} as values.
+         - considered features are the ones from meteofrance_features.csv, found
+           in config/features/meteofrance directory
+         - only the closest meteo stations are considered
+
+        Returns:
+            dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
-            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance' / 'meteofrance_features.csv'
+            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
             logger.info(f'Collecting meteo feature information from {csv_file}')
             # A dictionary for the features
             with open(csv_file, "r") as f:
                 reader = DictReader(f, delimiter=',')
-                next(reader)
                 dico_features = {row["abbreviation"]:
                                    {
                                        'name': row['name'], # feature name
                                        'type': row['type']  # qualitative (2) or quantitative (1)
                                     }
-                                for row in reader}
-
+                                for row in reader if row['name'] in self._features}
             dir_data = Path.cwd() / 'data' / 'meteo_france' / 'historical'
             self._dated_features = {}
             for csv_meteo in listdir(dir_data):
-                logger.info(f'Inserting {csv_meteo} in intervention dictionary')
-                with open(dir_data / csv_meteo, "r") as f:
-                    reader = DictReader(f, delimiter=';')
-                    for row in reader:
-                        if row['numer_sta'] in self._stations:
-                            self._dated_features.setdefault(row['date'],{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+                date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
+                if date >= self._start and date <= self._end:
+                    logger.info(f'Inserting {csv_meteo} in intervention dictionary')
+                    with open(dir_data / csv_meteo, "r") as f:
+                        reader = DictReader(f, delimiter=';')
+                        for row in reader:
+                            if row['numer_sta'] in self._stations:
+                                date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
+                                self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
         return self._dated_features