1 from configparser import ConfigParser
2 from csv import DictReader
3 from datetime import datetime
4 from geopy.distance import vincenty
5 from logging import getLogger
6 from logging.config import fileConfig
7 from os import listdir, remove
8 from os.path import isfile, basename
9 from pathlib import Path
10 from shutil import rmtree
11 from urllib.request import urlretrieve
# Logging is configured as soon as the module is imported, from the
# logging.cfg file of the 'config' directory found in the current working
# directory.
fileConfig(Path.cwd().joinpath('config', 'logging.cfg'))
# CSV description of the MeteoFrance features; it is read in this module
# through its 'name', 'abbreviation' and 'type' columns.
CSV_FILE = Path.cwd().joinpath('config', 'features', 'meteofrance_features.csv')
def __init__(self, config_file):
    """
    Constructor of the MeteoFrance source of feature.

    - The configuration file is parsed with ConfigParser.
    - The data directory is reinitiated when the 'regenerate' option of the
      [GENERAL] section is true.
    - It searches for the nb_stations meteo stations closest to the
      reference point (longitude and latitude); nb_stations is the integer
      option of the same name in the [STATIONS] section.
    - The retained features are the names listed in
      meteofrance_features.csv (cf. config directory) whose option in the
      [FEATURES] section is true.

    For more information about this source of feature, see:
    https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32

    Parameters:
        config_file: path of the configuration file, handed unchanged to
            ConfigParser.read.
    """
    self._config = ConfigParser()
    self._config.read(config_file)
    config = self._config

    self._data_directory = Path.cwd() / 'data' / 'features' / 'meteo_france'

    # Cache of the features per datestamp, filled on first request
    self._dated_features = None

    # Re-creating data directory architecture for MeteoFrance, if asked
    regenerate = config['GENERAL'].getboolean('regenerate')
    if regenerate:
        self._regenerate_directory()

    # Collecting the closest meteo stations
    self._nb_stations = config['STATIONS'].getint('nb_stations')
    self._stations = self._get_stations()

    # Collecting meteofrance features: one row per feature in the CSV
    # file, kept only when enabled in the configuration
    with open(CSV_FILE, "r") as f:
        selected = []
        for row in DictReader(f, delimiter=','):
            feature_name = row['name']
            if config['FEATURES'].getboolean(feature_name):
                selected.append(feature_name)
        self._features = selected
98 def latitude(self, x):
104 return self._longitude
107 def longitude(self, x):
def nb_stations(self):
    """
    Number of closest meteo stations to consider.

    NOTE(review): presumably exposed as a property; the decorator line is
    not part of the reviewed extract.
    """
    return self._nb_stations
def nb_stations(self, x):
    """
    Set the number of closest meteo stations to consider.

    Only the stored value is replaced: the list of stations computed
    earlier is not refreshed here.

    NOTE(review): presumably the setter of the nb_stations property; the
    decorator line is not part of the reviewed extract.
    """
    self._nb_stations = x
def _regenerate_directory(self):
    """
    Re-create the data directory architecture for MeteoFrance.

    The whole self._data_directory tree is removed, then its 'historical'
    and 'config' sub-directories are created again (empty).
    """
    logger.info("Regenerating meteofrance data directory")
    # A directory that does not exist yet (first run) must not abort the
    # regeneration: removal is best-effort, the sub-directories are
    # (re)created below in every case.
    rmtree(self._data_directory, ignore_errors=True)
    for subdirectory in ('historical', 'config'):
        (self._data_directory / subdirectory).mkdir(exist_ok=True, parents=True)
def _get_stations(self):
    """
    Collect (after downloading them, if needed) the meteo stations and
    their locations in the self._dict_stations dictionary.

    Each entry is indexed by the station name (with "'" replaced by '’')
    and holds the station id, its longitude and latitude, and its distance
    in km to the point (self._latitude, self._longitude).

    Returns:
        list: The self._nb_stations closest station IDs, starting by the
              closest one.
    """
    # The csv file of meteo stations (names, ids and locations) is
    # downloaded, if it is not available in the config directory within
    # the MeteoFrance data directory
    link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
    config_dir = Path(self._data_directory / 'config')
    csv_file = config_dir / basename(link)
    if not csv_file.is_file():
        logger.info('Downloading location stations from MeteoFrance')
        # urlretrieve does not create the missing parent directories
        config_dir.mkdir(exist_ok=True, parents=True)
        urlretrieve(link, csv_file)

    # A dictionary for the meteo stations is created
    reference_point = (self._latitude, self._longitude)
    self._dict_stations = {}
    logger.info('Collecting information about meteo stations')
    with open(csv_file, "r") as f:
        for row in DictReader(f, delimiter=';'):
            # The coordinates come from a downloaded file: they are parsed
            # as numbers with float(), never evaluated as Python code.
            latitude = float(row['Latitude'])
            longitude = float(row['Longitude'])
            self._dict_stations[row['Nom'].replace("'", '’')] = {
                # NOTE(review): the 'id' entry is read back below, but its
                # assignment was not visible in the reviewed extract; the
                # 'ID' column of postesSynop.csv is assumed -- confirm.
                'id': row['ID'],
                'longitude': longitude,
                'latitude': latitude,
                # NOTE(review): vincenty is no longer available in
                # geopy >= 2.0 (geodesic replaces it) -- confirm the
                # pinned geopy version.
                'distance': vincenty(
                    reference_point,
                    (latitude, longitude)).km,
            }

    # Find the closest stations
    logger.info('Finding the closest stations')
    stations_by_distance = sorted(
        self._dict_stations,
        key=lambda name: self._dict_stations[name]['distance'])
    closest = stations_by_distance[:self._nb_stations]
    logger.info(f'The {self._nb_stations} closest stations are: '
                f'{", ".join(closest)}.')
    return [self._dict_stations[name]['id'] for name in closest]
180 def _collect_historical_data(self):
182 We collect all csv files from January 1996 until the month
183 before now. The argument in the url to download are of the
184 form 201001 for January 2010. We start by computing all these
185 patterns, in historical list.
187 # List of year-months to consider
190 for year in range(self._start.year, date_end.year+1):
191 for month in range(1,13):
192 date = datetime(year, month, 1)
193 if date >= self._start and date <= date_end:
194 historical.append(date.strftime("%Y%m"))
196 # We download all csv files from meteofrance that are not in
197 # the data repository
198 meteo_data = self._data_directory / 'historical'
200 p.mkdir(exist_ok=True, parents=True)
201 for date in historical:
202 if not isfile(meteo_data / ('synop.'+date+'.csv')):
203 link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'
204 link += date + '.csv.gz'
205 download_path = meteo_data / basename(link)
206 urlretrieve(link, download_path)
207 with gzip.open(download_path, 'rb') as f:
208 csv_file = meteo_data / basename(link[:-3])
209 with open(csv_file, 'w') as g:
210 g.write(f.read().decode())
211 remove(meteo_data / basename(link))
217 Update the MeteoFrance features with the last available data
219 # We collect archive files from MeteoFrance, until the current month
220 # by using the same method than for data generation : this is currently
221 # based on the presence of a synop.+date+.csv' file in the
222 # data/meteo_france/historical directory. The file corresponding to the
223 # current month is deleted first, so that its most recent version will
224 # be downloaded by calling self._collect_historical_data
226 logger.info('Update historical csv files from MeteoFrance, if needed')
227 today = datetime.now()
228 todel = 'synop.'+today.strftime("%Y%m")+".csv"
230 remove(self._data_directory / 'historical' / todel)
232 logger.warning(f"{self._data_directory / 'historical' / todel} not found")
233 self._collect_historical_data()
def dated_features(self):
    """
    Return the features per datestamp, building them on first request.

    If the attribute self._dated_features is None, then we create it: a
    dictionary with datestamps as keys, and {features: values} as values.
      - considered features are the rows of meteofrance_features.csv
        (CSV_FILE) whose name is in self._features
      - only the rows of the stations listed in self._stations are
        considered; each key is the feature name followed by '_' and the
        position of the station in self._stations
      - only datestamps between self._start and self._end are kept
      - 'mq' values found in the MeteoFrance files are stored as None

    NOTE(review): presumably exposed as a property; the decorator line is
    not part of the reviewed extract.

    Returns:
        dict: the dictionary of features per datestamp
    """
    if self._dated_features is not None:
        return self._dated_features

    # Values come from downloaded files: they are parsed as Python literals
    # (numbers or None) and never executed as code.
    from ast import literal_eval

    logger.info(f'Collecting meteo feature information from {CSV_FILE}')
    # A dictionary for the features, indexed by their abbreviation
    with open(CSV_FILE, "r") as f:
        dico_features = {
            row["abbreviation"]: {
                'name': row['name'],  # feature name
                'type': row['type'],  # qualitative (2) or quantitative (1)
            }
            for row in DictReader(f, delimiter=',')
            if row['name'] in self._features
        }

    # Position of each station in self._stations (first occurrence), as it
    # appears in the feature keys; computed once instead of once per row
    station_rank = {}
    for rank, station in enumerate(self._stations):
        station_rank.setdefault(station, str(rank))

    start, end = self._start, self._end
    # Files of the first and last months are always read, even when the
    # first day of the month lies outside [start, end]
    boundary_months = {(start.year, start.month), (end.year, end.month)}
    dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'

    # Built in a local dictionary: a failure half-way must not leave a
    # partially filled cache behind
    features_by_date = {}
    for csv_meteo in listdir(dir_data):
        month = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
        if not (start <= month <= end
                or (month.year, month.month) in boundary_months):
            continue
        logger.info(f'Inserting {csv_meteo} in intervention dictionary')
        with open(dir_data / csv_meteo, "r") as f:
            for row in DictReader(f, delimiter=';'):
                rank = station_rank.get(row['numer_sta'])
                if rank is None:
                    continue
                date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
                if not start <= date <= end:
                    continue
                # All the values of the row are parsed before the entry
                # of this datestamp is touched
                parsed = {
                    f"{feature['name']}_{rank}": literal_eval(
                        row[abbreviation].replace('mq', 'None').strip())
                    for abbreviation, feature in dico_features.items()
                }
                features_by_date.setdefault(date, {}).update(parsed)

    self._dated_features = features_by_date
    return self._dated_features