1 from configparser import ConfigParser
2 from csv import DictReader
3 from geopy import distance
4 from pathlib import Path
5 from shutil import rmtree
7 from logging.config import fileConfig
8 from os.path import isfile, basename
9 from urllib.request import urlretrieve
# Configure logging from <current working directory>/config/logging.cfg,
# then keep a module-wide handle on the root logger.
fileConfig(Path.cwd().joinpath('config', 'logging.cfg'))
logger = logging.getLogger()
def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3):
    """
    Constructor of the MeteoFrance source of feature.

    - It re-creates the data directory, if asked in the config
      (option 'regenerate' of section 'meteofrance' in config/features.cfg)
    - It searches for the nb_stations meteo stations closest to the provided
      point (longitude and latitude)

    For more information about this source of feature, see:
    https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32

    Parameters:
        latitude (float): The latitude from which we want the meteo features.
        longitude (float): The longitude from which we want the meteo features.
        nb_stations (int): Number of closest stations to consider.

    Raises:
        configparser.Error: If the 'meteofrance' section or its 'regenerate'
            option is missing from the configuration.
        ValueError: If 'regenerate' is not a recognised boolean
            (1/yes/true/on or 0/no/false/off, case-insensitive).
    """
    self._latitude = latitude
    self._longitude = longitude
    self._nb_stations = nb_stations

    # All paths are resolved relative to the current working directory.
    self._data_directory = Path.cwd() / 'data' / 'meteo_france'

    # Re-creating data directory architecture for MeteoFrance, if asked.
    # getboolean() parses the flag without evaluating the configuration text
    # as Python code (the previous eval() would run whatever the file held).
    config = ConfigParser()
    config.read(Path.cwd() / 'config' / 'features.cfg')
    if config.getboolean('meteofrance', 'regenerate'):
        self._regenerate_directory()

    # Collecting the closest meteo stations
    self._stations = self._get_stations()
def _regenerate_directory(self):
    """
    Re-create the data directory architecture for MeteoFrance.

    The whole self._data_directory tree is removed (when it exists), then
    its 'historical' and 'config' sub-directories are created again, empty.
    """
    logger.info("Regenerating meteofrance data directory")

    try:
        rmtree(self._data_directory)
    except FileNotFoundError:
        # Nothing to delete yet (e.g. very first run): not an error, the
        # directories are simply created below.
        pass

    # parents=True also re-creates self._data_directory itself.
    for subdirectory in ('historical', 'config'):
        (self._data_directory / subdirectory).mkdir(exist_ok=True, parents=True)
def _get_stations(self):
    """
    Collect (after downloading them, if needed) the stations and their
    locations in a dictionary, and rank them by distance to the point
    (self._latitude, self._longitude).

    Returns:
        list: The self._nb_stations closest station IDs, starting by the
              closest.
    """
    # The csv file of meteo stations (names, ids and locations) is downloaded,
    # if not available in the config directory within data / meteo_france
    link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
    config_directory = self._data_directory / 'config'
    csv_file = config_directory / basename(link)
    if not csv_file.is_file():
        logger.info('Downloading location stations from MeteoFrance')
        # The target directory may not exist when no regeneration was asked.
        config_directory.mkdir(parents=True, exist_ok=True)
        urlretrieve(link, csv_file)

    # distance.vincenty no longer exists in geopy >= 2.0; prefer geodesic and
    # only fall back to vincenty on old geopy releases that lack it.
    compute_distance = getattr(distance, 'geodesic', None) or distance.vincenty
    origin = (self._latitude, self._longitude)

    # A dictionary for the meteo stations is created, keyed by station name
    logger.info('Collecting information about meteo stations')
    dict_stations = {}
    with open(csv_file, "r") as f:
        reader = DictReader(f, delimiter=';')
        for row in reader:
            # float() instead of eval(): the file comes from the network and
            # must never be evaluated as Python code.
            latitude, longitude = float(row['Latitude']), float(row['Longitude'])
            dict_stations[row['Nom'].replace("'", '’')] = {
                # NOTE(review): 'ID' is assumed to be the identifier column of
                # postesSynop.csv -- confirm against the file header.
                'id' : row['ID'],
                'longitude' : longitude,
                'latitude' : latitude,
                'distance' : compute_distance(origin, (latitude, longitude)).km,
            }

    # Find the closest stations
    logger.info('Finding the closest stations')
    closest_stations = sorted(
        dict_stations,
        key = lambda name: dict_stations[name]['distance'])[:self._nb_stations]
    logger.info(f'The {self._nb_stations} closest stations are: '
                f'{", ".join(closest_stations)}.')
    return [dict_stations[name]['id'] for name in closest_stations]
115 def _get_feature(self):