X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/predictops.git/blobdiff_plain/6a7cf0e5dd962e7da028cb0d82c6682ac87b0540..a3cc2abef295b99990b24b1c822319c80a59ffe2:/lib/source/meteofrance.py?ds=sidebyside diff --git a/lib/source/meteofrance.py b/lib/source/meteofrance.py index 3f1c3eb..88706ea 100644 --- a/lib/source/meteofrance.py +++ b/lib/source/meteofrance.py @@ -12,12 +12,16 @@ from urllib.request import urlretrieve import gzip + fileConfig((Path.cwd() / 'config') / 'logging.cfg') logger = getLogger() class MeteoFrance: - def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3): + def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3, + start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'), + end = datetime.now(), + features = []): ''' Constructor of the MeteoFrance source of feature. @@ -33,11 +37,16 @@ class MeteoFrance: latitude (float): The latitude from which we want the meteo features. longitude (float): The longitude from which we want the meteo features. nb_stations (int): Number of closest stations to consider. + features (list): Weather features that have to be integrated, according + to their names in meteofrance_features.csv (cf. config directory) ''' self._latitude = latitude self._longitude = longitude self._nb_stations = nb_stations + self._start = start + self._end = end + self._features = features self._data_directory = (Path.cwd() / 'data') / 'meteo_france' @@ -123,11 +132,11 @@ class MeteoFrance: ''' # List of year-months to consider historical = [] - date_end = datetime.now() - for year in range(1996, date_end.year+1): + date_end = self._end + for year in range(self._start.year, date_end.year+1): for month in range(1,13): date = datetime(year, month, 1) - if date <= date_end: + if date >= self._start and date <= date_end: historical.append(date.strftime("%Y%m")) # We download all csv files from meteofrance that are not in @@ -167,7 +176,6 @@ class MeteoFrance: remove(self._data_directory / 'historical' / todel) except: logger.warning(f"{self._data_directory / 'historical' / todel} not found") - system("touch "+todel) self._collect_historical_data() @@ -175,29 +183,38 @@ class MeteoFrance: @property def dated_features(self): ''' + If the attribute dated_features is None, then we create it: a dictionary + with datestamps as keys, and {features: values} as values. + - considered features are the ones from meteofrance_features.csv, found + in config/features/meteofrance directory + - only the closest meteo stations are considered + + Returns: + dict: the dictionary of features per datestamp ''' if self._dated_features == None: - csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance' / 'meteofrance_features.csv' + csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv' logger.info(f'Collecting meteo feature information from {csv_file}') # A dictionary for the features with open(csv_file, "r") as f: reader = DictReader(f, delimiter=',') - next(reader) dico_features = {row["abbreviation"]: { 'name': row['name'], # feature name 'type': row['type'] # qualitative (2) or quantitative (1) } - for row in reader} - + for row in reader if row['name'] in self._features} dir_data = Path.cwd() / 'data' / 'meteo_france' / 'historical' self._dated_features = {} for csv_meteo in listdir(dir_data): - logger.info(f'Inserting {csv_meteo} in intervention dictionary') - with open(dir_data / csv_meteo, "r") as f: - reader = DictReader(f, delimiter=';') - for row in reader: - if row['numer_sta'] in self._stations: - self._dated_features.setdefault(row['date'],{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features}) + date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m') + if date >= self._start and date <= self._end: + logger.info(f'Inserting {csv_meteo} in intervention dictionary') + with open(dir_data / csv_meteo, "r") as f: + reader = DictReader(f, delimiter=';') + for row in reader: + if row['numer_sta'] in self._stations: + date = datetime.strptime(row['date'], '%Y%m%d%H%M%S') + self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features}) return self._dated_features