import gzip
+
# Load the logging configuration from config/logging.cfg, resolved against the
# current working directory. This runs as an import-time side effect.
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
# Logger shared by the whole module; getLogger() is called without a name
# (presumably logging.getLogger, which then returns the root logger -- confirm
# against the file's imports).
logger = getLogger()
class MeteoFrance:
- def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3):
+ def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
+ start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
+ end = datetime.now(),
+ features = []):
'''
Constructor of the MeteoFrance source of feature.
latitude (float): The latitude from which we want the meteo features.
longitude (float): The longitude from which we want the meteo features.
nb_stations (int): Number of closest stations to consider.
+ features (list): Weather features that have to be integrated, according
+ to their names in meteofrance_features.csv (cf. config directory)
'''
self._latitude = latitude
self._longitude = longitude
self._nb_stations = nb_stations
+ self._start = start
+ self._end = end
+ self._features = features
self._data_directory = (Path.cwd() / 'data') / 'meteo_france'
'''
# List of year-months to consider
historical = []
- date_end = datetime.now()
- for year in range(1996, date_end.year+1):
+ date_end = self._end
+ for year in range(self._start.year, date_end.year+1):
for month in range(1,13):
date = datetime(year, month, 1)
- if date <= date_end:
+ if date >= self._start and date <= date_end:
historical.append(date.strftime("%Y%m"))
# We download all csv files from meteofrance that are not in
@property
def dated_features(self):
    '''
    Lazily build and return the weather features, grouped by timestamp.

    On first access (while self._dated_features is None) the dictionary is
    built as described below, then cached on the instance so that later
    accesses return it directly:
      - the feature catalogue is read from
        config/features/meteofrance_features.csv (relative to the current
        working directory); only the rows whose 'name' is listed in
        self._features are kept;
      - every file of data/meteo_france/historical whose name carries a
        month (second dot-separated field, formatted YYYYMM) lying between
        the month of self._start and self._end is read; files whose name
        carries no such month are skipped with a warning;
      - only the rows whose 'numer_sta' belongs to self._stations are used.

    Returns:
        dict: {datetime: {'<feature name>_<station rank>': value}}, where
            the station rank is the position of the station in
            self._stations, and a cell equal to 'mq' yields None.
    '''
    if self._dated_features is None:
        csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
        logger.info(f'Collecting meteo feature information from {csv_file}')
        # Catalogue of the requested features, keyed by their abbreviation
        # (the column name used in the historical csv files).
        with open(csv_file, "r") as f:
            reader = DictReader(f, delimiter=',')
            dico_features = {
                row["abbreviation"]: {
                    'name': row['name'],  # feature name
                    'type': row['type'],  # qualitative (2) or quantitative (1)
                }
                for row in reader if row['name'] in self._features
            }

        dir_data = Path.cwd() / 'data' / 'meteo_france' / 'historical'
        # Files are monthly: compare at month granularity so that a start
        # date falling in the middle of a month does not discard the whole
        # file of that month (the ending month is likewise kept in full).
        first_month = self._start.replace(day=1, hour=0, minute=0,
                                          second=0, microsecond=0)
        # Build into a local dictionary and publish it only once complete:
        # a failure while parsing must not leave a partial cache behind.
        collected = {}
        for csv_meteo in listdir(dir_data):
            try:
                month = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
            except (IndexError, ValueError):
                # Not a '<prefix>.YYYYMM.<ext>' file: ignore it.
                logger.warning(f'Skipping {csv_meteo}: no YYYYMM month in its name')
                continue
            if not first_month <= month <= self._end:
                continue
            logger.info(f'Inserting {csv_meteo} in intervention dictionary')
            with open(dir_data / csv_meteo, "r") as f:
                reader = DictReader(f, delimiter=';')
                for row in reader:
                    if row['numer_sta'] not in self._stations:
                        continue
                    # Rank of the station, computed once per row.
                    rank = self._stations.index(row['numer_sta'])
                    timestamp = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
                    # NOTE(security): eval() executes whatever text the csv
                    # cell contains. This is only acceptable while the data
                    # files are trusted; consider explicit int()/float()
                    # parsing instead.
                    collected.setdefault(timestamp, {}).update({
                        info['name'] + '_' + str(rank):
                            eval(row[feat].replace('mq', 'None'))
                        for feat, info in dico_features.items()
                    })
        self._dated_features = collected
    return self._dated_features