[year]
binary = False
categorical = False
-numerical = True
\ No newline at end of file
+numerical = True
+
+[sunRised]
+binary = True
+categorical = False
+numerical = False
+
+[noon]
+binary = True
+categorical = False
+numerical = False
+
+[night]
+binary = True
+categorical = False
+numerical = False
+
+[daylightSavingTime]
+binary = True
+categorical = False
+numerical = False
regenerate = False
reinsert = True
-[POSITION]
-latitude = 47.25
-longitude = 6.0333
-
[STATIONS]
nb_stations = 3
[DATETIME]
-start = 01/01/2006 00:00:00
+start = 01/01/2016 00:00:00
end = 12/31/2019 23:00:00
-hourStep = 1
+hourStep = 3
+
+
+[HISTORY_KNOWLEDGE]
+nb_lines = 24//3*7*4
+
+
+[TARGET]
+config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+cumulative = True
+horizon = 3
[FEATURES]
holidays = True
meteofrance = True
ramadan = True
+sentinelles = True
[FEATURE_CONFIG]
holidays = (Path.cwd() / 'config') / 'features' / 'feature_holidays.cfg'
meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
ramadan = (Path.cwd() / 'config') / 'features' / 'feature_ramadan.cfg'
+sentinelles = (Path.cwd() / 'config') / 'features' / 'feature_sentinelles.cfg'
[PREPROCESSING]
order = 3
-[HISTORY_KNOWLEDGE]
-nb_lines = 24*7*4
-
-
-[TARGET]
-config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
-cumulative = True
-horizon = 0
-
[LEARNER]
config = (Path.cwd() / 'config') / 'learners' / 'lightgbm.cfg'
\ No newline at end of file
[HYPERPARAMETERS]
learning_rate = 0.1
metric = auc
-num_iterations = 1000
+num_iterations = 300
num_round = 10
-num_leaves = 31
+num_leaves = 900
objective = poisson
[HYPERPARAMETERS]
learning_rate = 0.01
-max_depth = 7
+max_depth = 6
random_state = 42
n_estimators = 10000
n_jobs = -1
+[POSITION]
+name = Besançon
+country = France
+timezone = Europe/Paris
+# Decimal degrees: latitude is positive north, longitude is positive east.
+# Besançon lies east of Greenwich, so its longitude must be positive.
+latitude = 47.237829
+longitude = 6.0240539
+
[SPECIFICATION]
origin = False
destination = False
from shutil import rmtree
import os
+import pytz
from .learn.learning import Learning
from .learn.preprocessing import Preprocessing
from .source.ephemeris import Ephemeris
from .source.holidays import Holidays
-from .source.ramadan import Ramadan
from .source.meteofrance import MeteoFrance
+from .source.ramadan import Ramadan
+from .source.sentinelles import Sentinelles
from .target.target import Target
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
self._config_text += f.read() + '\n\n'
- ephemerides = Ephemeris(config_file=config_file)
+ ephemerides = Ephemeris(config_file=config_file, start=self._start, end=self._end)
- ephemerides.start = self._start
- ephemerides.end = self._end
+ # ephemerides.start = self._start
+ # ephemerides.end = self._end
dated_features = ephemerides.dated_features
for date in dated_features:
def add_preprocessing(self):
self._preproc = Preprocessing(config_file=self._config,
+ start=self._start, end=self._end,
+ timestep=self._timestep,
dict_features=self.X,
dict_target=self.y)
else:
self._X = X
self._y = y
- rep = (Path.cwd() / self._file_name)
- rep.mkdir()
- self._filename = str(self._file_name / os.path.basename(self._file_name))
- self._X.to_csv(self._filename + '.csv')
self._learn()
self._evaluate()
for k in range(10):
txt += f"Percentage of errors lower than {k}: {[abs(int(u-v))<=k for u,v in zip(self._y_test.values, y_test_pred)].count(True)/len(self._y_test)*100}\n"
+ rep = (Path.cwd() / self._file_name)
+ rep.mkdir()
+ self._filename = str(self._file_name / os.path.basename(self._file_name))
+
print(txt)
with open(self._filename + ".result", 'w') as f:
f.write(txt)
from configparser import ConfigParser
-from csv import DictReader
-from datetime import datetime, timedelta
from itertools import chain
from logging import getLogger
from logging.config import fileConfig
-from os import listdir
from pathlib import Path
from sklearn import preprocessing
'''
def __init__(self, config_file=None,
+ start=None, end=None, timestep=None,
dict_features=None, dict_target=None):
'''
Constructor that defines all needed attributes and collects features.
'''
self._config = config_file
- self._start = datetime.strptime(self._config['DATETIME']['start'],
- '%m/%d/%Y %H:%M:%S')
- self._end = datetime.strptime(self._config['DATETIME']['end'],
- '%m/%d/%Y %H:%M:%S')
- self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
+ self._start = start
+ self._end = end
+ self._timestep = timestep
self._dict_features = dict_features
self._dict_target = dict_target
'''
logger.info("One hot encoding for categorical feature")
# We store numerical columns
+
df_out = pd.DataFrame()
for col in self._numerical_columns:
df_out[col] = self._dataframe[col]
self._fill_nan()
# Adding previous (historical) nb_interventions as features
self._add_history()
+ # self._dataframe.to_csv('toto.csv')
+ # exit()
# Normalizing numerical values
self._standardize()
# Dealing with categorical features
+from astral import LocationInfo
+from astral.sun import sun
from configparser import ConfigParser
from datetime import datetime, timedelta
from logging import getLogger
from logging.config import fileConfig
from pathlib import Path
-import time
import calendar
+import pytz
+import time
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
logger = getLogger()
+
class Ephemeris:
_start = None
- _end = None
+ _end = None
- def __init__(self, config_file):
+ def __init__(self, config_file, start, end):
self._config = ConfigParser()
self._config.read(config_file)
+ self._city = LocationInfo("Besançon", "France", "Europe/Paris", 47.237829, -6.0240539)
+ self._start = start
+ self._end = end
# Collecting ephemeris features
self._features = [section for section in self._config
- if self._config[section].getboolean('numerical')
- or self._config[section].getboolean('categorical')]
+ if self._config[section].getboolean('binary')
+ or self._config[section].getboolean('categorical')
+ or self._config[section].getboolean('numerical')]
self._dated_features = {}
-
@property
def start(self):
return self._start
def start(self, x):
self._start = x
-
@property
def end(self):
return self._end
def end(self, x):
self._end = x
-
@property
def dated_features(self):
if self._dated_features == {}:
logger.info("Adding ephemeris features")
+ paris = pytz.timezone('Europe/Paris')
date = self._start
while date <= self._end:
+ datel = paris.localize(date)
dict_hour = {}
Date = time.strptime(datetime.strftime(date, '%m/%d/%Y %H:%M:%S'), '%m/%d/%Y %H:%M:%S')
+ s = sun(self._city.observer, date=date,
+ tzinfo=pytz.timezone('Europe/Paris'))
for feature in self._features:
if feature == 'hour':
dict_hour['hour'] = Date.tm_hour
# Si c'est une année bissextile et qu'on est après le 29 février, on compte une journée
# dans l'année de moins, car on va supprimer les 29 févriers, de sorte que les 14 juillets,
# les 24 décembre... tombent toujours
- if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/"+str(Date.tm_year), "%d/%m/%Y"):
- dict_hour['dayInYear'] = Date.tm_yday -1
+ if calendar.isleap(Date.tm_year) and Date >= time.strptime("29/02/" + str(Date.tm_year), "%d/%m/%Y"):
+ dict_hour['dayInYear'] = Date.tm_yday - 1
else:
dict_hour['dayInYear'] = Date.tm_yday
elif feature == 'weekInYear':
dict_hour['weekInYear'] = date.isocalendar()[1]
+ elif feature == 'sunRised':
+ dict_hour['sunRised'] = (datel >= s["sunrise"] - timedelta(minutes=30)
+ and datel <= s["sunset"] - timedelta(minutes=30))
+ elif feature == 'noon':
+ dict_hour['noon'] = (datel.hour == s["noon"].hour)
+ elif feature == 'night':
+ dict_hour['night'] = (datel <= s["dawn"] - timedelta(minutes=30)
+ or datel >= s["dusk"] - timedelta(minutes=30))
+ elif feature == 'daylightSavingTime':
+ dict_hour['daylightSavingTime'] = (datel.dst() == timedelta(0))
+
self._dated_features[date] = dict_hour
date += timedelta(hours=1)
- return self._dated_features
\ No newline at end of file
+ return self._dated_features
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
logger = getLogger()
+
class Holidays:
_start = None
- _end = None
+ _end = None
def __init__(self, config_file):
# Collecting holidays features
self._features = [section for section in self._config
- if self._config[section].getboolean('numerical')
- or self._config[section].getboolean('categorical')]
+ if self._config[section].getboolean('binary')
+ or self._config[section].getboolean('categorical')
+ or self._config[section].getboolean('numerical')]
self._dated_features = {}
-
@property
def start(self):
return self._start
def start(self, x):
self._start = x
-
@property
def end(self):
return self._end
def end(self, x):
self._end = x
-
-
def _get_academic_zone(self, name, date):
dict_zones = {
- 'Caen' : ('A', 'B'),
- 'Clermont-Ferrand' : ('A', 'A'),
- 'Grenoble' : ('A', 'A'),
- 'Lyon' : ('A', 'A'),
- 'Montpellier' : ('A', 'C'),
- 'Nancy-Metz' : ('A', 'B'),
- 'Nantes' : ('A', 'B'),
- 'Rennes' : ('A', 'B'),
- 'Toulouse' : ('A', 'C'),
- 'Aix-Marseille' : ('B', 'B'),
- 'Amiens' : ('B', 'B'),
- 'Besançon' : ('B', 'A'),
- 'Dijon' : ('B', 'A'),
- 'Lille' : ('B', 'B'),
- 'Limoges' : ('B', 'A'),
- 'Nice' : ('B', 'B'),
- 'Orléans-Tours' : ('B', 'B'),
- 'Poitiers' : ('B', 'A'),
- 'Reims' : ('B', 'B'),
- 'Rouen ' : ('B', 'B'),
- 'Strasbourg' : ('B', 'B'),
- 'Bordeaux' : ('C', 'A'),
- 'Créteil' : ('C', 'C'),
- 'Paris' : ('C', 'C'),
- 'Versailles' : ('C', 'C')
+ 'Caen': ('A', 'B'),
+ 'Clermont-Ferrand': ('A', 'A'),
+ 'Grenoble': ('A', 'A'),
+ 'Lyon': ('A', 'A'),
+ 'Montpellier': ('A', 'C'),
+ 'Nancy-Metz': ('A', 'B'),
+ 'Nantes': ('A', 'B'),
+ 'Rennes': ('A', 'B'),
+ 'Toulouse': ('A', 'C'),
+ 'Aix-Marseille': ('B', 'B'),
+ 'Amiens': ('B', 'B'),
+ 'Besançon': ('B', 'A'),
+ 'Dijon': ('B', 'A'),
+ 'Lille': ('B', 'B'),
+ 'Limoges': ('B', 'A'),
+ 'Nice': ('B', 'B'),
+ 'Orléans-Tours': ('B', 'B'),
+ 'Poitiers': ('B', 'A'),
+ 'Reims': ('B', 'B'),
+ 'Rouen ': ('B', 'B'),
+ 'Strasbourg': ('B', 'B'),
+ 'Bordeaux': ('C', 'A'),
+ 'Créteil': ('C', 'C'),
+ 'Paris': ('C', 'C'),
+ 'Versailles': ('C', 'C')
}
if date < datetime(2016, 1, 1):
return dict_zones[name][0]
else:
return dict_zones[name][1]
-
@property
def dated_features(self):
if self._dated_features == {}:
logger.info("Adding holidays features")
bankHolidays = tuple(itertools.chain.from_iterable(list(JoursFeries.for_year(k).values())
- for k in range(self.start.year, self.end.year+1)))
- bankHolidaysEve = tuple(u-timedelta(days=1) for u in bankHolidays)
+ for k in range(self.start.year, self.end.year + 1)))
+ bankHolidaysEve = tuple(u - timedelta(days=1) for u in bankHolidays)
name = self._config['ZONE']['name']
date = self._start
Date = datetime.date(date)
Tomorrow = datetime.date(tomorrow)
d = SchoolHolidayDates()
dict_hour = {
- 'bankHolidays' : Date in bankHolidays,
+ 'bankHolidays': Date in bankHolidays,
'bankHolidaysEve': Date in bankHolidaysEve,
'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
tomorrow = date + timedelta(days=1)
Tomorrow = datetime.date(tomorrow)
dict_hour = {
- 'bankHolidays' : Date in bankHolidays,
+ 'bankHolidays': Date in bankHolidays,
'bankHolidaysEve': Date in bankHolidaysEve,
'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
}
- return self._dated_features
\ No newline at end of file
+ return self._dated_features
class MeteoFrance:
- _latitude = None
- _longitude = None
+ _latitude = None
+ _longitude = None
_nb_stations = None
- _start = None
- _end = None
- _features = None
+ _start = None
+ _end = None
+ _features = None
def __init__(self, config_file):
'''
# Collecting meteofrance features
self._features = [section for section in self._config
if self._config.has_option(section, 'numerical')
- and (self._config[section]['numerical'] or
- self._config[section]['categorical'])]
-
-
+ and (self._config[section]['binary'] or
+ self._config[section]['categorical'] or
+ self._config[section]['numerical'])]
@property
def start(self):
def start(self, x):
self._start = x
-
@property
def end(self):
return self._end
def end(self, x):
self._end = x
-
@property
def latitude(self):
return self._latitude
def latitude(self, x):
self._latitude = x
-
@property
def longitude(self):
return self._longitude
def longitude(self, x):
self._longitude = x
-
@property
def nb_stations(self):
return self._nb_stations
def nb_stations(self, x):
self._nb_stations = x
-
def _regenerate_directory(self):
'''
Re-creating data directory architecture for MeteoFrance
p = Path(self._data_directory / 'config')
p.mkdir(exist_ok=True, parents=True)
-
-
def _get_stations(self):
'''
Collect (after downloading them, if needed) the stations and their
# The csv file of meteo stations (names, ids and locations) if downloaded,
# if not available in the config directory within data / meteo_france
link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv'
- p = Path(self._data_directory / 'config' )
+ p = Path(self._data_directory / 'config')
csv_file = p / basename(link)
if not isfile(csv_file):
logger.info('Downloading location stations from MeteoFrance')
reader = DictReader(f, delimiter=';')
for row in reader:
latitude, longitude = eval(row['Latitude']), eval(row['Longitude'])
- self._dict_stations[row['Nom'].replace("'",'’')] = {
- 'id' : row['ID'],
- 'longitude' : longitude,
- 'latitude' : latitude,
- 'distance' : vincenty(
+ self._dict_stations[row['Nom'].replace("'", '’')] = {
+ 'id': row['ID'],
+ 'longitude': longitude,
+ 'latitude': latitude,
+ 'distance': vincenty(
(self._latitude, self._longitude),
(latitude, longitude)).km
}
# Find the closest stations
logger.info('Finding the closest stations')
stations_by_distance = sorted(self._dict_stations.keys(),
- key = lambda x: self._dict_stations[x]['distance'])
+ key=lambda x: self._dict_stations[x]['distance'])
logger.info(f'The {self._nb_stations} closest stations are: '
f'{", ".join(stations_by_distance[:self._nb_stations])}.')
return [self._dict_stations[sta]['id'] for sta in stations_by_distance][:self._nb_stations]
-
-
def _collect_historical_data(self):
'''
We collect all csv files from January 1996 until the month
# List of year-months to consider
historical = []
date_end = self._end
- for year in range(self._start.year, date_end.year+1):
- for month in range(1,13):
+ for year in range(self._start.year, date_end.year + 1):
+ for month in range(1, 13):
date = datetime(year, month, 1)
if date >= self._start and date <= date_end:
historical.append(date.strftime("%Y%m"))
p = Path(meteo_data)
p.mkdir(exist_ok=True, parents=True)
for date in historical:
- if not isfile(meteo_data / ('synop.'+date+'.csv')):
+ if not isfile(meteo_data / ('synop.' + date + '.csv')):
link = 'https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.'
link += date + '.csv.gz'
download_path = meteo_data / basename(link)
g.write(f.read().decode())
remove(meteo_data / basename(link))
-
-
def update(self):
'''
Update the MeteoFrance features with the last available data
logger.info('Update historical csv files from MeteoFrance, if needed')
today = datetime.now()
- todel = 'synop.'+today.strftime("%Y%m")+".csv"
+ todel = 'synop.' + today.strftime("%Y%m") + ".csv"
try:
remove(self._data_directory / 'historical' / todel)
except:
logger.warning(f"{self._data_directory / 'historical' / todel} not found")
self._collect_historical_data()
-
-
@property
def dated_features(self):
'''
logger.info('Collecting meteofrance feature information')
# A dictionary for the features
dico_features = {self._config[section]["abbreviation"]:
- {
- 'name': section, # feature name
- 'numerical': self._config[section]['numerical'],
- 'categorical': self._config[section]['categorical']
- }
- for section in self._features}
+ {
+ 'name': section, # feature name
+ 'numerical': self._config[section]['numerical'],
+ 'categorical': self._config[section]['categorical']
+ }
+ for section in self._features}
dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
self._dated_features = {}
+
for csv_meteo in sorted(listdir(dir_data)):
date = datetime.strptime(csv_meteo.split('.')[1], '%Y%m')
if (date >= self._start and date <= self._end)\
- or (date.year == self._start.year and date.month == self._start.month)\
- or (date.year == self._end.year and date.month == self._end.month):
+ or (date.year == self._start.year and date.month == self._start.month)\
+ or (date.year == self._end.year and date.month == self._end.month):
logger.info(f'Adding meteofrance features from {csv_meteo}')
with open(dir_data / csv_meteo, "r") as f:
reader = DictReader(f, delimiter=';')
for row in reader:
if row['numer_sta'] in self._stations:
date = datetime.strptime(row['date'], '%Y%m%d%H%M%S')
- if date >= self._start and date <= self._end:
- self._dated_features.setdefault(date,{}).update({dico_features[feat]['name']+'_'+str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq','None')) for feat in dico_features})
+ if date >= self._start and date <= self._end:
+ self._dated_features.setdefault(date, {}).update({dico_features[feat]['name'] + '_' + str(self._stations.index(row['numer_sta'])): eval(row[feat].replace('mq', 'None')) for feat in dico_features})
return self._dated_features
# Collecting holidays features
self._features = [section for section in self._config
- if self._config[section].getboolean('numerical')
- or self._config[section].getboolean('categorical')]
+ if self._config[section].getboolean('binary')
+ or self._config[section].getboolean('categorical')
+ or self._config[section].getboolean('numerical')]
self._dated_features = {}
--- /dev/null
+from configparser import ConfigParser
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
+class Sentinelles:
+ # Source of Sentinelles features; at this stage it only loads its
+ # configuration file.
+ def __init__(self, config_file):
+ '''
+ Constructor of the Sentinelles source of feature.
+
+ config_file: path of the configuration file, passed to
+ ConfigParser.read().
+ '''
+ self._config = ConfigParser()
+ self._config.read(config_file)
class Target:
- def __init__(self, config_file = None,
- start = None, end = None, timestep = None, cumulative = None):
+ def __init__(self, config_file=None,
+ start=None, end=None, timestep=None, cumulative=None):
self._config = ConfigParser()
self._config.read(config_file)
self._stream_file = eval(self._config['DATA']['csv_file'])
self._get_located_interventions()
-
-
@property
def start(self):
return self._start
def start(self, x):
self._start = x
-
@property
def end(self):
return self._end
def end(self, x):
self._end = x
-
@property
def y(self):
return self._y
def end(self, y):
self._y = y
-
-
def _get_located_interventions(self):
if not self._config['SPECIFICATION'].getboolean('origin')\
and not self._config['SPECIFICATION'].getboolean('destination'):
+astral==2.1
attrs==19.3.0
Click==7.0
click-plugins==1.1.1