[hour]
+binary = False
categorical = True
numerical = False
[dayInWeek]
+binary = False
categorical = True
numerical = False
[dayInMonth]
+binary = False
categorical = True
numerical = False
[dayInYear]
+binary = False
categorical = True
numerical = False
[weekInYear]
+binary = False
categorical = True
numerical = False
[month]
+binary = False
categorical = False
numerical = True
[year]
+binary = False
categorical = False
numerical = True
\ No newline at end of file
name = Besançon
[bankHolidays]
-categorical = True
+binary = True
+categorical = False
numerical = False
[bankHolidaysEve]
-categorical = True
+binary = True
+categorical = False
numerical = False
[holidays]
-categorical = True
+binary = True
+categorical = False
numerical = False
[holidaysEve]
-categorical = True
+binary = True
+categorical = False
numerical = False
[temperature]
abbreviation = t
+binary = False
categorical = False
numerical = True
[pressure]
abbreviation = pres
+binary = False
categorical = False
numerical = True
[pressureVariation]
abbreviation = tend
+binary = False
categorical = False
numerical = True
[barometricTrend]
abbreviation = cod_tend
+binary = False
categorical = True
numerical = False
[humidity]
abbreviation = u
+binary = False
categorical = False
numerical = True
[dewPoint]
abbreviation = td
+binary = False
categorical = False
numerical = True
[lastHourRainfall]
abbreviation = rr1
+binary = False
categorical = False
numerical = True
[last3hHourRainfall]
abbreviation = rr3
+binary = False
categorical = False
numerical = True
[meanWindSpeed10min]
abbreviation = ff
+binary = False
categorical = False
numerical = True
[meanWindDirection10min]
abbreviation = dd
+binary = False
categorical = False
numerical = True
[gustsOverAPeriod]
abbreviation = rafper
+binary = False
categorical = False
numerical = True
[horizontalVisibility]
abbreviation = vv
+binary = False
categorical = False
numerical = True
[currentWeather]
abbreviation = ww
+binary = False
categorical = True
numerical = False
\ No newline at end of file
--- /dev/null
; Feature declarations for the Ramadan feature source, parsed with configparser.
; Each section is named after one key of the per-datetime dictionary built by
; the Ramadan class; the three flags are read with getboolean() by the
; preprocessing step to decide how the column is treated.
; Columns flagged binary are copied as-is into the final dataframe, categorical
; ones are one-hot encoded, numerical ones are kept as numerical columns.

; True on a day of Hijri month 8 whose next day falls in Hijri month 9.
+[ramadanEve]
+binary = True
+categorical = False
+numerical = False
+
; True on every day of Hijri month 9.
+[ramadan]
+binary = True
+categorical = False
+numerical = False
+
; True on a day of Hijri month 10 whose previous day fell in Hijri month 9.
+[ramadanDayAfter]
+binary = True
+categorical = False
+numerical = False
\ No newline at end of file
[DATETIME]
-start = 01/01/2016 00:00:00
-end = 12/31/2018 23:00:00
+start = 01/01/2006 00:00:00
+end = 12/31/2019 23:00:00
hourStep = 3
[FEATURES]
-meteofrance = True
ephemeris = True
holidays = True
+meteofrance = True
+ramadan = True
[FEATURE_CONFIG]
-meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
ephemeris = (Path.cwd() / 'config') / 'features' / 'feature_ephemeris.cfg'
holidays = (Path.cwd() / 'config') / 'features' / 'feature_holidays.cfg'
+meteofrance = (Path.cwd() / 'config') / 'features' / 'feature_meteo.cfg'
+ramadan = (Path.cwd() / 'config') / 'features' / 'feature_ramadan.cfg'
[PREPROCESSING]
-fill_method = spline
+fill_method = linear
order = 3
[HISTORY_KNOWLEDGE]
-nb_lines = 5
+nb_lines = 24//3*7*4
[TARGET]
-config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
-
+config = (Path.cwd() / 'config') / 'targets' / 'sdis25.cfg'
+cumulative = True
+horizon = 1
[LEARNER]
config = (Path.cwd() / 'config') / 'learners' / 'xgboost.cfg'
\ No newline at end of file
--- /dev/null
; LightGBM learner settings, parsed with configparser by the Learning class.

+[MODEL]
; Selects the training branch: the learner compares this value with
; 'xgboost' and 'lightgbm'.
+method = lightgbm
+
; learning_rate, metric, num_iterations, num_leaves and objective are copied
; into the parameter dictionary handed to lgb.train(); num_round is passed to
; lgb.train() separately as its third positional argument.
+[HYPERPARAMETERS]
+learning_rate = 0.1
; NOTE(review): auc is normally a classification metric while the objective
; below is a Poisson count regression - confirm this is the intended
; validation metric.
+metric = auc
; NOTE(review): num_iterations and num_round both look like boosting-round
; counts with different values - confirm which one LightGBM actually applies.
+num_iterations = 100
+num_round = 10
+num_leaves = 31
+objective = poisson
learning_rate = 0.01
max_depth = 7
random_state = 42
-n_estimators = 1000
+n_estimators = 10000
n_jobs = -1
objective = 'count:poisson'
\ No newline at end of file
if __name__ == '__main__':
config = (Path.cwd() / 'config') / 'learn.cfg'
- engine = Engine(config_file = config)
-
- engine.add_features()
- engine.add_target()
-
- engine.add_preprocessing()
-
- engine.learn()
+ with Engine(config_file = config) as e:
+ e.add_features()
+ e.add_target()
+ e.add_preprocessing()
+ e.learn()
'''target = toarea(stream_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
from pathlib import Path
from shutil import rmtree
+import os
+
+from .learn.learning import Learning
+from .learn.preprocessing import Preprocessing
from .source.ephemeris import Ephemeris
from .source.holidays import Holidays
+from .source.ramadan import Ramadan
from .source.meteofrance import MeteoFrance
-from .learn.learning import Learning
-from .learn.preprocessing import Preprocessing
from .target.target import Target
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
logger = getLogger()
-class Engine:
+class Engine(object):
- def __init__(self, config_file = (Path.cwd() / 'config') / 'learn.cfg'):
+ def __enter__(self):
+ return self
+
+ def __exit__(self, type, value, traceback):
+ with open(str(self._file_name / os.path.basename(self._file_name)) + '.cfg', 'w') as f:
+ f.write(self._config_text)
+
+ def __init__(self, config_file=(Path.cwd() / 'config') / 'learn.cfg'):
self._config = ConfigParser()
self._config.read(config_file)
+ launching_time = datetime.strftime(datetime.now(), '%Y_%m_%d_%H_%M')
+ self._name = os.path.splitext(os.path.basename(eval(self._config['TARGET']['config'])))[0]
+ self._file_name = f"{self._name}-{launching_time}"
+ p = Path.cwd() / 'results' / self._name
+ p.mkdir(exist_ok=True, parents=True)
+ self._file_name = p / self._file_name
+
+ self._config_text = ''
+ with open(config_file) as f:
+ self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+ self._config_text += f.read() + '\n\n'
+
self._start = datetime.strptime(self._config['DATETIME']['start'],
'%m/%d/%Y %H:%M:%S')
self._end = datetime.strptime(self._config['DATETIME']['end'],
- '%m/%d/%Y %H:%M:%S')
+ '%m/%d/%Y %H:%M:%S')
- self._timestep = timedelta(hours =
- self._config['DATETIME'].getfloat('hourStep'))
+ self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
self._X = {}
-
-
def clean(self):
# Cleaning the data directory
logger.info("Cleaning and restoring data directory")
- directory = Path.cwd() / 'data'
+ directory = Path.cwd() / 'data'
if directory.is_dir():
rmtree(directory)
p = Path(Path.cwd() / 'data')
p.mkdir()
-
def add_features(self):
- if self._config['FEATURES'].getboolean('meteofrance'):
- meteofeature = MeteoFrance(config_file =
- eval(self._config['FEATURE_CONFIG']['meteofrance']))
-
- meteofeature.start = self._start
- meteofeature.end = self._end
-
- meteofeature.update()
- dated_features = meteofeature.dated_features
- for date in dated_features:
- self._X.setdefault(date,{}).update(dated_features[date])
-
if self._config['FEATURES'].getboolean('ephemeris'):
- ephemerides = Ephemeris(config_file =
- eval(self._config['FEATURE_CONFIG']['ephemeris']))
+ config_file = eval(self._config['FEATURE_CONFIG']['ephemeris'])
+ with open(config_file) as f:
+ self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+ self._config_text += f.read() + '\n\n'
+
+ ephemerides = Ephemeris(config_file=config_file)
ephemerides.start = self._start
ephemerides.end = self._end
dated_features = ephemerides.dated_features
for date in dated_features:
- self._X.setdefault(date,{}).update(dated_features[date])
+ self._X.setdefault(date, {}).update(dated_features[date])
if self._config['FEATURES'].getboolean('holidays'):
- holidays = Holidays(config_file =
- eval(self._config['FEATURE_CONFIG']['holidays']))
+ config_file = eval(self._config['FEATURE_CONFIG']['holidays'])
+ with open(config_file) as f:
+ self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+ self._config_text += f.read() + '\n\n'
+
+ holidays = Holidays(config_file=config_file)
holidays.start = self._start
holidays.end = self._end
dated_features = holidays.dated_features
for date in dated_features:
- self._X.setdefault(date,{}).update(dated_features[date])
+ self._X.setdefault(date, {}).update(dated_features[date])
+
+ if self._config['FEATURES'].getboolean('meteofrance'):
+ config_file = eval(self._config['FEATURE_CONFIG']['meteofrance'])
+ with open(config_file) as f:
+ self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+ self._config_text += f.read() + '\n\n'
+ meteofeature = MeteoFrance(config_file=config_file)
+
+ meteofeature.start = self._start
+ meteofeature.end = self._end
+
+ meteofeature.update()
+ dated_features = meteofeature.dated_features
+ for date in dated_features:
+ self._X.setdefault(date, {}).update(dated_features[date])
+
+ if self._config['FEATURES'].getboolean('ramadan'):
+ config_file = eval(self._config['FEATURE_CONFIG']['ramadan'])
+ with open(config_file) as f:
+ self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+ self._config_text += f.read() + '\n\n'
+
+ ramadan = Ramadan(config_file=config_file)
+
+ ramadan.start = self._start
+ ramadan.end = self._end
+
+ dated_features = ramadan.dated_features
+ for date in dated_features:
+ self._X.setdefault(date, {}).update(dated_features[date])
def add_target(self):
- self._target = Target(config_file = eval(self._config['TARGET']['config']),
- start = self._start, end = self._end,
- timestep = self._timestep)
+ config_file = eval(self._config['TARGET']['config'])
+ cumulative = self._config['TARGET'].getboolean('cumulative')
+ with open(config_file) as f:
+ self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+ self._config_text += f.read() + '\n\n'
+ self._target = Target(config_file=config_file,
+ start=self._start, end=self._end,
+ timestep=self._timestep, cumulative=cumulative)
def add_preprocessing(self):
- self._preproc = Preprocessing(config_file = self._config,
- dict_features = self.X,
- dict_target = self.y)
-
+ self._preproc = Preprocessing(config_file=self._config,
+ dict_features=self.X,
+ dict_target=self.y)
def learn(self):
- history = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
- self._learner = Learning(config_file = eval(self._config['LEARNER']['config']),
- X = self._preproc.dataframe, y = list(self.y.values())[history:])
+ config_file = eval(self._config['LEARNER']['config'])
+ with open(config_file) as f:
+ self._config_text += f"{'='*10} {os.path.basename(config_file)} {'='*10}\n\n"
+ self._config_text += f.read() + '\n\n'
+ history = eval(self._config['HISTORY_KNOWLEDGE']['nb_lines'])
+ self._learner = Learning(config_file=config_file, file_name=self._file_name,
+ X=self._preproc.dataframe, y=list(self.y.values())[history:],
+ horizon=self._config['TARGET'].getint('horizon'))
@property
def X(self):
def X(self, x):
self._X = x
-
@property
def y(self):
return self._target.y
from configparser import ConfigParser
+from logging import getLogger
+from logging.config import fileConfig
from math import sqrt
+from pathlib import Path
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
+from statistics import mean, stdev
+import lightgbm as lgb
+import matplotlib
+import os
+import pylab as P
import xgboost
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
class Learning:
    """Train a gradient-boosting regressor and report how well it predicts.

    The constructor shifts features and targets by the prediction horizon,
    trains the model selected in the learner configuration file
    (``xgboost`` or ``lightgbm``) and immediately evaluates it, writing a
    text report and plots under the ``file_name`` directory.
    """

    def __init__(self, config_file=None, file_name=None,
                 X=None, y=None, horizon=0):
        """Configure, train and evaluate the learner.

        Args:
            config_file: learner configuration file, read with configparser
                (sections ``MODEL`` and ``HYPERPARAMETERS``).
            file_name: path of the result directory; its base name is also
                used as the stem of the generated files.
            X: features, one row per time step (sliceable by rows).
            y: target values aligned with the rows of ``X``.
            horizon: number of time steps between a feature row and the
                target it has to predict. 0 (or None) pairs every row with
                the target of the same index.
        """
        self._config = ConfigParser()
        self._config.read(config_file)
        self._file_name = file_name
        logger.info("Dealing with the horizon of prediction")
        # X[:-0] would be an empty slice, so a null horizon keeps all rows.
        self._X = X[:-horizon] if horizon else X
        self._y = y[horizon:] if horizon else y
        self._learn()
        self._evaluate()

    def _learn(self):
        """Split the data into train/validation/test sets and fit the model.

        Raises:
            ValueError: when the configured method is neither ``xgboost``
                nor ``lightgbm``.
        """
        logger.info("Generation of learning sets")
        # Work on a copy so the target column is not added to the frame
        # owned by the caller.
        self._df = self._X.copy()
        self._df['cible'] = self._y
        train_val_set, test_set = train_test_split(self._df, test_size=0.2, random_state=42)
        train_set, val_set = train_test_split(train_val_set, test_size=0.2, random_state=42)

        self._X_test = test_set.drop('cible', axis=1)
        self._y_test = test_set['cible'].copy()

        X_train = train_set.drop('cible', axis=1)
        y_train = train_set['cible'].copy()

        X_val = val_set.drop('cible', axis=1)
        y_val = val_set['cible'].copy()

        logger.info("Start learning")
        method = self._config['MODEL']['method']
        hyper = self._config['HYPERPARAMETERS']
        if method == 'xgboost':
            logger.info("Using xgboost regressor")
            self._reg = xgboost.XGBRegressor(learning_rate=hyper.getfloat('learning_rate'),
                                             max_depth=hyper.getint('max_depth'),
                                             random_state=hyper.getint('random_state'),
                                             n_estimators=hyper.getint('n_estimators'),
                                             n_jobs=hyper.getint('n_jobs'),
                                             objective='count:poisson')

            self._reg.fit(X_train, y_train,
                          eval_set=[(X_val, y_val)],
                          early_stopping_rounds=10)
        elif method == 'lightgbm':
            train_data = lgb.Dataset(X_train, label=y_train)
            val_data = lgb.Dataset(X_val, label=y_val)
            num_round = hyper.getint('num_round')
            # NOTE(review): 'num_iterations' is given in the parameters while
            # num_round is also passed as the number of rounds - confirm
            # which one LightGBM applies.
            param = {
                'learning_rate': hyper.getfloat('learning_rate'),
                'metric': hyper.get('metric'),
                'num_iterations': hyper.getint('num_iterations'),
                'num_leaves': hyper.getint('num_leaves'),
                'objective': hyper.get('objective')
            }
            self._reg = lgb.train(param, train_data, num_round, valid_sets=[val_data])
        else:
            # Fail here rather than later with an AttributeError on _reg.
            raise ValueError(f"Unknown learning method: {method!r}")

    @staticmethod
    def _plot_predictions(actual, predicted, title, path):
        """Plot the first 300 actual and predicted values and save the figure."""
        P.figure(figsize=(36, 16))
        P.plot(list(actual)[:300], color='blue', label='actual')
        P.plot(predicted[:300], color='red', label='predicted')
        P.title(title)
        P.xlabel('Hour in the year')
        P.ylabel('Number of cumulated interventions')
        P.legend()
        P.savefig(path)

    def _evaluate(self):
        """Score the model on the test set and write the report and plots.

        Produces, in the result directory: ``<stem>.result`` (text report,
        also printed), ``<stem>.png`` (rows having the largest ``year``
        value), ``<stem>-test.png`` (test set) and, for xgboost only,
        ``<stem>-feat_importance.pdf``.
        """
        logger.info("Evaluation of the learner")
        y_test_pred = self._reg.predict(self._X_test)
        txt = f"Average interventions per time unit: {mean(self._df.cible)}\n"
        txt += f"Standard deviation: {stdev(self._df.cible)}\n\n"

        txt += f"Mean absolute error: {mean_absolute_error(y_test_pred, self._y_test)}\n"
        txt += f"Root mean squared error: {sqrt(mean_squared_error(y_test_pred, self._y_test))}\n\n"

        # Truncated absolute errors, computed once for all thresholds.
        errors = [abs(int(u - v)) for u, v in zip(self._y_test.values, y_test_pred)]
        for k in range(10):
            txt += f"Percentage of errors lower than {k}: {sum(1 for e in errors if e <= k)/len(self._y_test)*100}\n"

        print(txt)
        rep = (Path.cwd() / self._file_name)
        # Do not fail when the directory already exists or when its parents
        # are missing.
        rep.mkdir(parents=True, exist_ok=True)
        filename = str(self._file_name / os.path.basename(self._file_name))
        with open(filename + ".result", 'w') as f:
            f.write(txt)

        # Rows holding the largest value of the 'year' column.
        last_year = self._df[self._df.year == self._df.year.max()]
        y_true = last_year.cible
        x_true = last_year.drop('cible', axis=1)

        # The title no longer hard-codes a year: the rows are selected from
        # the data, whatever period was configured.
        self._plot_predictions(y_true, self._reg.predict(x_true),
                               'Predictions for the last year of data',
                               filename + ".png")

        self._plot_predictions(self._y_test, y_test_pred,
                               'Predictions for test set',
                               filename + "-test.png")

        if self._config['MODEL']['method'] == 'xgboost':
            xgboost.plot_importance(self._reg)
            fig = matplotlib.pyplot.gcf()
            fig.set_size_inches(15, 130)
            fig.savefig(filename + '-feat_importance.pdf')
fileConfig((Path.cwd() / 'config') / 'logging.cfg')
logger = getLogger()
+
class Preprocessing:
'''
Generate a pandas dataframe from a dictionary of features per datetime, which
- NaN values are then filled with last known values.
'''
- def __init__(self, config_file = None,
- dict_features = None, dict_target = None):
+ def __init__(self, config_file=None,
+ dict_features=None, dict_target=None):
'''
Constructor that defines all needed attributes and collects features.
'''
self._start = datetime.strptime(self._config['DATETIME']['start'],
'%m/%d/%Y %H:%M:%S')
self._end = datetime.strptime(self._config['DATETIME']['end'],
- '%m/%d/%Y %H:%M:%S')
- self._timestep = timedelta(hours =
- self._config['DATETIME'].getfloat('hourStep'))
+ '%m/%d/%Y %H:%M:%S')
+ self._timestep = timedelta(hours=self._config['DATETIME'].getfloat('hourStep'))
self._dict_features = dict_features
self._dict_target = dict_target
self._datetimes = []
self._features = set(chain.from_iterable([tuple(u.keys())
- for u in [*dict_features.values()]]))
+ for u in [*dict_features.values()]]))
#feature_files = Path.cwd() / 'config' / 'features'
- self._features = {feat : {'numerical': False, 'categorical': False}
+ self._features = {feat: {'numerical': False, 'categorical': False}
for feat in self._features}
for feature in self._config['FEATURES']:
if self._config['FEATURES'][feature]:
feature_file = self._config['FEATURE_CONFIG'][feature]
config = ConfigParser()
- config.read(feature_file)
+ config.read(eval(feature_file))
for section in config:
if config.has_option(section, 'numerical'):
- self._features[section]['numerical'] = config[section].getboolean('numerical')
- self._features[section]['categorical'] = config[section].getboolean('categorical')
+ for feature in self._features:
+ if feature.split('_')[0] == section:
+ self._features[feature]['binary'] = config[section].getboolean('binary')
+ self._features[feature]['categorical'] = config[section].getboolean('categorical')
+ self._features[feature]['numerical'] = config[section].getboolean('numerical')
- self._numerical_columns = [k for k in self._features if self._features[k]['numerical']]
+ self._binary_columns = [k for k in self._features if self._features[k]['binary']]
self._categorical_columns = [k for k in self._features if self._features[k]['categorical']]
-
-
+ self._numerical_columns = [k for k in self._features if self._features[k]['numerical']]
@property
def start(self):
def start(self, x):
self._start = x
-
@property
def end(self):
return self._end
def end(self, x):
self._end = x
-
@property
def timestep(self):
return self._timestep
def timestep(self, x):
self._timestep = x
-
def _fill_dict(self):
'''
Add datetime keys in the dated feature dictionary that are missing. The
while current <= self._end:
self._datetimes.append(current)
if current not in self._dict_features:
- self._dict_features[current] = {feature:np.NaN
+ self._dict_features[current] = {feature: np.NaN
for feature in self._features}
else:
- null_dict = {feature:np.NaN
+ null_dict = {feature: np.NaN
for feature in self._features}
null_dict.update(self._dict_features[current])
self._dict_features[current] = null_dict
current += self._timestep
for k in self._dict_features:
- null_dict = {feature:np.NaN
+ null_dict = {feature: np.NaN
for feature in self._features}
null_dict.update(self._dict_features[k])
self._dict_features[k] = null_dict
self._full_dict = {k: self._dict_features[k]
for k in sorted(self._dict_features.keys())}
-
-
@property
def full_dict(self):
'''
self._fill_dict()
return self._full_dict
-
def _fill_nan(self):
'''
Fill NaN values, either by propagation or by interpolation (linear or splines)
elif self._config['PREPROCESSING']['fill_method'] == 'spline':
self._dataframe[self._numerical_columns] =\
self._dataframe[self._numerical_columns].interpolate(method='spline',
- order=self._config['PREPROCESSING'].getint('order'))
+ order=self._config['PREPROCESSING'].getint('order'))
# For the categorical columns, NaN values are filled by duplicating
# the last known value (forward fill method)
self._dataframe = self._dataframe.drop(['row_ok'], axis=1)
logger.info("Rows dropped")
-
def _add_history(self):
'''
Integrating previous nb of interventions as features
'''
logger.info("Integrating previous nb of interventions as features")
- nb_lines = self._config['HISTORY_KNOWLEDGE'].getint('nb_lines')
- for k in range(1,nb_lines+1):
- name = 'history_'+str(nb_lines-k+1)
- self._dataframe[name] = [np.NaN]*k + list(self._dict_target.values())[:-k]
+ nb_lines = eval(self._config['HISTORY_KNOWLEDGE']['nb_lines'])
+ for k in range(1, nb_lines + 1):
+ name = 'history_' + str(nb_lines - k + 1)
+ self._dataframe[name] = [np.NaN] * k + list(self._dict_target.values())[:-k]
self._numerical_columns.append(name)
self._dataframe = self._dataframe[nb_lines:]
-
-
def _standardize(self):
'''
Normalizing numerical features
self._dataframe[self._numerical_columns] =\
preprocessing.scale(self._dataframe[self._numerical_columns])
-
-
def _one_hot_encoding(self):
'''
Apply a one hot encoding for category features
'''
logger.info("One hot encoding for categorical feature")
-
# We store numerical columns
df_out = pd.DataFrame()
- for col in self._numerical_columns:
+ for col in self._numerical_columns:
+ df_out[col] = self._dataframe[col]
+ # Idem for binary features
+ for col in self._binary_columns:
df_out[col] = self._dataframe[col]
# The one hot encoding
for col in self._categorical_columns:
- pd1 = pd.get_dummies(self._dataframe[col],prefix=col)
+ pd1 = pd.get_dummies(self._dataframe[col], prefix=col)
for col1 in pd1.columns:
df_out[col1] = pd1[col1]
self._dataframe = df_out
-
@property
def dataframe(self):
'''
self._one_hot_encoding()
return self._dataframe
-
@dataframe.setter
def dataframe(self, df):
self._dataframe = df
from jours_feries_france.compute import JoursFeries
from logging import getLogger
from logging.config import fileConfig
+from pathlib import Path
from vacances_scolaires_france import SchoolHolidayDates
import itertools
bankHolidaysEve = tuple(u-timedelta(days=1) for u in bankHolidays)
name = self._config['ZONE']['name']
date = self._start
+ Date = datetime.date(date)
+ tomorrow = date + timedelta(days=1)
+ Tomorrow = datetime.date(tomorrow)
d = SchoolHolidayDates()
+ dict_hour = {
+ 'bankHolidays' : Date in bankHolidays,
+ 'bankHolidaysEve': Date in bankHolidaysEve,
+ 'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
+ 'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
+ }
while date <= self._end:
- Date = datetime.date(date)
- tomorrow = date + timedelta(days=1)
- Tomorrow = datetime.date(tomorrow)
- dict_hour = {
- 'bankHolidays' : Date in bankHolidays,
- 'bankHolidaysEve': Date in bankHolidaysEve,
- 'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
- 'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
- }
self._dated_features[date] = dict_hour
+ current = date
date += timedelta(hours=1)
+ if date.day != current.day:
+ Date = datetime.date(date)
+ tomorrow = date + timedelta(days=1)
+ Tomorrow = datetime.date(tomorrow)
+ dict_hour = {
+ 'bankHolidays' : Date in bankHolidays,
+ 'bankHolidaysEve': Date in bankHolidaysEve,
+ 'holidays': d.is_holiday_for_zone(Date, self._get_academic_zone(name, date)),
+ 'holidaysEve': d.is_holiday_for_zone(Tomorrow, self._get_academic_zone(name, tomorrow))
+ }
return self._dated_features
\ No newline at end of file
if (date >= self._start and date <= self._end)\
or (date.year == self._start.year and date.month == self._start.month)\
or (date.year == self._end.year and date.month == self._end.month):
- logger.info(f'Inserting {csv_meteo} in intervention dictionary')
+ logger.info(f'Adding meteofrance features from {csv_meteo}')
with open(dir_data / csv_meteo, "r") as f:
reader = DictReader(f, delimiter=';')
for row in reader:
--- /dev/null
+from configparser import ConfigParser
+from convertdate import islamic
+from datetime import datetime, timedelta
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
class Ramadan:
    """Feature source flagging, hour by hour, the Ramadan period.

    Three boolean features are produced for every hour between ``start`` and
    ``end`` (both included):

    * ``ramadanEve``: the day is in Hijri month 8 and the next day is in
      Hijri month 9;
    * ``ramadan``: the day is in Hijri month 9;
    * ``ramadanDayAfter``: the day is in Hijri month 10 and the previous day
      was in Hijri month 9.
    """

    _start = None
    _end = None

    def __init__(self, config_file):
        """Read the feature configuration file.

        Args:
            config_file: configuration file read with configparser; each
                section names a feature and carries ``binary``,
                ``categorical`` and ``numerical`` boolean options.
        """
        self._config = ConfigParser()
        self._config.read(config_file)

        # Collecting the declared features. The 'binary' flag has to be
        # considered too: a section with only binary = True would otherwise
        # never be listed.
        self._features = [section for section in self._config.sections()
                          if any(self._config[section].getboolean(kind)
                                 for kind in ('binary', 'categorical', 'numerical'))]

        self._dated_features = {}

    @property
    def start(self):
        """First datetime (included) for which features are generated."""
        return self._start

    @start.setter
    def start(self, x):
        self._start = x

    @property
    def end(self):
        """Last datetime (included) for which features are generated."""
        return self._end

    @end.setter
    def end(self, x):
        self._end = x

    @staticmethod
    def _day_flags(year, month, day):
        """Return the three Ramadan flags of the given Gregorian day."""
        midnight = datetime(year, month, day)
        eve = midnight - timedelta(days=1)
        tomorrow = midnight + timedelta(days=1)
        # from_gregorian returns a tuple whose second item is the Hijri month.
        hegirian_month = islamic.from_gregorian(year, month, day)[1]
        flags = {
            'ramadanEve': False,
            'ramadan': False,
            'ramadanDayAfter': False
        }
        if hegirian_month == 8 and\
                islamic.from_gregorian(tomorrow.year, tomorrow.month, tomorrow.day)[1] == 9:
            flags['ramadanEve'] = True
        elif hegirian_month == 9:
            flags['ramadan'] = True
        elif hegirian_month == 10 and\
                islamic.from_gregorian(eve.year, eve.month, eve.day)[1] == 9:
            flags['ramadanDayAfter'] = True
        return flags

    @property
    def dated_features(self):
        """Dictionary mapping each hourly datetime to its feature dictionary.

        The dictionary is computed on first access, from ``start`` to
        ``end`` with a one-hour step, and cached afterwards.
        """
        if self._dated_features == {}:
            logger.info("Adding Ramadan features")
            date = self._start
            current_day, flags = None, None
            while date <= self._end:
                day = (date.year, date.month, date.day)
                # The flags only depend on the calendar day: convert dates
                # once per day instead of once per hour.
                if day != current_day:
                    current_day = day
                    flags = self._day_flags(*day)
                # Each hour gets its own dictionary.
                self._dated_features[date] = dict(flags)
                date += timedelta(hours=1)
        return self._dated_features
class Target:
def __init__(self, config_file = None,
- start = None, end = None, timestep = None):
+ start = None, end = None, timestep = None, cumulative = None):
self._config = ConfigParser()
self._config.read(config_file)
self._start = start
self._end = end
self._timestep = timestep
+ self._cumulative = cumulative
logger.info('Initialization of target variable')
self._y = {}
logger.info('Integrating interventions for the whole area')
with open(self._stream_file) as f:
reader = DictReader(f, delimiter=',')
- for row in reader:
- if row['start'] != '':
- start_interv = datetime.strptime(row['start'], '%d/%m/%Y %H:%M:%S')
- start_interv = start_interv.replace(minute=0)
- end_interv = datetime.strptime(row['end'], '%d/%m/%Y %H:%M:%S')
- end_interv = end_interv.replace(minute=0)
- if not (start_interv > self._end or end_interv < self._start):
- if start_interv < self._start and end_interv <= self._end:
- current = self._start
- while current <= end_interv:
- self._y[current] += 1
- current += self._timestep
- elif start_interv >= self._start and end_interv > self._end:
- current = start_interv
- while current not in self._y:
- current -= timedelta(hours=1)
- while current <= self._end:
- self._y[current] += 1
- current += self._timestep
- elif start_interv >= self._start and end_interv <= self._end:
- current = start_interv
- while current not in self._y:
- current -= timedelta(hours=1)
- while current <= end_interv:
- self._y[current] += 1
- current += self._timestep
-
-
-
-
-
+ if self._cumulative:
+ for row in reader:
+ if row['start'] != '':
+ start_interv = datetime.strptime(row['start'], '%d/%m/%Y %H:%M:%S')
+ start_interv = start_interv.replace(minute=0)
+ end_interv = datetime.strptime(row['end'], '%d/%m/%Y %H:%M:%S')
+ end_interv = end_interv.replace(minute=0)
+ if not (start_interv > self._end or end_interv < self._start):
+ if start_interv < self._start and end_interv <= self._end:
+ current = self._start
+ while current <= end_interv:
+ self._y[current] += 1
+ current += self._timestep
+ elif start_interv >= self._start and end_interv > self._end:
+ current = start_interv
+ while current not in self._y:
+ current -= timedelta(hours=1)
+ while current <= self._end:
+ self._y[current] += 1
+ current += self._timestep
+ elif start_interv >= self._start and end_interv <= self._end:
+ current = start_interv
+ while current not in self._y:
+ current -= timedelta(hours=1)
+ while current <= end_interv:
+ self._y[current] += 1
+ current += self._timestep
+ else:
+ for row in reader:
+ if row['start'] != '':
+ start_interv = datetime.strptime(row['start'], '%d/%m/%Y %H:%M:%S')
+ start_interv = start_interv.replace(minute=0)
+ if start_interv in self._y:
+ self._y[start_interv] += 1
Click==7.0
click-plugins==1.1.1
cligj==0.5.0
+convertdate==2.2.0
+cycler==0.10.0
Fiona==1.8.13
geographiclib==1.50
geopandas==0.6.3
geopy==1.21.0
joblib==0.14.1
+jours-feries-france==0.5.1
+kiwisolver==1.1.0
+lightgbm==2.3.1
+matplotlib==3.1.3
munch==2.5.0
numpy==1.18.1
pandas==1.0.1
+PyMeeus==0.3.6
+pyparsing==2.4.6
pyproj==2.4.2.post1
python-dateutil==2.8.1
pytz==2019.3
scipy==1.4.1
Shapely==1.7.0
six==1.14.0
+vacances-scolaires-france==0.7.0
xgboost==0.90
xlrd==1.2.0