1 from configparser import ConfigParser
3 from sklearn.metrics import mean_squared_error, mean_absolute_error
4 from sklearn.model_selection import train_test_split
# Constructor: creates a ConfigParser and loads settings from `config_file`.
# NOTE(review): the remainder of the signature is not visible in this excerpt.
10 def __init__(self, config_file = None,
12 self._config = ConfigParser()
# NOTE(review): ConfigParser.read() iterates over its argument unless it is a
# str/bytes/PathLike, so the default config_file=None would raise TypeError
# here unless it is handled in code not shown in this excerpt -- confirm.
13 self._config.read(config_file)
# Hold out 20% of `df` as the test set, then split the remaining 80% again
# 80/20 into train and validation (64% / 16% / 20% of the rows overall).
# Both splits use a fixed random_state of 42, so they are reproducible.
18 train_val_set, test_set = train_test_split(df, test_size = 0.2, random_state = 42)
19 train_set, val_set = train_test_split(train_val_set, test_size = 0.2, random_state = 42)
# Separate the features from the target column 'cible' for each subset.
21 X_test = test_set.drop('cible', axis = 1)
22 y_test = test_set['cible'].copy()
24 X_train = train_set.drop('cible', axis=1)
25 y_train = train_set['cible'].copy()
26 X_val = val_set.drop('cible', axis=1)
27 y_val = val_set['cible'].copy()
# Only the 'xgboost' method is handled in the visible code.
30 if self._config['MODEL']['method'] == 'xgboost':
# Hyperparameters are read from the [HYPERPARAMETERS] config section; the
# objective is fixed to 'count:poisson'.
# NOTE(review): presumably 'cible' holds non-negative counts -- confirm.
32 xgb_reg = xgboost.XGBRegressor(learning_rate = self._config['HYPERPARAMETERS'].getfloat('learning_rate'),
33 max_depth = self._config['HYPERPARAMETERS'].getint('max_depth'),
34 random_state = self._config['HYPERPARAMETERS'].getint('random_state'),
35 n_estimators = self._config['HYPERPARAMETERS'].getint('n_estimators'),
36 n_jobs = self._config['HYPERPARAMETERS'].getint('n_jobs'),
37 objective = 'count:poisson')
# Fit on the training subset while evaluating on the validation subset, with
# early stopping configured at 10 rounds.
# NOTE(review): passing early_stopping_rounds to fit() is deprecated in recent
# XGBoost releases in favour of the constructor argument -- verify against the
# installed version.
39 xgb_reg.fit(X_train, y_train,
40 eval_set=[(X_val, y_val)],
41 early_stopping_rounds=10)
# Evaluate on the held-out test subset and print RMSE followed by MAE.
# NOTE(review): scikit-learn documents these metrics as (y_true, y_pred); the
# arguments are swapped here, which does not change these symmetric metrics
# but goes against the documented order.
43 y_test_pred = xgb_reg.predict(X_test)
44 print(sqrt(mean_squared_error(y_test_pred, y_test)), mean_absolute_error(y_test_pred,y_test))