AND Private Git Repository - predictops.git/blob - predictops/learn/learning.py
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
Learning process: first version
[predictops.git] / predictops / learn / learning.py
1 from configparser import ConfigParser
2 from math import sqrt
3 from sklearn.metrics import mean_squared_error, mean_absolute_error
4 from sklearn.model_selection import train_test_split
5
6 import xgboost
7
class Learning:
    """Train and evaluate a regression model on a feature/target data set.

    All of the work is done by the constructor: it loads the configuration,
    splits the data into train / validation / test subsets, and, when the
    configuration selects the ``xgboost`` method, fits a regressor and prints
    its error on the test subset.
    """

    def __init__(self, config_file = None,
                 X = None, y = None):
        """Load the configuration, split the data and run the training.

        Args:
            config_file: Path of an INI file handed to ``ConfigParser.read``.
                It must provide a ``[MODEL]`` section with a ``method`` key.
            X: Feature table. It must support ``.copy()``, column
                assignment, ``.head()`` and ``.drop(..., axis=1)``
                (presumably a pandas DataFrame -- confirm with the caller).
                It is NOT modified.
            y: Target values, attached to a copy of ``X`` under the
                ``'cible'`` column.

        Raises:
            KeyError: If the configuration has no ``[MODEL]`` section or no
                ``method`` key in it.
        """
        self._config = ConfigParser()
        self._config.read(config_file)

        # Work on a copy: adding the target column to ``X`` itself would leak
        # a 'cible' column into the caller's data.
        df = X.copy()
        df['cible'] = y

        print(df.head())

        # 80/20 split into (train + validation) / test, then 80/20 again into
        # train / validation; the fixed seed keeps both splits reproducible.
        train_val_set, test_set = train_test_split(df, test_size=0.2, random_state=42)
        train_set, val_set = train_test_split(train_val_set, test_size=0.2, random_state=42)

        X_test = test_set.drop('cible', axis=1)
        y_test = test_set['cible'].copy()

        X_train = train_set.drop('cible', axis=1)
        y_train = train_set['cible'].copy()
        X_val = val_set.drop('cible', axis=1)
        y_val = val_set['cible'].copy()

        if self._config['MODEL']['method'] == 'xgboost':
            xgb_reg = xgboost.XGBRegressor(learning_rate=0.01,
                                           max_depth=10,
                                           random_state=42,
                                           n_estimators=173,
                                           n_jobs=-1,
                                           objective='count:poisson')

            # The validation subset is only used to monitor early stopping.
            # NOTE(review): newer xgboost releases appear to expect
            # ``early_stopping_rounds`` on the estimator constructor instead
            # of ``fit()`` -- confirm against the installed version.
            xgb_reg.fit(X_train, y_train,
                        eval_set=[(X_val, y_val)],
                        early_stopping_rounds=10)

            y_test_pred = xgb_reg.predict(X_test)
            # Report RMSE and MAE on the held-out test subset, using the
            # (y_true, y_pred) argument order of scikit-learn's metrics.
            print(sqrt(mean_squared_error(y_test, y_test_pred)),
                  mean_absolute_error(y_test, y_test_pred))