predictops/learn/learning.py

   1 from configparser import ConfigParser
   2 from math import sqrt
   3 from sklearn.metrics import mean_squared_error, mean_absolute_error
   4 from sklearn.model_selection import train_test_split
   5
   6 import xgboost
   7
   8 class Learning:
   9
  10     def __init__(self, config_file = None,
  11                  X = None, y = None):
  12         self._config = ConfigParser()
  13         self._config.read(config_file)
  14
  15         df = X
  16         df['cible'] = y
  17
  18         print(df.head())
  19
  20         train_val_set, test_set = train_test_split(df, test_size = 0.2, random_state = 42)
  21         train_set, val_set = train_test_split(train_val_set, test_size = 0.2, random_state = 42)
  22
  23         X_test = test_set.drop('cible', axis = 1)
  24         y_test = test_set['cible'].copy()
  25
  26         X_train = train_set.drop('cible', axis=1)
  27         y_train = train_set['cible'].copy()
  28         X_val = val_set.drop('cible', axis=1)
  29         y_val = val_set['cible'].copy()
  30
  31
  32         if self._config['MODEL']['method'] == 'xgboost':
  33             xgb_reg = xgboost.XGBRegressor(learning_rate = 0.01,
  34                                                    max_depth = 10,
  35                                                    random_state=42,
  36                                                    n_estimators = 173,
  37                                                    n_jobs=-1,
  38                                                    objective = 'count:poisson')
  39
  40             xgb_reg.fit(X_train, y_train,
  41                         eval_set=[(X_val, y_val)],
  42                         early_stopping_rounds=10)
  43
  44             y_test_pred = xgb_reg.predict(X_test)
  45             print(sqrt(mean_squared_error(y_test_pred, y_test)), mean_absolute_error(y_test_pred,y_test))