From 4b6d71d96bb92791cc31640e5f30378ae6fe63e4 Mon Sep 17 00:00:00 2001 From: Christophe Guyeux Date: Mon, 17 Feb 2020 12:30:47 +0100 Subject: [PATCH 1/1] Starting to investigate the fact that qualitative features with NaN values cannot be filled in the same way as quantitative ones --- config/features/meteofrance_features.csv | 2 +- predictops/learn/preprocessing.py | 14 +++++++------- predictops/source/ephemeris.py | 4 +++- predictops/source/meteofrance.py | 2 +- predictops/source/source.py | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) diff --git a/config/features/meteofrance_features.csv b/config/features/meteofrance_features.csv index 0253d1c..8623303 100644 --- a/config/features/meteofrance_features.csv +++ b/config/features/meteofrance_features.csv @@ -1,4 +1,4 @@ -abbreviation,name,unit,type,type +abbreviation,name,unit,format,type t,temperature,K,real,1 pres,pressure,Pa,integer,1 tend,pressureVariation,Pa,integer,1 diff --git a/predictops/learn/preprocessing.py b/predictops/learn/preprocessing.py index a878a82..49d7ef8 100644 --- a/predictops/learn/preprocessing.py +++ b/predictops/learn/preprocessing.py @@ -48,14 +48,14 @@ class Preprocessing: else: self._features = set(chain.from_iterable([tuple(u.keys()) for u in [*dict_features.values()]])) - for csv_file in listdir(): - with open(csv_file, "r") as f: - reader = DictReader(f, delimiter=',') - dico_features = {{row['name']: row['type'] # qualitative (2) or quantitative (1) - } - for row in reader if row['name'] in self._features} - + csv_files = Path.cwd() / 'config' / 'features' self._features = {feat : None for feat in self._features} + for csv_file in listdir(csv_files): + with open(csv_files / csv_file, "r") as f: + reader = DictReader(f, delimiter=',') + for row in reader: + if row['name'] in self._features: + self._features[row['name']] = row['type'] print(self._features) exit() diff --git a/predictops/source/ephemeris.py b/predictops/source/ephemeris.py index d0e4ca0..2a34364 100644 
--- a/predictops/source/ephemeris.py +++ b/predictops/source/ephemeris.py @@ -1,3 +1,5 @@ +from .source import Source + from configparser import ConfigParser from csv import DictReader from datetime import datetime, timedelta @@ -16,7 +18,7 @@ class Ephemeris: def __init__(self, config_file): # Check for the integrity of feature names - super(Source, self).__init__() + Source.__init__(self) self._config = ConfigParser() self._config.read(config_file) diff --git a/predictops/source/meteofrance.py b/predictops/source/meteofrance.py index 3d8ae88..6bd23ed 100644 --- a/predictops/source/meteofrance.py +++ b/predictops/source/meteofrance.py @@ -53,7 +53,7 @@ class MeteoFrance(Source): ''' # Check for the integrity of feature names - super(Source, self).__init__() + Source.__init__(self) self._config = ConfigParser() self._config.read(config_file) diff --git a/predictops/source/source.py b/predictops/source/source.py index 714ed12..8e68716 100644 --- a/predictops/source/source.py +++ b/predictops/source/source.py @@ -17,7 +17,7 @@ class Source: csv_files = Path.cwd() / 'config' / 'features' list_of_names = [] for csv_file in listdir(csv_files): - with open(csv_file, "r") as f: + with open(csv_files / csv_file, "r") as f: reader = DictReader(f, delimiter=',') list_of_names.extend([row['name'] for row in reader]) if len(list_of_names) != len(set(list_of_names)): -- 2.39.5