From: Christophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Date: Mon, 17 Feb 2020 11:30:47 +0000 (+0100)
Subject: Starting to investigate the fact that qualitative features with NaN
X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/predictops.git/commitdiff_plain/4b6d71d96bb92791cc31640e5f30378ae6fe63e4?ds=sidebyside

Starting to investigate the fact that qualitative features with NaN
values cannot be filled in the same way as quantitative ones
---

diff --git a/config/features/meteofrance_features.csv b/config/features/meteofrance_features.csv
index 0253d1c..8623303 100644
--- a/config/features/meteofrance_features.csv
+++ b/config/features/meteofrance_features.csv
@@ -1,4 +1,4 @@
-abbreviation,name,unit,type,type
+abbreviation,name,unit,format,type
 t,temperature,K,real,1
 pres,pressure,Pa,integer,1
 tend,pressureVariation,Pa,integer,1
diff --git a/predictops/learn/preprocessing.py b/predictops/learn/preprocessing.py
index a878a82..49d7ef8 100644
--- a/predictops/learn/preprocessing.py
+++ b/predictops/learn/preprocessing.py
@@ -48,14 +48,14 @@ class Preprocessing:
         else:
             self._features = set(chain.from_iterable([tuple(u.keys())
                                                       for u in [*dict_features.values()]]))
-        for csv_file in listdir():
-            with open(csv_file, "r") as f:
-                reader = DictReader(f, delimiter=',')
-                dico_features = {{row['name']: row['type']  # qualitative (2) or quantitative (1)
-                                    }
-                                for row in reader if row['name'] in self._features}
-
+        csv_files = Path.cwd() / 'config' / 'features'
         self._features = {feat : None for feat in self._features}
+        for csv_file in listdir(csv_files):
+            with open(csv_files / csv_file, "r") as f:
+                reader = DictReader(f, delimiter=',')
+                for row in reader:
+                    if row['name'] in self._features:
+                        self._features[row['name']] = row['type']
         print(self._features)
         exit()
 
diff --git a/predictops/source/ephemeris.py b/predictops/source/ephemeris.py
index d0e4ca0..2a34364 100644
--- a/predictops/source/ephemeris.py
+++ b/predictops/source/ephemeris.py
@@ -1,3 +1,5 @@
+from .source import Source
+
 from configparser import ConfigParser
 from csv import DictReader
 from datetime import datetime, timedelta
@@ -16,7 +18,7 @@ class Ephemeris:
     def __init__(self, config_file):
 
         # Check for the integrity of feature names
-        super(Source, self).__init__()
+        Source.__init__(self)
 
         self._config = ConfigParser()
         self._config.read(config_file)
diff --git a/predictops/source/meteofrance.py b/predictops/source/meteofrance.py
index 3d8ae88..6bd23ed 100644
--- a/predictops/source/meteofrance.py
+++ b/predictops/source/meteofrance.py
@@ -53,7 +53,7 @@ class MeteoFrance(Source):
 
         '''
         # Check for the integrity of feature names
-        super(Source, self).__init__()
+        Source.__init__(self)
 
         self._config = ConfigParser()
         self._config.read(config_file)
diff --git a/predictops/source/source.py b/predictops/source/source.py
index 714ed12..8e68716 100644
--- a/predictops/source/source.py
+++ b/predictops/source/source.py
@@ -17,7 +17,7 @@ class Source:
         csv_files = Path.cwd() / 'config' / 'features'
         list_of_names = []
         for csv_file in listdir(csv_files):
-            with open(csv_file, "r") as f:
+            with open(csv_files / csv_file, "r") as f:
                 reader = DictReader(f, delimiter=',')
                 list_of_names.extend([row['name'] for row in reader])
         if len(list_of_names) != len(set(list_of_names)):