]> AND Private Git Repository - predictops.git/commitdiff
Logo AND Algorithmique Numérique Distribuée

Private GIT Repository
New version now driven by config files
authorChristophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Sun, 16 Feb 2020 10:45:29 +0000 (11:45 +0100)
committerChristophe Guyeux <christophe.guyeux@univ-fcomte.fr>
Sun, 16 Feb 2020 10:45:29 +0000 (11:45 +0100)
config/feature_ephemeris.cfg [new file with mode: 0644]
config/feature_meteo.cfg [new file with mode: 0644]
config/features.cfg [deleted file]
config/features/meteofrance_features.csv
config/learn.cfg [new file with mode: 0644]
config/main.cfg [deleted file]
main.py
predictops/engine.py [new file with mode: 0644]
predictops/learn/preprocessing.py
predictops/source/ephemeris.py
predictops/source/meteofrance.py

diff --git a/config/feature_ephemeris.cfg b/config/feature_ephemeris.cfg
new file mode 100644 (file)
index 0000000..6b37dcf
--- /dev/null
@@ -0,0 +1,14 @@
+[FEATURES]
+hour       = True
+dayInWeek  = True
+dayInMonth = True
+dayInYear  = True
+weekInYear = True
+month      = True
+year       = True
+
+[HOUR]
+numerical  = True
+
+[YEAR]
+numerical  = True
\ No newline at end of file
diff --git a/config/feature_meteo.cfg b/config/feature_meteo.cfg
new file mode 100644 (file)
index 0000000..02bdab7
--- /dev/null
@@ -0,0 +1,25 @@
+[GENERAL]
+regenerate = False
+reinsert   = True
+
+[POSITION]
+latitude  = 47.25
+longitude = 6.0333
+
+[STATIONS]
+nb_stations = 3
+
+[FEATURES]
+temperature            = True
+pressure               = True
+pressureVariation      = False
+barometricTrend        = False
+humidity               = False
+dewPoint               = False
+lastHourRainfall       = False
+last3hHourRainfall     = False
+meanWindSpeed10min     = False
+meanWindDirection10min = False
+gustsOverAPeriod       = False
+horizontalVisibility   = False
+currentWeather         = False
\ No newline at end of file
diff --git a/config/features.cfg b/config/features.cfg
deleted file mode 100644 (file)
index 927fb92..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-[meteofrance]
-regenerate = False
-reinsert   = True
index f3303ea812b964c1dbfc51821f0937ad9789b9a7..0253d1ccbd6472d8a057709aeef42b7a38436bcf 100644 (file)
@@ -2,7 +2,7 @@ abbreviation,name,unit,type,type
 t,temperature,K,real,1
 pres,pressure,Pa,integer,1
 tend,pressureVariation,Pa,integer,1
 t,temperature,K,real,1
 pres,pressure,Pa,integer,1
 tend,pressureVariation,Pa,integer,1
-cod_tend,BarometricTrend,code,integer,2
+cod_tend,barometricTrend,code,integer,2
 u,humidity,%,integer,1
 td,dewPoint,K,real,1
 rr1,lastHourRainfall,mm,real,1
 u,humidity,%,integer,1
 td,dewPoint,K,real,1
 rr1,lastHourRainfall,mm,real,1
diff --git a/config/learn.cfg b/config/learn.cfg
new file mode 100644 (file)
index 0000000..bbd3557
--- /dev/null
@@ -0,0 +1,19 @@
+[DATETIME]
+start    = 01/01/2010 01:00:00
+end      = 12/31/2010 23:00:00
+hourStep = 6
+
+
+[FEATURES]
+meteofrance = True
+ephemeris   = True
+
+
+[FEATURE_CONFIG]
+meteofrance = (Path.cwd() / 'config') / 'feature_meteo.cfg'
+ephemeris   = (Path.cwd() / 'config') / 'feature_ephemeris.cfg'
+
+
+[PREPROCESSING]
+fill_method = spline
+order       = 3
\ No newline at end of file
diff --git a/config/main.cfg b/config/main.cfg
deleted file mode 100644 (file)
index 942ef96..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-[postgresql]
-host   = localhost
-user   = christophe
-port   = 5432
-dbname = extome
diff --git a/main.py b/main.py
index a42ce0f11cb9b42feae17fa17e1abcdf513b8570..426d3b580fdbed6586b7111ce6ffb17c1172824a 100644 (file)
--- a/main.py
+++ b/main.py
@@ -1,89 +1,35 @@
-from predictops.source.ephemeris import Ephemeris
-from predictops.source.meteofrance import MeteoFrance
+from predictops.engine import Engine
 from predictops.learn.preprocessing import Preprocessing
 from predictops.target.toarea import ToArea
 
 from predictops.learn.preprocessing import Preprocessing
 from predictops.target.toarea import ToArea
 
-from datetime import datetime, timedelta
 from logging import getLogger
 from logging.config import fileConfig
 from pathlib import Path
 from logging import getLogger
 from logging.config import fileConfig
 from pathlib import Path
-from shutil import rmtree
 
 import geopandas as gpd
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 
 import geopandas as gpd
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
+if __name__ == '__main__':
 
 
-class Engine:
-    def __init__(self, start = None, end = None, time_step = None):
-        self._X = {}
-        self._Y = {}
+    config = (Path.cwd() / 'config') / 'learn.cfg'
+    engine = Engine(config_file = config)
 
 
+    engine.add_features()
+    #print(engine.X)
+    process = Preprocessing(config_file = config, dict_features = engine.X)
 
 
-    def clean(self):
-        # Cleaning the data directory
-        logger.info("Cleaning and restoring data directory")
-        directory  = Path.cwd() / 'data'
-        if directory.is_dir():
-            rmtree(directory)
-        p = Path(Path.cwd() / 'data')
-        p.mkdir()
 
 
+    #print(engine.X[datetime.strptime('06/30/2010 21:00:00', '%m/%d/%Y %H:%M:%S')])
+    print(process.dataframe.head(n=20))
+    print(process.dataframe.tail(n=20))
+    exit()
 
 
-    def add_feature(self, name, **kw):
+    depts = gpd.read_file( Path.cwd() / 'data' / 'targets' / 'departments' / "departements-20180101.shp")
+    Doubs = depts.loc[depts['nom'] == 'Doubs'].iloc[0]
 
 
-        if name == 'meteofrance':
-            meteofeature = MeteoFrance(**kw)
-            meteofeature.update()
-            dated_features = meteofeature.dated_features
-            for date in dated_features:
-                self._X.setdefault(date,{}).update(dated_features[date])
-        elif name == 'ephemeris':
-            ephemerides = Ephemeris(**kw)
-            dated_features = ephemerides.dated_features
-            for date in dated_features:
-                self._X.setdefault(date,{}).update(dated_features[date])
-
-
-    @property
-    def X(self):
-        return self._X
-
-    @X.setter
-    def X(self, x):
-        self._X = x
-
-
-start = datetime.strptime('01/01/2010 01:00:00', '%m/%d/%Y %H:%M:%S')
-end = datetime.strptime('12/31/2010 23:00:00', '%m/%d/%Y %H:%M:%S')
-
-engine = Engine()
-engine.add_feature(name = 'meteofrance',
-                   start = start, end = end,
-                   latitude = 47.25, longitude = 6.0333, nb_stations = 3,
-                   features = ['temperature', 'pressure'])
-
-
-engine.add_feature(name = 'ephemeris',
-                   start = start, end = end,
-                   features = ['hour', 'dayInWeek', 'dayInMonth', 'dayInYear',
-                               'weekInYear', 'month', 'year'])
-
-
-process = Preprocessing(dict_features = engine.X,
-                   start = start, end = end, timestep = timedelta(hours=6))
-
-
-df = process.dataframe.head(n=20)
-#print(engine.X[datetime.strptime('06/30/2010 21:00:00', '%m/%d/%Y %H:%M:%S')])
-print(df)
-exit()
-
-depts = gpd.read_file( Path.cwd() / 'data' / 'targets' / 'departments' / "departements-20180101.shp")
-Doubs = depts.loc[depts['nom'] == 'Doubs'].iloc[0]
-
-ToArea(area=Doubs.geometry,
-       start = start, end = end,
-       csv_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
+    ToArea(area=Doubs.geometry,
+           start = start, end = end,
+           csv_file = Path.cwd() / 'data' / 'targets' / 'sdis25' / 'interventions.csv')
 
 
diff --git a/predictops/engine.py b/predictops/engine.py
new file mode 100644 (file)
index 0000000..2ec62df
--- /dev/null
@@ -0,0 +1,74 @@
+from configparser import ConfigParser
+from datetime import datetime, timedelta
+from logging import getLogger
+from logging.config import fileConfig
+from pathlib import Path
+from shutil import rmtree
+
+from predictops.source.ephemeris import Ephemeris
+from predictops.source.meteofrance import MeteoFrance
+
+fileConfig((Path.cwd() / 'config') / 'logging.cfg')
+logger = getLogger()
+
+
+class Engine:
+
+    def __init__(self, config_file = (Path.cwd() / 'config') / 'learn.cfg'):
+        self._config = ConfigParser()
+        self._config.read(config_file)
+        self._start = datetime.strptime(self._config['DATETIME']['start'],
+                                        '%m/%d/%Y %H:%M:%S')
+        self._end = datetime.strptime(self._config['DATETIME']['end'],
+                                        '%m/%d/%Y %H:%M:%S')
+
+        self._timestep = timedelta(hours =
+                                   self._config['DATETIME'].getfloat('hourStep'))
+
+        self._X = {}
+        self._Y = {}
+
+
+
+    def clean(self):
+        # Cleaning the data directory
+        logger.info("Cleaning and restoring data directory")
+        directory  = Path.cwd() / 'data'
+        if directory.is_dir():
+            rmtree(directory)
+        p = Path(Path.cwd() / 'data')
+        p.mkdir()
+
+
+    def add_features(self):
+        if self._config['FEATURES'].getboolean('meteofrance'):
+            meteofeature = MeteoFrance(config_file =
+                                       eval(self._config['FEATURE_CONFIG']['meteofrance']))
+
+            meteofeature.start = self._start
+            meteofeature.end = self._end
+
+            meteofeature.update()
+            dated_features = meteofeature.dated_features
+            for date in dated_features:
+                self._X.setdefault(date,{}).update(dated_features[date])
+
+        if self._config['FEATURES'].getboolean('ephemeris'):
+            ephemerides = Ephemeris(config_file =
+                                    eval(self._config['FEATURE_CONFIG']['ephemeris']))
+
+            ephemerides.start = self._start
+            ephemerides.end = self._end
+
+            dated_features = ephemerides.dated_features
+            for date in dated_features:
+                self._X.setdefault(date,{}).update(dated_features[date])
+
+
+    @property
+    def X(self):
+        return self._X
+
+    @X.setter
+    def X(self, x):
+        self._X = x
\ No newline at end of file
index 833e48316bffa1c51affc6885210b71f61b2c1d1..5400d1d39f1135ce5e2abcfec2541201cf5d8ed6 100644 (file)
@@ -1,3 +1,5 @@
+from configparser import ConfigParser
+from datetime import datetime, timedelta
 from itertools import chain
 from logging import getLogger
 from logging.config import fileConfig
 from itertools import chain
 from logging import getLogger
 from logging.config import fileConfig
@@ -18,19 +20,22 @@ class Preprocessing:
      - Missing datetimes are added first with np.NaN feature values,
      - The dataframe is then constructed based on the filled feature dictionary,
      - NaN values are then filled with last known values.
      - Missing datetimes are added first with np.NaN feature values,
      - The dataframe is then constructed based on the filled feature dictionary,
      - NaN values are then filled with last known values.
-
     '''
     '''
-    def __init__(self, dict_features,
-                 start, end, timestep,
-                 features = None):
+
+    def __init__(self, config_file = None, dict_features = None, features = None):
         '''
         Constructor that defines all needed attributes and collects features.
         '''
         '''
         Constructor that defines all needed attributes and collects features.
         '''
-        logger.info("Entering  NaN values in the feature dataframe")
+        self._config = ConfigParser()
+        self._config.read(config_file)
+
+        self._start = datetime.strptime(self._config['DATETIME']['start'],
+                                        '%m/%d/%Y %H:%M:%S')
+        self._end = datetime.strptime(self._config['DATETIME']['end'],
+                                        '%m/%d/%Y %H:%M:%S')
+        self._timestep = timedelta(hours =
+                                   self._config['DATETIME'].getfloat('hourStep'))
         self._dict_features = dict_features
         self._dict_features = dict_features
-        self._start = start
-        self._end = end
-        self._timestep = timestep
         self._full_dict = None
         self._dataframe = None
         self._datetimes = []
         self._full_dict = None
         self._dataframe = None
         self._datetimes = []
@@ -43,6 +48,33 @@ class Preprocessing:
                                                       for u in [*dict_features.values()]]))
 
 
                                                       for u in [*dict_features.values()]]))
 
 
+    @property
+    def start(self):
+        return self._start
+
+    @start.setter
+    def start(self, x):
+        self._start = x
+
+
+    @property
+    def end(self):
+        return self._end
+
+    @end.setter
+    def end(self, x):
+        self._end = x
+
+
+    @property
+    def timestep(self):
+        return self._timestep
+
+    @timestep.setter
+    def timestep(self, x):
+        self._timestep = x
+
+
     def _fill_dict(self):
         '''
         Add datetime keys in the dated feature dictionary that are missing. The
     def _fill_dict(self):
         '''
         Add datetime keys in the dated feature dictionary that are missing. The
@@ -94,14 +126,22 @@ class Preprocessing:
             self._dataframe = pd.DataFrame.from_dict(self.full_dict,
                                                      orient='index')
             logger.info("Filling NaN values in the feature dataframe")
             self._dataframe = pd.DataFrame.from_dict(self.full_dict,
                                                      orient='index')
             logger.info("Filling NaN values in the feature dataframe")
-            #TODO: add other filling methods like linear interpolation
-            self._dataframe = self._dataframe.fillna(method='ffill')
+
+            if self._config['PREPROCESSING']['fill_method'] == 'propagate':
+                self._dataframe = self._dataframe.fillna(method='ffill')
+            elif self._config['PREPROCESSING']['fill_method'] == 'linear':
+                self._dataframe = self._dataframe.interpolate()
+            elif self._config['PREPROCESSING']['fill_method'] == 'spline':
+                self._dataframe = self._dataframe.interpolate(method='spline',
+                                                              order=self._config['PREPROCESSING'].getint('order'))
             self._dataframe = self._dataframe.fillna(method='bfill')
             self._dataframe = self._dataframe.fillna(method='bfill')
+
             self._dataframe = self._dataframe.drop([k.to_pydatetime()
                                                    for k in self._dataframe.T
                                                    if k not in self._datetimes])
         return self._dataframe
 
             self._dataframe = self._dataframe.drop([k.to_pydatetime()
                                                    for k in self._dataframe.T
                                                    if k not in self._datetimes])
         return self._dataframe
 
+
     @dataframe.setter
     def dataframe(self, df):
         self._dataframe = df
     @dataframe.setter
     def dataframe(self, df):
         self._dataframe = df
index 33c0f2d31f2ff26beb7dcced68cc08033c495ea1..e46b296ad09efd78afdff11163e43f3606ce67fc 100644 (file)
@@ -1,21 +1,48 @@
+from configparser import ConfigParser
+from csv import DictReader
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
+from pathlib import Path
+
 import time
 import calendar
 
 import time
 import calendar
 
+CSV_FILE = Path.cwd() / 'config' / 'features' / 'ephemeris_features.csv'
+
 class Ephemeris:
 
 class Ephemeris:
 
-    def __init__(self, start = time.strptime('19960101000000', '%Y%m%d%H%M%S'),
-                 end = datetime.now(), features = []):
-        self._start = start
-        self._end = end
-        self._features = features
+    _start = None
+    _end   = None
+
+    def __init__(self, config_file):
+
+        self._config = ConfigParser()
+        self._config.read(config_file)
+
+        # Collecting ephemeris features
+        with open(CSV_FILE, "r") as f:
+            reader = DictReader(f, delimiter=',')
+            self._features = [row['name'] for row in reader
+                              if self._config['FEATURES'].getboolean(row['name'])]
 
         self._dated_features = {}
 
 
 
         self._dated_features = {}
 
 
+    @property
+    def start(self):
+        return self._start
+
+    @start.setter
+    def start(self, x):
+        self._start = x
+
+
+    @property
+    def end(self):
+        return self._end
 
 
-    def update(self):
-        pass
+    @end.setter
+    def end(self, x):
+        self._end = x
 
 
 
 
 
 
index 5a885ee2cac64915bd54359699c35ed7d5365d14..afe18ad82e77efc3e495881fe540ad143f1d1284 100644 (file)
@@ -16,12 +16,19 @@ import gzip
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
 fileConfig((Path.cwd() / 'config') / 'logging.cfg')
 logger = getLogger()
 
+CSV_FILE = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
+
+
 class MeteoFrance:
 
 class MeteoFrance:
 
-    def __init__(self, latitude = 47.25, longitude = 6.0333, nb_stations = 3,
-                 start = datetime.strptime('19960101000000', '%Y%m%d%H%M%S'),
-                 end = datetime.now(),
-                 features = []):
+    _latitude    = None
+    _longitude   = None
+    _nb_stations = None
+    _start       = None
+    _end         = None
+    _features    = None
+
+    def __init__(self, config_file):
         '''
         Constructor of the MeteoFrance source of feature.
 
         '''
         Constructor of the MeteoFrance source of feature.
 
@@ -34,33 +41,80 @@ class MeteoFrance:
     https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32
 
         Parameters:
     https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32
 
         Parameters:
+          - in config file:
             latitude (float): The latitude from which we want the meteo features.
             longitude (float): The longitude from which we want the meteo features.
             nb_stations (int): Number of closest stations to consider.
             latitude (float): The latitude from which we want the meteo features.
             longitude (float): The longitude from which we want the meteo features.
             nb_stations (int): Number of closest stations to consider.
+          - provided to the constructor
             features (list): Weather features that have to be integrated, according
                   to their names in meteofrance_features.csv (cf. config directory)
 
         '''
             features (list): Weather features that have to be integrated, according
                   to their names in meteofrance_features.csv (cf. config directory)
 
         '''
-        self._latitude = latitude
-        self._longitude = longitude
-        self._nb_stations = nb_stations
-        self._start = start
-        self._end = end
-        self._features = features
+        self._config = ConfigParser()
+        self._config.read(config_file)
 
         self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
 
         self._dated_features = None
 
         # Re-creating data directory architecture for MeteoFrance, if asked
 
         self._data_directory = (Path.cwd() / 'data') / 'features' / 'meteo_france'
 
         self._dated_features = None
 
         # Re-creating data directory architecture for MeteoFrance, if asked
-        config = ConfigParser()
-        config.read((Path.cwd() / 'config') / 'features.cfg')
-        if eval(config['meteofrance']['regenerate']):
+        if self._config['GENERAL'].getboolean('regenerate'):
             self._regenerate_directory()
 
         # Collecting the closest meteo station
             self._regenerate_directory()
 
         # Collecting the closest meteo station
+        self._nb_stations = self._config['STATIONS'].getint('nb_stations')
         self._stations = self._get_stations()
 
         self._stations = self._get_stations()
 
+        # Collecting meteofrance features
+        with open(CSV_FILE, "r") as f:
+            reader = DictReader(f, delimiter=',')
+            self._features = [row['name'] for row in reader
+                              if self._config['FEATURES'].getboolean(row['name'])]
+
+
+    @property
+    def start(self):
+        return self._start
+
+    @start.setter
+    def start(self, x):
+        self._start = x
+
+
+    @property
+    def end(self):
+        return self._end
+
+    @end.setter
+    def end(self, x):
+        self._end = x
+
+
+    @property
+    def latitude(self):
+        return self._latitude
+
+    @latitude.setter
+    def latitude(self, x):
+        self._latitude = x
+
+
+    @property
+    def longitude(self):
+        return self._longitude
+
+    @longitude.setter
+    def longitude(self, x):
+        self._longitude = x
+
+
+    @property
+    def nb_stations(self):
+        return self._nb_stations
+
+    @nb_stations.setter
+    def nb_stations(self, x):
+        self._nb_stations = x
 
 
     def _regenerate_directory(self):
 
 
     def _regenerate_directory(self):
@@ -193,10 +247,9 @@ class MeteoFrance:
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
             dict: the dictionary of features per datestamp
         '''
         if self._dated_features == None:
-            csv_file = Path.cwd() / 'config' / 'features' / 'meteofrance_features.csv'
-            logger.info(f'Collecting meteo feature information from {csv_file}')
+            logger.info(f'Collecting meteo feature information from {CSV_FILE}')
             # A dictionary for the features
             # A dictionary for the features
-            with open(csv_file, "r") as f:
+            with open(CSV_FILE, "r") as f:
                 reader = DictReader(f, delimiter=',')
                 dico_features = {row["abbreviation"]:
                                    {
                 reader = DictReader(f, delimiter=',')
                 dico_features = {row["abbreviation"]:
                                    {
@@ -204,6 +257,8 @@ class MeteoFrance:
                                        'type': row['type']  # qualitative (2) or quantitative (1)
                                     }
                                 for row in reader if row['name'] in self._features}
                                        'type': row['type']  # qualitative (2) or quantitative (1)
                                     }
                                 for row in reader if row['name'] in self._features}
+                #print([row for row in reader])
+                #print([row for row in reader if row['name'] in self._features])
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
             for csv_meteo in listdir(dir_data):
             dir_data = Path.cwd() / 'data' / 'features' / 'meteo_france' / 'historical'
             self._dated_features = {}
             for csv_meteo in listdir(dir_data):