+from configparser import ConfigParser
from csv import DictReader
from logging import getLogger
from logging.config import fileConfig
class Source:
    '''
    Consistency checks on the feature files of ``config/features``.

    The checks run when the class is instantiated.
    '''

    def __init__(self):
        '''
        Check if the same feature name is used in two different feature
        sources, and if the sources of type 3 (being both categorical and
        numerical) have a specified type in the feature_...cfg file.

        The csv files of ``<cwd>/config/features`` must provide a ``name``
        and a ``type`` column. A feature is considered "specified" when a
        cfg file of the same directory has a section named after it that
        defines a ``numerical`` option.

        Raises:
            ValueError: if a feature name appears more than once across the
                csv files, or if the names of the type 3 features do not
                match the cfg sections defining ``numerical``.
        '''
        feature_files = Path.cwd() / 'config' / 'features'
        # Sorted so the files are always processed in the same order.
        entries = sorted(feature_files.iterdir())

        logger.info('Check for redondant feature names')
        # Every csv file is parsed once; the rows are kept so that the
        # second check does not have to reopen the files.
        rows = []
        for path in entries:
            # Compare the real suffix: endswith('csv') would also accept a
            # file called e.g. 'notacsv'.
            if path.suffix != '.csv':
                continue
            # newline='' is what the csv module expects from its input.
            with open(path, "r", newline='') as f:
                rows.extend(DictReader(f, delimiter=','))

        list_of_names = [row['name'] for row in rows]
        if len(list_of_names) != len(set(list_of_names)):
            raise ValueError("At least two features have the same name")

        logger.info('Check for specified feature types')
        names_of_mixed_types = [row['name'] for row in rows
                                if row['type'] == '3']

        cfg_names_of_mixed_types = []
        for path in entries:
            if path.suffix != '.cfg':
                continue
            config = ConfigParser()
            config.read(path)
            # sections() leaves out the implicit DEFAULT section, which is
            # never a feature name.
            cfg_names_of_mixed_types.extend(
                section for section in config.sections()
                if config.has_option(section, 'numerical'))

        if sorted(names_of_mixed_types) != sorted(cfg_names_of_mixed_types):
            mismatch = set(names_of_mixed_types).symmetric_difference(
                cfg_names_of_mixed_types)
            raise ValueError(f"Problem with features of mixed types: "
                             f"{mismatch}")