1 from configparser import ConfigParser
2 from csv import DictReader
3 from logging import getLogger
4 from logging.config import fileConfig
6 from pathlib import Path
# Configure logging from <current working directory>/config/logging.cfg.
fileConfig(Path.cwd().joinpath('config', 'logging.cfg'))
Check whether the same feature name is used in two different feature sources,
and whether the sources of type 3 (being both categorical and numerical) have
a specified type in the feature_...cfg file.
# Duplicate-name check: every value of the 'name' column must be unique
# across all CSV files found in <cwd>/config/features.
# Raises ValueError when at least one name occurs more than once; a CSV
# without a 'name' column raises KeyError from the row lookup.
logger.info('Check for redondant feature names')
feature_files = Path.cwd() / 'config' / 'features'

# Bind the accumulator here so the loop below never extends an unbound name.
list_of_names = []
for csv_path in feature_files.iterdir():
    # Same suffix test as before (plain 'csv', no leading dot), applied to
    # the entry name so no separate directory-listing helper is required.
    if not csv_path.name.endswith('csv'):
        continue
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation asks for file objects passed to a reader.
    with csv_path.open('r', newline='') as f:
        list_of_names.extend(
            row['name'] for row in DictReader(f, delimiter=',')
        )

# A duplicate makes the list longer than the set of its distinct values.
if len(list_of_names) != len(set(list_of_names)):
    raise ValueError("At least two features have the same name")
# Mixed-type check: the names of CSV rows whose 'type' column is '3' must
# match the names of the .cfg sections that define a 'numerical' option.
# Raises ValueError listing the names found on only one side.
logger.info('Check for specified feature types')

# Names declared with type '3' in the CSV feature files.
names_of_mixed_types = []
for csv_path in feature_files.iterdir():
    if not csv_path.name.endswith('csv'):
        continue
    # newline='' leaves line-ending handling to the csv module.
    with csv_path.open('r', newline='') as f:
        names_of_mixed_types.extend(
            row['name']
            for row in DictReader(f, delimiter=',')
            if row['type'] == '3'
        )

# Section names that carry a 'numerical' option in the cfg files.
cfg_names_of_mixed_types = []
for cfg_path in feature_files.iterdir():
    if not cfg_path.name.endswith('cfg'):
        continue
    config = ConfigParser()
    config.read(cfg_path)
    # sections() leaves out the DEFAULT pseudo-section, which iterating the
    # parser itself would yield and which is not a feature name.
    cfg_names_of_mixed_types.extend(
        section
        for section in config.sections()
        if config.has_option(section, 'numerical')
    )

# Compare as sorted lists so a name repeated on one side is still a mismatch.
if sorted(names_of_mixed_types) != sorted(cfg_names_of_mixed_types):
    mismatched = set(names_of_mixed_types).symmetric_difference(
        cfg_names_of_mixed_types
    )
    raise ValueError(f"Problem with features of mixed types: {mismatched}")