from configparser import ConfigParser
from csv import DictReader
from logging import getLogger
# NOTE(review): fileConfig is imported by the original module, but its call
# site is not visible in this view -- confirm where logging gets configured.
from logging.config import fileConfig  # noqa: F401
from os import listdir  # noqa: F401  (kept from the original module)
from pathlib import Path

logger = getLogger()


class Source:
    '''
    Consistency checker for the feature description files of a project.

    Instantiating the class runs the checks; a ValueError is raised as soon
    as one of them fails.
    '''

    def __init__(self, feature_dir=None):
        '''
        Check if the same feature name is used in two different feature
        sources, and if the features of type 3 (being both categorical and
        numerical) have a specified type in the feature_...cfg files.

        Parameters:
            feature_dir: directory holding the feature ``.csv`` and ``.cfg``
                files. Defaults to ``<current dir>/config/features``, which
                is what the original hard-coded.

        Raises:
            ValueError: if a feature name appears more than once in the csv
                files, or if the set of type-3 features does not match the
                cfg sections that define a ``numerical`` option.
        '''
        if feature_dir is None:
            feature_dir = Path.cwd() / 'config' / 'features'
        feature_dir = Path(feature_dir)

        logger.info('Check for redondant feature names')
        # Each csv file is parsed once; both checks reuse the rows.
        rows = self._read_csv_rows(feature_dir)
        duplicated = self._duplicates(row['name'] for row in rows)
        if duplicated:
            raise ValueError("At least two features have the same name: "
                             f"{duplicated}")

        logger.info('Check for specified feature types')
        csv_mixed = {row['name'] for row in rows if row['type'] == '3'}
        cfg_sections = self._read_cfg_mixed_names(feature_dir)
        cfg_mixed = set(cfg_sections)
        # A section repeated across cfg files is ambiguous. The original
        # raised here too, but printed an empty set as the explanation.
        cfg_repeated = self._duplicates(cfg_sections)

        missing_in_cfg = sorted(csv_mixed - cfg_mixed)
        unknown_in_cfg = sorted(cfg_mixed - csv_mixed)
        if missing_in_cfg or unknown_in_cfg or cfg_repeated:
            raise ValueError(
                "Problem with features of mixed types: "
                f"type-3 features without a 'numerical' option in any cfg "
                f"file: {missing_in_cfg}; cfg sections with a 'numerical' "
                f"option that are not type-3 features: {unknown_in_cfg}; "
                f"sections declared in several cfg files: {cfg_repeated}")

    @staticmethod
    def _files_with_suffix(directory, suffix):
        '''Return the regular files of `directory` ending in `suffix`, sorted.'''
        # Sorting makes the traversal order, and thus error messages,
        # deterministic.
        return sorted(path for path in directory.iterdir()
                      if path.is_file() and path.suffix == suffix)

    @staticmethod
    def _duplicates(names):
        '''Return the sorted list of values occurring more than once.'''
        seen = set()
        repeated = set()
        for name in names:
            if name in seen:
                repeated.add(name)
            else:
                seen.add(name)
        return sorted(repeated)

    @classmethod
    def _read_csv_rows(cls, directory):
        '''Return every row (as a dict) of every ``.csv`` file in `directory`.'''
        rows = []
        for csv_path in cls._files_with_suffix(directory, '.csv'):
            # newline='' is what the csv module documentation asks for.
            with open(csv_path, "r", newline='') as f:
                rows.extend(DictReader(f, delimiter=','))
        return rows

    @classmethod
    def _read_cfg_mixed_names(cls, directory):
        '''
        Return the names of the cfg sections having a ``numerical`` option,
        over all ``.cfg`` files of `directory` (repeats are preserved).
        '''
        names = []
        for cfg_path in cls._files_with_suffix(directory, '.cfg'):
            config = ConfigParser()
            config.read(cfg_path)
            # sections() leaves out the DEFAULT pseudo-section.
            names.extend(section for section in config.sections()
                         if config.has_option(section, 'numerical'))
        return names