diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py
index 28661c95c94903d48af67b32cc456ce2336cea49..7c068568e0cd26e10551cf73c53c38cb0ce23f48 100644
--- a/coordinator/ProcessorSurveys.py
+++ b/coordinator/ProcessorSurveys.py
@@ -10,6 +10,7 @@ from pathlib import Path
 import os
 
 from numpy import all as np_all
+from numpy import any as np_any
 from shutil import copyfile
 
 from pandas import read_csv, concat
@@ -214,6 +215,28 @@ def process_in_job_survey(jobPath,status,config,component):
 
     date = datetime.datetime.now()
 
+    # creating grouped survey files
+    group_directory = f"{jobPath}/Groups"
+    Path(group_directory).mkdir(parents=True, exist_ok=True)
+
+    origins_list = df_join["Origin"].unique()
+    groups = {i:[i] for i in origins_list}
+
+    assert not np_any([k in origins_list for k in config['Survey']['Groups'].keys()])
+
+    groups.update(config['Survey']['Groups'])
+
+    for group_name,group_content in groups.items():
+
+        logger.info(f"Creating survey group {group_name} which includes {group_content}")
+
+        df_group = df_join.loc[df_join["Origin"].isin(group_content)]
+
+        group_surveys_filename = f"group_{group_name}.csv"
+        group_surveys_filepath = f"{group_directory}/{group_surveys_filename}"
+
+        df_group.to_csv(group_surveys_filepath, index=False, quoting=csv.QUOTE_MINIMAL)
+
     # prepare environment for clustering calc
     call_R = False
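
For reference, below is a minimal standalone sketch of what the added block does, separate from the diff itself. The config keys, origin names, and group name ("AllServers") are hypothetical illustrations of what config['Survey']['Groups'] and df_join might contain; the real values come from the coordinator's own config and survey data.

# Standalone sketch of the grouping step (hypothetical data and config).
from pathlib import Path
import csv

from numpy import any as np_any
from pandas import DataFrame

# Hypothetical stand-in for df_join as built earlier in process_in_job_survey.
df_join = DataFrame({
    "Origin": ["ODK_server_1", "ODK_server_2", "ODK_server_3"],
    "SurveyID": [101, 102, 103],
})

# Hypothetical config entry: one named group spanning several origins.
config = {"Survey": {"Groups": {"AllServers": ["ODK_server_1", "ODK_server_2", "ODK_server_3"]}}}

group_directory = "./Groups"
Path(group_directory).mkdir(parents=True, exist_ok=True)

# One implicit single-origin group per origin found in the joined surveys ...
origins_list = df_join["Origin"].unique()
groups = {i: [i] for i in origins_list}

# ... and configured group names must not clash with origin names.
assert not np_any([k in origins_list for k in config['Survey']['Groups'].keys()])

# Add the multi-origin groups defined in the config.
groups.update(config['Survey']['Groups'])

for group_name, group_content in groups.items():
    df_group = df_join.loc[df_join["Origin"].isin(group_content)]
    df_group.to_csv(f"{group_directory}/group_{group_name}.csv",
                    index=False, quoting=csv.QUOTE_MINIMAL)

# With the example data above this writes group_ODK_server_1.csv,
# group_ODK_server_2.csv, group_ODK_server_3.csv and group_AllServers.csv
# under ./Groups, mirroring the per-origin and configured-group CSVs
# the diff writes under {jobPath}/Groups.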