From 1de43e30b03129e807a7b15a7f375bea0722115c Mon Sep 17 00:00:00 2001 From: tm689 <tm689@cam.ac.uk> Date: Thu, 11 May 2023 12:49:26 +0100 Subject: [PATCH] chore: creating grouped survey files This is an initial step towards a multiple-source-file setup. This step is NOT utilising the grouped files yet. --- coordinator/ProcessorSurveys.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py index 28661c9..7c06856 100644 --- a/coordinator/ProcessorSurveys.py +++ b/coordinator/ProcessorSurveys.py @@ -10,6 +10,7 @@ from pathlib import Path import os from numpy import all as np_all +from numpy import any as np_any from shutil import copyfile from pandas import read_csv, concat @@ -214,6 +215,28 @@ def process_in_job_survey(jobPath,status,config,component): date = datetime.datetime.now() + # creating grouped survey files + group_directory = f"{jobPath}/Groups" + Path(group_directory).mkdir(parents=True, exist_ok=True) + + origins_list = df_join["Origin"].unique() + groups = {i:[i] for i in origins_list} + + assert not np_any([k in origins_list for k in config['Survey']['Groups'].keys()]) + + groups.update(config['Survey']['Groups']) + + for group_name,group_content in groups.items(): + + logger.info(f"Creating survey group {group_name} which includes {group_content}") + + df_group = df_join.loc[df_join["Origin"].isin(group_content)] + + group_surveys_filename = f"group_{group_name}.csv" + group_surveys_filepath = f"{group_directory}/{group_surveys_filename}" + + df_group.to_csv(group_surveys_filepath, index=False, quoting=csv.QUOTE_MINIMAL) + # prepare environment for clustering calc call_R = False -- GitLab