From 1de43e30b03129e807a7b15a7f375bea0722115c Mon Sep 17 00:00:00 2001
From: tm689 <tm689@cam.ac.uk>
Date: Thu, 11 May 2023 12:49:26 +0100
Subject: [PATCH] chore: create grouped survey files. This is an initial step
 towards a multiple-source-file setup; the grouped files are written but NOT
 used yet.

---
 coordinator/ProcessorSurveys.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py
index 28661c9..7c06856 100644
--- a/coordinator/ProcessorSurveys.py
+++ b/coordinator/ProcessorSurveys.py
@@ -10,6 +10,7 @@ from pathlib import Path
 import os
 
 from numpy import all as np_all
+from numpy import any as np_any
 from shutil import copyfile
 from pandas import read_csv, concat
 
@@ -214,6 +215,28 @@ def process_in_job_survey(jobPath,status,config,component):
 
     date = datetime.datetime.now()
 
+    # write one CSV per survey group (each Origin alone, plus config['Survey']['Groups']) into {jobPath}/Groups
+    group_directory = f"{jobPath}/Groups"
+    Path(group_directory).mkdir(parents=True, exist_ok=True)
+
+    origins_list = df_join["Origin"].unique()
+    groups = {i:[i] for i in origins_list}
+
+    assert not np_any([k in origins_list for k in config['Survey']['Groups'].keys()])
+
+    groups.update(config['Survey']['Groups'])
+    
+    for group_name,group_content in groups.items():
+
+        logger.info(f"Creating survey group {group_name} which includes {group_content}")
+        
+        df_group = df_join.loc[df_join["Origin"].isin(group_content)]
+
+        group_surveys_filename = f"group_{group_name}.csv"
+        group_surveys_filepath = f"{group_directory}/{group_surveys_filename}"
+        
+        df_group.to_csv(group_surveys_filepath, index=False, quoting=csv.QUOTE_MINIMAL)
+
     # prepare environment for clustering calc
     call_R = False
 
-- 
GitLab