chore: creating grouped survey files

This is an initial step towards a setup with multiple source files. The grouped survey files are created in this step but are NOT used by any downstream processing yet.

chore: creating grouped survey files
1de43e30 · Dr T. Mona · baca46e6 · 1de43e30
Commit 1de43e30 authored 1 year ago by Dr T. Mona
--- a/coordinator/ProcessorSurveys.py
+++ b/coordinator/ProcessorSurveys.py
@@ -10,6 +10,7 @@ from pathlib import Path
 import os

 from numpy import all as np_all
+from numpy import any as np_any
 from shutil import copyfile
 from pandas import read_csv, concat

@@ -214,6 +215,28 @@ def process_in_job_survey(jobPath,status,config,component):

    date = datetime.datetime.now()

+    # write one CSV per survey group: each origin on its own, plus any groups defined in config['Survey']['Groups']
+    group_directory = f"{jobPath}/Groups"
+    Path(group_directory).mkdir(parents=True, exist_ok=True)
+
+    origins_list = df_join["Origin"].unique()
+    groups = {i:[i] for i in origins_list}
+
+    assert not np_any([k in origins_list for k in config['Survey']['Groups'].keys()])
+
+    groups.update(config['Survey']['Groups'])
+    
+    for group_name,group_content in groups.items():
+
+        logger.info(f"Creating survey group {group_name} which includes {group_content}")
+        
+        df_group = df_join.loc[df_join["Origin"].isin(group_content)]
+
+        group_surveys_filename = f"group_{group_name}.csv"
+        group_surveys_filepath = f"{group_directory}/{group_surveys_filename}"
+        
+        df_group.to_csv(group_surveys_filepath, index=False, quoting=csv.QUOTE_MINIMAL)
+
    # prepare environment for clustering calc
    call_R = False