From ad69c7e286e67c755b9cf4746e67ecd6790ddc9a Mon Sep 17 00:00:00 2001
From: tm689 <tm689@cam.ac.uk>
Date: Wed, 28 Feb 2024 12:14:14 +0000
Subject: [PATCH] feat: separating out the race and genotype information as
 lists in dedicated columns

---
 coordinator/ProcessorSurveysWRT.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/coordinator/ProcessorSurveysWRT.py b/coordinator/ProcessorSurveysWRT.py
index 54446e4..1a1751c 100644
--- a/coordinator/ProcessorSurveysWRT.py
+++ b/coordinator/ProcessorSurveysWRT.py
@@ -160,10 +160,10 @@ def nested_to_flattened(df):
         df[i] = ""
 
     # add dedicated rust columns, with default values
-    NEW_RUST_COLUMNS = {"SR.Incident":"N","SR.Severity":"N","SR.Reaction":"N",
-                   "LR.Incident":"N","LR.Severity":"N","LR.Reaction":"N",
-                   "YR.Incident":"N","YR.Severity":"N","YR.Reaction":"N",
-                   "Septoria.Incident":"N","Septoria.Severity":"N"}
+    NEW_RUST_COLUMNS = {'SR.Incident':'N','SR.Severity':'N','SR.Reaction':'N', 'SR.Race':'', 'SR.Genotype':'',
+                        'LR.Incident':'N','LR.Severity':'N','LR.Reaction':'N', 'LR.Race':'', 'LR.Genotype':'',
+                        'YR.Incident':'N','YR.Severity':'N','YR.Reaction':'N', 'YR.Race':'', 'YR.Genotype':'',
+                        'Septoria.Incident':'N','Septoria.Severity':'N'}
 
     for i in NEW_RUST_COLUMNS.keys():
         df[i] = NEW_RUST_COLUMNS[i]
@@ -171,11 +171,20 @@ def nested_to_flattened(df):
     logger.info('Separating nested information into dedicated columns')
 
     for index,row in df.iterrows():
-        nested_row = row["listDisease"]
+        nested_row = row['listDisease']
         for rr in range(len(nested_row)):
             # separating nested information into the dedicated columns
-            row[nested_row[rr]["DiseaseName"] + ".Incident"] = nested_row[rr]["IncidenceCategory"]
-            row[nested_row[rr]["DiseaseName"] + ".Severity"] = nested_row[rr]["SeverityCategory"]
+            row[nested_row[rr]['DiseaseName'] + '.Incident'] = nested_row[rr]['IncidenceCategory']
+            row[nested_row[rr]['DiseaseName'] + '.Severity'] = nested_row[rr]['SeverityCategory']
+
+            race_list = []
+            geno_list = []
+            for i in range(len(nested_row[rr]['listResult'])):
+                race_list.append(nested_row[rr]['listResult'][i]['Race'])
+                geno_list.append(nested_row[rr]['listResult'][i]['Genotype'])
+            
+            row[nested_row[rr]['DiseaseName'] + '.Race'] = race_list
+            row[nested_row[rr]['DiseaseName'] + '.Genotype'] = geno_list
             df.loc[index] = row
 
     return df
@@ -209,12 +218,18 @@ def get_WRT_form_as_csv(form_credentials: dict, jobPath: str, config: dict, stat
         "YR.Severity" : ('parse_cases',(('name_out','yellow_rust-yellowrust_severity'),('cases', cases_severity),('dtype', int))),
         "YR.Incident" : ('parse_cases',(('name_out','yellow_rust-yellowrust_incidence'),('cases', cases_incident))),
         "YR.Reaction" : 'yellow_rust-yellowrust_host_plant_reaction',
+        "YR.Race" : 'None',
+        "YR.Genotype" : 'None',
         "SR.Severity" : ('parse_cases',(('name_out','stem_rust-Stemrust_severity'),('cases', cases_severity),('dtype', int))),
         "SR.Incident" : ('parse_cases',(('name_out','stem_rust-stemrust_incidence'),('cases', cases_incident))),
         "SR.Reaction" : 'stem_rust-stemrust_host_plant_reaction',
+        "SR.Race" : 'None',
+        "SR.Genotype" : 'None',
         "LR.Severity" : ('parse_cases',(('name_out','leaf_rust-leafrust_severity'),('cases', cases_severity),('dtype', int))),
         "LR.Incident" : ('parse_cases',(('name_out','leaf_rust-leafrust_incidence'),('cases', cases_incident))),
         "LR.Reaction" : 'leaf_rust-leafrust_host_plant_reaction',
+        "LR.Race" : 'None',
+        "LR.Genotype" : 'None',
         "Septoria.Severity" : 'septoria-septoria_severity',
         "Septoria.Incident" : 'septoria-septoria_incidence'
     }
-- 
GitLab