From 1dc1aff0026aff353a2214b89bd901d09c07acb5 Mon Sep 17 00:00:00 2001
From: tm689 <tm689@cam.ac.uk>
Date: Thu, 29 Feb 2024 17:18:57 +0000
Subject: [PATCH] feat: converting double-nested Race and Genotype information
 into strings. (Adding missing columns)

---
 coordinator/ProcessorSurveysWRT.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/coordinator/ProcessorSurveysWRT.py b/coordinator/ProcessorSurveysWRT.py
index 1a1751c..d2a50c2 100644
--- a/coordinator/ProcessorSurveysWRT.py
+++ b/coordinator/ProcessorSurveysWRT.py
@@ -101,6 +101,7 @@ def nested_to_flattened(df):
         RAW_COLUMNS = [
                 "ObservationID",
                 "OriginalID",
+                "PublishedLevel",
                 "Origin",
                 "Country",
                 "Latitude",
@@ -108,6 +109,7 @@ def nested_to_flattened(df):
                 "FieldArea",
                 "Cultivar",
                 "CollectionDate",
+                "UpdateDate",
                 "GrowthStage",
                 "listDisease"]
         for i in RAW_COLUMNS:
@@ -177,14 +179,16 @@ def nested_to_flattened(df):
             row[nested_row[rr]['DiseaseName'] + '.Incident'] = nested_row[rr]['IncidenceCategory']
             row[nested_row[rr]['DiseaseName'] + '.Severity'] = nested_row[rr]['SeverityCategory']
 
-            race_list = []
-            geno_list = []
+            nested_row[rr]['listResult'] = [{'Race': 'Alma', 'Genotype': 'Korte'},{'Race': 'Banan', 'Genotype': 'Malna'}] # !!!!!!!!! DELETE THIS LINE !!!!!!!!!!
             for i in range(len(nested_row[rr]['listResult'])):
-                race_list.append(nested_row[rr]['listResult'][i]['Race'])
-                geno_list.append(nested_row[rr]['listResult'][i]['Genotype'])
+                # TODO: check whether the '+' separator already occurs in the Race/Genotype strings
+                row[nested_row[rr]['DiseaseName'] + '.Race'] += nested_row[rr]['listResult'][i]['Race']
+                row[nested_row[rr]['DiseaseName'] + '.Genotype'] += nested_row[rr]['listResult'][i]['Genotype']
+
+                if i != len(nested_row[rr]['listResult'])-1:
+                    row[nested_row[rr]['DiseaseName'] + '.Race'] += '+'
+                    row[nested_row[rr]['DiseaseName'] + '.Genotype'] += '+'
             
-            row[nested_row[rr]['DiseaseName'] + '.Race'] = race_list
-            row[nested_row[rr]['DiseaseName'] + '.Genotype'] = geno_list
             df.loc[index] = row
 
     return df
@@ -206,13 +210,15 @@ def get_WRT_form_as_csv(form_credentials: dict, jobPath: str, config: dict, stat
     column_parser_dict = {
         "ObservationID" : 'KEY',
         "OriginalID" : 'None',
-        "Origin" : "Origin",
+        "PublishedLevel" : 'PublishedLevel',
+        "Origin" : 'Origin',
         "Country" : ('parse_cases',(('name_out','surveyor_infromation-country'),('cases', COUNTRY_ABBREVIATIONS_DICT))),
         "Latitude" : 'survey_infromation-location-Latitude',
         "Longitude" : 'survey_infromation-location-Longitude',
         "FieldArea" : 'site_information-field_area',
         "Cultivar" : 'site_information-variety',
         "CollectionDate" : ('parse_date',(('name_out','survey_infromation-survey_date'),('fmt_in','%m/%d/%Y'))),
+        "UpdateDate" : 'None',
         "GrowthStage" : 'site_information-growth_stage',
         "listDisease" : 'None',
         "YR.Severity" : ('parse_cases',(('name_out','yellow_rust-yellowrust_severity'),('cases', cases_severity),('dtype', int))),
-- 
GitLab