From 1dc1aff0026aff353a2214b89bd901d09c07acb5 Mon Sep 17 00:00:00 2001 From: tm689 <tm689@cam.ac.uk> Date: Thu, 29 Feb 2024 17:18:57 +0000 Subject: [PATCH] feat: converting double nested Race and Genotype information in to string. (Adding missing columns) --- coordinator/ProcessorSurveysWRT.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/coordinator/ProcessorSurveysWRT.py b/coordinator/ProcessorSurveysWRT.py index 1a1751c..d2a50c2 100644 --- a/coordinator/ProcessorSurveysWRT.py +++ b/coordinator/ProcessorSurveysWRT.py @@ -101,6 +101,7 @@ def nested_to_flattened(df): RAW_COLUMNS = [ "ObservationID", "OriginalID", + "PublishedLevel", "Origin", "Country", "Latitude", @@ -108,6 +109,7 @@ def nested_to_flattened(df): "FieldArea", "Cultivar", "CollectionDate", + "UpdateDate", "GrowthStage", "listDisease"] for i in RAW_COLUMNS: @@ -177,14 +179,16 @@ def nested_to_flattened(df): row[nested_row[rr]['DiseaseName'] + '.Incident'] = nested_row[rr]['IncidenceCategory'] row[nested_row[rr]['DiseaseName'] + '.Severity'] = nested_row[rr]['SeverityCategory'] - race_list = [] - geno_list = [] + nested_row[rr]['listResult'] = [{'Race': 'Alma', 'Genotype': 'Korte'},{'Race': 'Banan', 'Genotype': 'Malna'}] # !!!!!!!!! DELETE THIS LINE !!!!!!!!!! for i in range(len(nested_row[rr]['listResult'])): - race_list.append(nested_row[rr]['listResult'][i]['Race']) - geno_list.append(nested_row[rr]['listResult'][i]['Genotype']) + # TODO: check if the separation symbol is in the string or not + row[nested_row[rr]['DiseaseName'] + '.Race'] += nested_row[rr]['listResult'][i]['Race'] + row[nested_row[rr]['DiseaseName'] + '.Genotype'] += nested_row[rr]['listResult'][i]['Genotype'] + + if i != len(nested_row[rr]['listResult'])-1: + row[nested_row[rr]['DiseaseName'] + '.Race'] += '+' + row[nested_row[rr]['DiseaseName'] + '.Genotype'] += '+' - row[nested_row[rr]['DiseaseName'] + '.Race'] = race_list - row[nested_row[rr]['DiseaseName'] + '.Genotype'] = geno_list df.loc[index] = row return df @@ -206,13 +210,15 @@ def get_WRT_form_as_csv(form_credentials: dict, jobPath: str, config: dict, stat column_parser_dict = { "ObservationID" : 'KEY', "OriginalID" : 'None', - "Origin" : "Origin", + "PublishedLevel" : 'PublishedLevel', + "Origin" : 'Origin', "Country" : ('parse_cases',(('name_out','surveyor_infromation-country'),('cases', COUNTRY_ABBREVIATIONS_DICT))), "Latitude" : 'survey_infromation-location-Latitude', "Longitude" : 'survey_infromation-location-Longitude', "FieldArea" : 'site_information-field_area', "Cultivar" : 'site_information-variety', "CollectionDate" : ('parse_date',(('name_out','survey_infromation-survey_date'),('fmt_in','%m/%d/%Y'))), + "UpdateDate" : 'None', "GrowthStage" : 'site_information-growth_stage', "listDisease" : 'None', "YR.Severity" : ('parse_cases',(('name_out','yellow_rust-yellowrust_severity'),('cases', cases_severity),('dtype', int))), -- GitLab