From cf533ecc6df90c89d5ec6c127f53c5cd6a12f2dc Mon Sep 17 00:00:00 2001
From: jws52 <jws52@cam.ac.uk>
Date: Thu, 19 Oct 2023 10:50:43 +0100
Subject: [PATCH] feat: Alternative new ODK form

---
 coordinator/ProcessorSurveys.py        |   4 +-
 coordinator/ProcessorSurveysnewODK.py  |  10 +
 coordinator/ProcessorSurveysnewODK2.py | 243 +++++++++++++++++++++++++
 3 files changed, 256 insertions(+), 1 deletion(-)
 create mode 100644 coordinator/ProcessorSurveysnewODK2.py

diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py
index 33bbd9e..0a70a84 100644
--- a/coordinator/ProcessorSurveys.py
+++ b/coordinator/ProcessorSurveys.py
@@ -35,6 +35,7 @@ from source_gen.clustering import run_case
 from ProcessorSurveysODK import get_ODK_form_as_csv
 from ProcessorSurveysODKv2 import get_ODKv2_form_as_csv
 from ProcessorSurveysnewODK import get_newODK_form_as_csv
+from ProcessorSurveysnewODK2 import get_newODK2_form_as_csv
 from ProcessorSurveyskobotoolbox import get_kobotoolbox_form_as_csv
 from ProcessorSurveysWRSIS import get_WRSIS_form_as_csv
 from ProcessorSurveysWRT import get_WRT_form_as_csv
@@ -71,6 +72,7 @@ class ProcessorSurveys(Processor):
             'WRT':         get_WRT_form_as_csv,
             'ODKv2':       get_ODKv2_form_as_csv,
             'newODK':      get_newODK_form_as_csv,
+            'newODK2' : get_newODK2_form_as_csv,
         }
 
     def process_pre_job_survey(self, input_args):
@@ -463,4 +465,4 @@ class ProcessorSurveys(Processor):
 
 if __name__ == '__main__':
     processor = ProcessorSurveys()
-    processor.run_processor("Survey")
\ No newline at end of file
+    processor.run_processor("Survey")
diff --git a/coordinator/ProcessorSurveysnewODK.py b/coordinator/ProcessorSurveysnewODK.py
index 90d5637..caa817c 100644
--- a/coordinator/ProcessorSurveysnewODK.py
+++ b/coordinator/ProcessorSurveysnewODK.py
@@ -39,13 +39,23 @@ cases_severity = {
     '10':10,
     '15':15,
     '20':20,
+    '25':25,
     '30':30,
+    '35':35,
     '40':40,
+    '45':45,
     '50':50,
+    '55':55,
     '60':60,
+    '65':65,
     '70':70,
+    '75':75,
     '80':80,   
     '80+':80,
+    '85':85,
+    '90':90,   
+    '95':95,
+    '100':100
     }
 
 def get_from_kobotoolbox(url,form_id,form_token,**kwargs):
diff --git a/coordinator/ProcessorSurveysnewODK2.py b/coordinator/ProcessorSurveysnewODK2.py
new file mode 100644
index 0000000..720a129
--- /dev/null
+++ b/coordinator/ProcessorSurveysnewODK2.py
@@ -0,0 +1,243 @@
+#ProcessorSurveysnewODK2.py
+"""Functions for parsing wheat rust survey records from the alternative new ODK form (newODK2) on the kobotoolbox server."""
+
+import csv
+import datetime
+import logging
+import os
+from pathlib import Path
+import requests
+
+from shutil import copyfile
+from pandas import DataFrame
+
+from ProcessorSurveyUtils import parse_columns
+from ProcessorSurveysnewODK import (
+        cases_incident,
+        cases_severity,
+        get_from_kobotoolbox,
+        build_dataframe
+)
+from ProcessorUtils import (
+        endJob,
+        add_filters_to_sublogger,
+)
+
+logger = logging.getLogger('Processor.Surveys.kobotoolbox')
+add_filters_to_sublogger(logger)
+
+
+def get_newODK2_form_as_csv(form_credentials: dict, jobPath: str, config: dict, status) -> str:
+    """Download one kobotoolbox form (newODK2 layout) and convert it to csv.
+
+    ``form_credentials`` is unpacked into get_from_kobotoolbox() and must hold
+    'form_id'; outputs are written under ``jobPath``; ``config`` supplies the
+    'Survey', 'StartString' and 'WorkspacePath' entries; ``status`` is reset on
+    failure. Returns the path of the processed csv (freshly built, or copied
+    from a recent job when the download is skipped or fails).
+    """
+    output_dir = 'Export_newCSV2'
+    output_path = f"{jobPath}/{output_dir}/"
+    Path(output_path).mkdir(parents=True, exist_ok=True)
+
+    # Keys are column names in the raw dataframe. Judging by the entries below, the
+    # string 'None' marks a column to drop, any other string renames it, and a
+    # (parser name, kwargs) tuple runs that parser - TODO confirm in parse_columns.
+    column_parser_dict =  {
+            '_id' : 'None',
+            'formhub/uuid' : 'None',
+            'start' : ('parse_date',(('name_out','start'),('fmt_in','%Y-%m-%dT%H:%M:%S.%f%z'))),
+            'end' : ('parse_date',(('name_out','end'),('fmt_in','%Y-%m-%dT%H:%M:%S.%f%z'))),
+            'today' : ('parse_date',(('name_out','today'),('fmt_in','%Y-%m-%d'))),
+            'deviceid' : 'deviceid',
+            'imei' : 'imei',
+            'phonenumber' : 'None',
+            'username' : 'username',
+            'surveyor_information/region' : 'None',
+            'surveyor_information/country' : 'surveyor_infromation-country',
+            'surveyor_information/surveyor' : 'surveyor_infromation-surveyor_name',
+            'surveyor_information/institution' : 'surveyor_infromation-institution',
+            'survey_information/admin_level_1' : 'None',
+            'survey_information/admin_level_2' : 'None',
+            'survey_information/admin_level_3' : 'None',
+            'survey_information/admin_level_4' : 'None',
+            'survey_information/location' : 'None',
+            'survey_information/location_aggregate' : 'None',
+            'survey_information/location_name' : 'survey_infromation-location_name',
+            'survey_information/location_gps' : ('parse_location_kobotoolbox',()),
+            'survey_information/survey_date' : ('parse_date',(('name_out','survey_infromation-survey_date'),('fmt_in','%Y-%m-%d'))),
+            'survey_information/survey_season' : 'None',
+            'site_information/survey_site' : 'site_information-survey_site',
+            'site_information/crop' : 'site_information-crop',
+            'site_information/growth_stage' : 'site_information-growth_stage',
+            'site_information/area_unit' : 'None',
+            'site_information/field_area' : 'None',
+            'site_information/field_area_in_ha' : 'site_information-field_area',
+            'site_information/variety' : 'site_information-variety',
+            'site_information/survey_site_other' : 'None',
+            'major_observed_diseases' : 'None',
+            'SR/SR_incidence' : ('parse_cases',(('name_out','stem_rust-stemrust_incidence'),('cases', cases_incident),('fillna','none'))),
+            'SR/SR_severity' : ('parse_cases',(('name_out','stem_rust-Stemrust_severity'),('cases', cases_severity),('dtype', int),('fillna','0'))),
+            'SR/SR_IT' : 'stem_rust-stemrust_host_plant_reaction',
+            'SR/SR_image' : 'None',
+            'LR/LR_incidence' : ('parse_cases',(('name_out','leaf_rust-leafrust_incidence'),('cases', cases_incident),('fillna','none'))),
+            'LR/LR_severity' : ('parse_cases',(('name_out','leaf_rust-leafrust_severity'),('cases', cases_severity),('dtype', int),('fillna','0'))),
+            'LR/LR_IT' : 'leaf_rust-leafrust_host_plant_reaction',
+            'LR/LR_image' : 'None',
+            'YR/YR_incidence' : ('parse_cases',(('name_out','yellow_rust-yellowrust_incidence'),('cases', cases_incident),('fillna','none'))),
+            'YR/YR_severity' : ('parse_cases',(('name_out','yellow_rust-yellowrust_severity'),('cases', cases_severity),('dtype', int),('fillna','0'))),
+            'YR/YR_IT' : 'yellow_rust-yellowrust_host_plant_reaction',
+            'YR/YR_image' : 'None',
+            'YR_head/YR_head_infection' : '', # NOTE(review): empty target name on these three - confirm how parse_columns treats ''
+            'YR_head/YR_head_incidence' : '',
+            'YR_head/YR_head_severity' : '',
+            'YR_head/YR_head_image' : 'None',
+            'septoria/septoria_incidence' : 'septoria-septoria_incidence',
+            'septoria/septoria_severity' : 'septoria-septoria_severity',
+            'septoria/septoria_image' : 'None',
+            'blast/blast_incidence' : 'None',
+            'blast/blast_severity' : 'None',
+            'blast/blast_image' : 'None',
+            'SB/SB_incidence' : 'None',
+            'SB/SB_severity' : 'None',
+            'SB/SB_image' : 'None',
+            'FHB/FHB_incidence' : 'None',
+            'FHB/FHB_severity' : 'None',
+            'FHB/FHB_image' : 'None',
+            'other_observed_diseases_pests' : 'None',
+            'other_disease_reapeat' : 'None',
+            'score_diseases_count' : 'None',
+            'score_diseases' : 'None',
+            'observed_other_pests' : 'None',
+            'observed_other_pests_record_count' : 'None',
+            'observed_other_pests_record' : 'None',
+            'insects/insect_damage' : 'None',
+            'fungicide_applied' : 'None',
+            'Fungicide_information/fungicide_names' : 'None',
+            'Fungicide_information/Application_frequency' : 'None',
+            'Fungicide_application_information/application_dates_count' : 'None',
+            'Fungicide_application_information/application_dates' : 'None',
+            'samples_collected_y_n' : 'samples_collected',
+            'samples_type' : 'samples_type',
+            'samples_count' : 'None',
+            'samples' : 'None',
+            'comment' : 'comment',
+            '__version__' : 'None',
+            'meta/instanceID' : 'meta-instanceID',
+            '_xform_id_string' : 'None',
+            '_uuid' : 'KEY',
+            '_attachments' : 'None',
+            '_status' : 'None',
+            '_geolocation' : 'None', # looks like a duplication of survey_information/location
+            '_submission_time' : ('parse_date',(('name_out','SubmissionDate'),('fmt_in','%Y-%m-%dT%H:%M:%S'))),
+            '_tags' : 'None',
+            '_notes' : 'None',
+            '_validation_status' : 'None',
+            '_submitted_by' : 'None',
+            }
+
+    unavailable_at_top_level = {  # NOTE(review): not referenced anywhere below in this function
+            'dead_stemrust_samples' : 'SET-OF-dead_stemrust_samples',
+            'dead_stemrust_samples_count' : 'dead_stemrust_samples_count',
+            'dead_yellowrust_samples' : 'SET-OF-dead_yellowrust_samples',
+            'dead_yellowrust_samples_count' : 'dead_yellowrust_samples_count',
+            'live_leafrust_samples' : 'SET-OF-live_leafrust_samples',
+            'live_leafrust_samples_count' : 'live_leafrust_samples_count',
+            'live_stemrust_samples' : 'SET-OF-live_stemrust_samples',
+            'live_stemrust_samples_count' : 'live_stemrust_samples_count',
+            'live_yellowrust_samples' : 'SET-OF-live_yellowrust_samples',
+            'live_yellowrust_samples_count' : 'live_yellowrust_samples_count',
+            }
+
+    logger.debug('Performing download')
+
+    # perform a pull from the server; if it fails, log a warning and fall back
+    # to the processed csv of a recent job (handled further down)
+    download_success = True
+
+    skip_download: bool = config['Survey'].get('SkipServerDownload', False)
+
+    if not skip_download:
+        try:
+            request = get_from_kobotoolbox(**form_credentials)
+        except requests.exceptions.RequestException as e:
+            logger.warning(f"Download from kobotoolbox server failed: {e}")
+            status.reset('WARNING')
+            download_success = False
+
+    # define filenames
+    csv_filename = f"SurveyData_{form_credentials['form_id']}.csv"
+
+    csv_processed_filename = "SurveyDataProcessed.csv"
+    csv_processed_path = f"{output_path}/{csv_processed_filename}"
+
+    if download_success and not skip_download:
+        # parse dataframe
+        dataframe_raw = build_dataframe(request)
+
+        logger.debug('Saving raw csv file')
+
+        # NOTE(review): csv_filename already ends in ".csv", so the raw file is
+        # written with a doubled ".csv.csv" suffix - kept as-is for compatibility
+        df_raw_filename = f"{output_path}/{csv_filename}.csv"
+
+        dataframe_raw.to_csv(df_raw_filename,index=False,quoting=csv.QUOTE_MINIMAL)
+
+        # process to match ODK format
+        dataframe_processed = parse_columns(dataframe_raw,column_parser_dict)
+
+        logger.debug('Saving processed csv file')
+
+        dataframe_processed.to_csv(csv_processed_path,index=False,quoting=csv.QUOTE_MINIMAL)
+
+    if not download_success or skip_download:
+        logger.info("Because server download failed somewhere (or we are skipping downloads), trying to recover by copying recent download")
+
+        copy_success = False
+
+        days_back = 1
+        acceptable_days_back = int(config['Survey']['AcceptableDowntimeDays'])
+        logger.debug(f"Acceptable server downtime is set to {acceptable_days_back} days")
+
+        # the job date does not change between attempts, so parse it once
+        current_date = datetime.datetime.strptime(config['StartString'],'%Y%m%d')
+
+        while ((not copy_success) and (days_back <= acceptable_days_back)):
+
+            past_date = current_date - datetime.timedelta(days=days_back)
+
+            past_jobPath = f"{config['WorkspacePath']}/SURVEYDATA_{past_date.strftime('%Y%m%d')}"
+
+            past_output_path = f"{past_jobPath}/{output_dir}/"
+
+            try:
+                # check that python or perl coordinator script succeeded for that date
+                success_py = os.path.isfile(f"{past_jobPath}/STATUS_SUCCESS")
+                success_perl = os.path.isfile(f"{past_jobPath}/SURVEYDATA_SUCCESS.txt")
+                # explicit raises rather than assert, which is stripped under python -O
+                if not (success_py or success_perl):
+                    raise FileNotFoundError(f"No success marker in {past_jobPath}")
+
+                past_csv_filename = csv_processed_filename
+
+                logger.info(f"Looking for {past_output_path+past_csv_filename}")
+
+                copyfile(past_output_path+past_csv_filename,csv_processed_path)
+
+                if not os.path.isfile(csv_processed_path):
+                    raise FileNotFoundError(csv_processed_path)
+
+                copy_success = True
+            except OSError:
+                logger.info(f"Not found a kobotoolbox download in {past_output_path}")
+
+            days_back += 1
+
+        if not copy_success:
+            logger.error("Failed get a suitable copy of survey data.")
+            status.reset('ERROR')
+            endJob(status,premature=True)  # NOTE(review): presumably exits; if not, past_jobPath may be unbound/misleading below
+
+        logger.warning(f"Using download from {past_jobPath}.")
+
+    return csv_processed_path
-- 
GitLab