#ProcessorSurveyskobotoolbox.py
"""Functions for parsing wheat rust survey records from the new ODK form on the kobotoolbox server."""

import csv
import datetime
import logging
import os
from pathlib import Path
import requests

from shutil import copyfile
from pandas import DataFrame

from ProcessorSurveyUtils import parse_columns
from ProcessorSurveysnewODK import (
    cases_incident,
    cases_severity,
    get_from_kobotoolbox,
    build_dataframe
)
from ProcessorUtils import (
        endJob,
        add_filters_to_sublogger,
)

logger = logging.getLogger('Processor.Surveys.kobotoolbox')
add_filters_to_sublogger(logger)

def get_newODK2_form_as_csv(form_credentials: dict, jobPath: str, config: dict, status) -> str:
    '''Given a dict with a single kobotoolbox form to download from a
    kobotoolbox server, obtains it and converts to csv.

    The raw download and the processed (column-parsed) table are both written
    as csv files under ``<jobPath>/Export_newCSV2/``. If the download raises a
    ``requests`` exception, or ``config['Survey']['SkipServerDownload']`` is
    truthy, the processed csv of a previous job is copied instead, searching
    back one day at a time up to ``config['Survey']['AcceptableDowntimeDays']``
    days before ``config['StartString']``.

    Parameters:
        form_credentials: keyword arguments forwarded to
            ``get_from_kobotoolbox``; must also contain ``'form_id'``, which
            is used in the raw csv filename.
        jobPath: directory of the current job; the output directory is
            created inside it.
        config: job configuration. Keys read here: ``'Survey'`` (with
            ``'SkipServerDownload'`` optional and ``'AcceptableDowntimeDays'``
            required on the recovery path), ``'StartString'`` (``%Y%m%d``) and
            ``'WorkspacePath'``.
        status: job status object; ``reset('WARNING')`` is called on download
            failure and ``reset('ERROR')`` plus ``endJob`` when no past
            download can be recovered.

    Returns:
        Path (as a string) of the processed csv file for this job.
    '''

    output_dir = 'Export_newCSV2'
    output_path = f"{jobPath}/{output_dir}/"

    Path(output_path).mkdir(parents=True, exist_ok=True)

    # get data from kobotoolbox server

    # keys are column names in the input dataframe
    # values that are the string 'None' mean they should be dropped
    # values that are any other string simply rename the column
    # values that are tuples name a parsing function and its keyword
    # arguments, to be run with that key and return a series/dataframe
    # NOTE(review): the exact handling lives in parse_columns, which is not
    # visible from this file - confirm there.
    column_parser_dict = {
        '_id' : 'None',
        'formhub/uuid' : 'None',
        'start' : ('parse_date',(('name_out','start'),('fmt_in','%Y-%m-%dT%H:%M:%S.%f%z'))),
        'end' : ('parse_date',(('name_out','end'),('fmt_in','%Y-%m-%dT%H:%M:%S.%f%z'))),
        'today' : ('parse_date',(('name_out','today'),('fmt_in','%Y-%m-%d'))),
        'deviceid' : 'deviceid',
        'imei' : 'imei',
        #'phonenumber' : 'None',
        'username' : 'username',
        'surveyor_information/region' : 'None',
        'surveyor_information/country' : 'surveyor_infromation-country',
        'surveyor_information/surveyor' : 'surveyor_infromation-surveyor_name',
        'surveyor_information/institution' : 'surveyor_infromation-institution',
        'survey_information/admin_level_1' : 'None',
        'survey_information/admin_level_2' : 'None',
        'survey_information/admin_level_3' : 'None',
        'survey_information/admin_level_4' : 'None',
        'survey_information/location' : 'None',
        'survey_information/location_aggregate' : 'None',
        'survey_information/location_name' : 'survey_infromation-location_name',
        'survey_information/location_gps' : ('parse_location_kobotoolbox',()),
        'survey_information/survey_date' : ('parse_date',(('name_out','survey_infromation-survey_date'),('fmt_in','%Y-%m-%d'))),
        'survey_information/survey_season' : 'None',
        'site_information/survey_site' : 'site_information-survey_site',
        'site_information/crop' : 'site_information-crop',
        'site_information/growth_stage' : 'site_information-growth_stage',
        'site_information/area_unit' : 'None',
        'site_information/field_area' : 'None',
        'site_information/field_area_in_ha' : 'site_information-field_area',
        #'site_information/variety_eth' : 'site_information-variety',
        'site_information/survey_site_other' : 'None',
        'major_observed_diseases' : 'None',
        'SR/SR_incidence' : ('parse_cases',(('name_out','stem_rust-stemrust_incidence'),('cases', cases_incident),('fillna','none'))),
        'SR/SR_severity' : ('parse_cases',(('name_out','stem_rust-Stemrust_severity'),('cases', cases_severity),('dtype', int),('fillna','0'))),
        'SR/SR_IT' : 'stem_rust-stemrust_host_plant_reaction',
        'SR/SR_image' : 'None',
        'LR/LR_incidence' : ('parse_cases',(('name_out','leaf_rust-leafrust_incidence'),('cases', cases_incident),('fillna','none'))),
        'LR/LR_severity' : ('parse_cases',(('name_out','leaf_rust-leafrust_severity'),('cases', cases_severity),('dtype', int),('fillna','0'))),
        'LR/LR_IT' : 'leaf_rust-leafrust_host_plant_reaction',
        'LR/LR_image' : 'None',
        'YR/YR_incidence' : ('parse_cases',(('name_out','yellow_rust-yellowrust_incidence'),('cases', cases_incident),('fillna','none'))),
        'YR/YR_severity' : ('parse_cases',(('name_out','yellow_rust-yellowrust_severity'),('cases', cases_severity),('dtype', int),('fillna','0'))),
        'YR/YR_IT' : 'yellow_rust-yellowrust_host_plant_reaction',
        'YR/YR_image' : 'None',
        'YR_head/YR_head_infection' : '',
        'YR_head/YR_head_incidence' : '',
        'YR_head/YR_head_severity' : '',
        'YR_head/YR_head_image' : 'None',
        'septoria/septoria_incidence' : 'septoria-septoria_incidence',
        'septoria/septoria_severity' : 'septoria-septoria_severity',
        'septoria/septoria_image' : 'None',
        #'blast/blast_incidence' : 'None',
        #'blast/blast_severity' : 'None',
        #'blast/blast_image' : 'None',
        #'SB/SB_incidence' : 'None',
        #'SB/SB_severity' : 'None',
        #'SB/SB_image' : 'None',
        'FHB/FHB_incidence' : 'None',
        'FHB/FHB_severity' : 'None',
        'FHB/FHB_image' : 'None',
        'other_observed_diseases_pests' : 'None',
        'other_disease_reapeat' : 'None',
        'score_diseases_count' : 'None',
        'score_diseases' : 'None',
        'observed_other_pests' : 'None',
        'observed_other_pests_record_count' : 'None',
        'observed_other_pests_record' : 'None',
        'insects/insect_damage' : 'None',
        'fungicide_applied' : 'None',
        'Fungicide_information/fungicide_names' : 'None',
        'Fungicide_information/Application_frequency' : 'None',
        'Fungicide_application_information/application_dates_count' : 'None',
        'Fungicide_application_information/application_dates' : 'None',
        'samples_collected_y_n' : 'samples_collected',
        'samples_type' : 'samples_type',
        'samples_count' : 'None',
        'samples' : 'None',
        'comment' : 'comment',
        '__version__' : 'None',
        'meta/instanceID' : 'meta-instanceID',
        '_xform_id_string' : 'None',
        '_uuid' : 'KEY',
        '_attachments' : 'None',
        '_status' : 'None',
        '_geolocation' : 'None', # looks like a duplication of survey_infromation/location
        '_submission_time' : ('parse_date',(('name_out','SubmissionDate'),('fmt_in','%Y-%m-%dT%H:%M:%S'))),
        '_tags' : 'None',
        '_notes' : 'None',
        '_validation_status' : 'None',
        '_submitted_by' : 'None',
    }

    # Not used below; kept as a record of output columns that have no
    # counterpart among the top-level input columns.
    unavailable_at_top_level = {
        'dead_stemrust_samples' : 'SET-OF-dead_stemrust_samples',
        'dead_stemrust_samples_count' : 'dead_stemrust_samples_count',
        'dead_yellowrust_samples' : 'SET-OF-dead_yellowrust_samples',
        'dead_yellowrust_samples_count' : 'dead_yellowrust_samples_count',
        'live_leafrust_samples' : 'SET-OF-live_leafrust_samples',
        'live_leafrust_samples_count' : 'live_leafrust_samples_count',
        'live_stemrust_samples' : 'SET-OF-live_stemrust_samples',
        'live_stemrust_samples_count' : 'live_stemrust_samples_count',
        'live_yellowrust_samples' : 'SET-OF-live_yellowrust_samples',
        'live_yellowrust_samples_count' : 'live_yellowrust_samples_count',
    }

    logger.debug('Performing download')

    # perform a pull from the server, and if it fails write a warning message

    download_success = True
    request = None

    skip_download: bool = config['Survey'].get('SkipServerDownload', False)

    if not skip_download:
        try:
            request = get_from_kobotoolbox(**form_credentials)

        except requests.exceptions.RequestException as e:
            # keep the cause visible in the log before falling back to a past download
            logger.warning(f"Download from kobotoolbox server failed: {e}")
            status.reset('WARNING')

            download_success = False

    # define filenames
    csv_filename = f"SurveyData_{form_credentials['form_id']}.csv"

    csv_processed_filename = f"SurveyDataProcessed.csv"
    # output_path already ends with '/', so this contains a double slash;
    # kept as-is so the returned path string is unchanged for callers.
    csv_processed_path = f"{output_path}/{csv_processed_filename}"

    if download_success and not skip_download:
        # parse dataframe
        dataframe_raw = build_dataframe(request)

        logger.debug('Saving raw csv file')

        # NOTE(review): csv_filename already ends in '.csv', so the raw file is
        # written with a '.csv.csv' suffix. Left unchanged in case other tooling
        # relies on the existing name - confirm before correcting.
        df_raw_filename = f"{output_path}/{csv_filename}.csv"

        dataframe_raw.to_csv(df_raw_filename,index=False,quoting=csv.QUOTE_MINIMAL)

        # process to match ODK format
        dataframe_processed = parse_columns(dataframe_raw,column_parser_dict)

        logger.debug('Saving processed csv file')

        dataframe_processed.to_csv(csv_processed_path,index=False,quoting=csv.QUOTE_MINIMAL)

    else:
        logger.info("Because server download failed somewhere (or we are skipping downloads), trying to recover by copying recent download")

        copy_success = False
        # stays None only if the search loop below never runs
        past_jobPath = None

        acceptable_days_back = int(config['Survey']['AcceptableDowntimeDays'])
        logger.debug(f"Acceptable server downtime is set to {acceptable_days_back} days")

        # walk back one day at a time until a usable past download is found
        for days_back in range(1, acceptable_days_back + 1):

            current_date = datetime.datetime.strptime(config['StartString'],'%Y%m%d')

            past_date = current_date - datetime.timedelta(days=days_back)

            past_jobPath = f"{config['WorkspacePath']}/SURVEYDATA_{past_date.strftime('%Y%m%d')}"

            past_output_path = f"{past_jobPath}/{output_dir}/"

            # check that python or perl coordinator script succeeded for that date
            # (explicit test rather than assert, which is removed under python -O)
            success_py = os.path.isfile(f"{past_jobPath}/STATUS_SUCCESS")
            success_perl = os.path.isfile(f"{past_jobPath}/SURVEYDATA_SUCCESS.txt")

            if success_py or success_perl:

                past_csv_filename = csv_processed_filename

                logger.info(f"Looking for {past_output_path+past_csv_filename}")

                try:
                    copyfile(past_output_path+past_csv_filename,csv_processed_path)

                except OSError as e:
                    # missing or unreadable source file: try an earlier day
                    logger.debug(f"Copy failed: {e}")

                else:
                    copy_success = os.path.isfile(csv_processed_path)

            if copy_success:
                break

            logger.info(f"Not found a kobotoolbox download in {past_output_path}")

        if not copy_success:
            logger.error(f"Failed get a suitable copy of survey data.")
            status.reset('ERROR')
            # NOTE(review): presumably endJob terminates the job here - confirm in ProcessorUtils
            endJob(status,premature=True)

        logger.warning(f"Using download from {past_jobPath}.")

    return csv_processed_path