From cebca69bc0719e6f77f8e0d72bc6f7adbe1a1c9a Mon Sep 17 00:00:00 2001 From: Jake Smith <jws52@cam.ac.uk> Date: Wed, 17 Mar 2021 15:04:55 +0000 Subject: [PATCH] feat: Partial ability to download multiple ODK forms --- ProcessorComponents.py | 69 ++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/ProcessorComponents.py b/ProcessorComponents.py index 62fbe23..8d30deb 100644 --- a/ProcessorComponents.py +++ b/ProcessorComponents.py @@ -129,29 +129,30 @@ def process_pre_job_epi(input_args): return True -def process_in_job_survey(jobPath,status,config,component): - logger.info('started process_in_job_survey()') - - logger.debug('Performing download from ODK server') - - credentials_filename = config['Survey']['ServerCredentialsFile'] - with open(credentials_filename) as credentials_file: - cred = json.load(credentials_file) +def get_ODK_form_as_csv(form_credentials: dict, jobPath: str, config: dict, status): + '''Given a dict with a single ODK form to download from an ODK Aggregate + server, obtains it and converts to csv.''' + # Caution: Not tested whether different servers can be downloaded to the same ODK_output_path ODK_output_path = f"{jobPath}/ExportRawDB" # get data from ODK server description_short = 'ODK download' description_long = 'survey download from ODK server' ODK_jar = '/storage/app/EWS/General/EWS-Coordinator/ODK-Briefcase-v1.18.0.jar' + + # test + if 'blast' in form_credentials['form_id']: + ODK_jar += 'asdasdasd' + ODK_download = ['java', '-jar', ODK_jar, '--pull_aggregate', - '--form_id', cred['form_id'], + '--form_id', form_credentials['form_id'], '--storage_directory', ODK_output_path, - '--odk_url', cred['server'], - '--odk_username',cred['user'], - '--odk_password',cred['pass']] + '--odk_url', form_credentials['server'], + '--odk_username',form_credentials['user'], + '--odk_password',form_credentials['pass']] ODK_download_success = True @@ -172,17 +173,17 @@ def 
process_in_job_survey(jobPath,status,config,component): Path(ODK_csv_path).mkdir(parents=True, exist_ok=True) - ODK_csv_filename = 'SurveyData.csv' + ODK_csv_filename = f"SurveyData_{form_credentials['form_id']}.csv" if ODK_download_success: description_short = 'ODK export' - description_long = 'format conversion of ODK download to csv' + description_long = 'converting ODK download to csv' logger.debug(description_long) ODK_java_to_csv = ['java', '-jar', ODK_jar, '--export', - '--form_id', cred['form_id'], + '--form_id', form_credentials['form_id'], '--storage_directory',ODK_output_path, '--export_directory',ODK_csv_path, '--export_filename',ODK_csv_filename] @@ -236,19 +237,49 @@ def process_in_job_survey(jobPath,status,config,component): logger.warning(f"Using ODK download from {past_jobPath}.") - logger.debug(f"Preparing to apply removals and additions to {ODK_csv_filename}") + return + +def process_in_job_survey(jobPath,status,config,component): + logger.info('started process_in_job_survey()') + + logger.debug('Performing download(s) from ODK server') + + credentials_filename = config['Survey']['ServerCredentialsFile'] + with open(credentials_filename) as credentials_file: + + cred = json.load(credentials_file) + + assert 'forms' in cred.keys() + + ODK_csv_filenames = [] + for form in cred['forms']: + + logger.debug(f"Starting to download {form['form_id']}") + + ODK_csv_filename = get_ODK_form_as_csv(form, jobPath, config, status) + + ODK_csv_filenames += [ODK_csv_filename] + + raise Exception + + # TODO: Align formatting of different SurveyData files + # The differences between the Afghanistan form and the BangNep form are: + # - Afghanistan lacks 'subscriberid'. So provide a blank value. 
+ + # TODO: Merge additional SurveyData files + + logger.debug(f"Preparing to apply removals and additions to ODK survey data") - processed_surveys_filepath = f"{ODK_csv_path}/Processed_{ODK_csv_filename}" + processed_surveys_filepath = f"{ODK_csv_path}/Processed_SurveyData.csv" survey_errors_to_remove_filepath = f"{config['WorkspacePath']}/SURVEYDATA_MANUAL/SurveyDataErrorsToRemove.csv" survey_additions_filepath = f"{config['WorkspacePath']}/SURVEYDATA_MANUAL/LIVE_SURVEYDATA_TOUSE.csv" RPath = '/usr/local/R/bin/Rscript' - preprocessor_script = coordinator_path+'/SurveyDataPreprocessor.R' R_process_surveys = [RPath, '--no-init-file', - preprocessor_script, + coordinator_ppath+'/SurveyDataPreprocessor.R', f"{ODK_csv_path}/{ODK_csv_filename}", survey_errors_to_remove_filepath, survey_additions_filepath, -- GitLab