From 552cc1eb8c1eb14b1ec56a88de7f47f077ec9149 Mon Sep 17 00:00:00 2001 From: Jake Smith <jws52@cam.ac.uk> Date: Fri, 6 May 2022 16:53:10 +0100 Subject: [PATCH] feat: Revised Epidemiology timespan and plotting * configurable timespan can now state a fixed date in the past with a string <YYYYMMDD>. * Epi plotting now generates two figures, one for integrated epi risk over past dates (from start of CalculationSpanDays until --start-date date), and a second for integrated epi risk over future dates (from date of --start-date until end of configured CalculationSpanDays) --- ProcessorComponents.py | 143 +++++++++++++++++++++++++++++++++++------ 1 file changed, 124 insertions(+), 19 deletions(-) diff --git a/ProcessorComponents.py b/ProcessorComponents.py index 98bcee3..360f6fd 100644 --- a/ProcessorComponents.py +++ b/ProcessorComponents.py @@ -49,6 +49,12 @@ short_name = { 'Survey' : 'SURVEYDATA', } +disease_latin_name_dict = { + 'StemRust' : 'P_GRAMINIS', + 'StripeRust' : 'P_STRIIFORMIS', + 'LeafRust' : 'P_RECONDITA', + 'WheatBlast' : 'M_ORYZAE'} + def process_pre_job_survey(input_args): '''Returns a boolean as to whether the job is ready for full processing.''' logger.info('started process_pre_job_survey(), nothing to do') @@ -130,9 +136,6 @@ def calc_epi_date_range(init_str,span_days=[0,6]): init_date = datetime.datetime.strptime(init_str,'%Y%m%d') - date_shift0 = datetime.timedelta(days=span_days[0]) - date_shift1 = datetime.timedelta(days=span_days[1]) - # note that filename date represents preceding 3 hours, so day's data # starts at file timestamp 0300 UTC threehour_shift = datetime.timedelta(hours=3) @@ -140,9 +143,24 @@ def calc_epi_date_range(init_str,span_days=[0,6]): # add 24hrs so that final day is fully included day_shift = datetime.timedelta(days=1) - start_date = init_date + date_shift0 + threehour_shift + # if more than 999 days + if len(str(span_days[0]))>3: + # assume it is a date string + start_date = 
datetime.datetime.strptime(span_days[0]+'0300','%Y%m%d%H%M') + else: + date_shift0 = datetime.timedelta(days=span_days[0]) + + start_date = init_date + date_shift0 + threehour_shift + + if len(str(span_days[1]))>3: + # assume it is a YYYYMMDD date string + end_date = datetime.datetime.strptime(span_days[1]+'0000','%Y%m%d%H%M') + + end_date = end_date + day_shift + else: + date_shift1 = datetime.timedelta(days=span_days[1]) - end_date = init_date + date_shift1 +day_shift + end_date = init_date + date_shift1 +day_shift return start_date, end_date @@ -1618,6 +1636,7 @@ def process_in_job_epi(jobPath,status,config,component): json.dump(config_epi,write_file,indent=4) # run epi model + try: EpiModel.run_epi_model(f"{case_specific_path}/{config_filename}.json") except: @@ -1656,6 +1675,8 @@ def process_in_job_epi(jobPath,status,config,component): logger.debug('Saving tif output as png for easier viewing') plotRaster.save_raster_as_png(outfile) + # comparison figure + # TODO: make this plot configurable? with function or args? 
#logger.info('Plotting epi output alongside contributing components') # figure_func = getattr(EpiAnalysis,'plot_compare_host_env_dep_infection') @@ -1665,7 +1686,6 @@ def process_in_job_epi(jobPath,status,config,component): # isolate the config for this function, in case of modifications config_epi_for_comparison = config_epi.copy() - # comparison figure fig,axes,cases = figure_func( config_epi_for_comparison, start_str = start_string, @@ -1675,6 +1695,52 @@ def process_in_job_epi(jobPath,status,config,component): fig.savefig(SaveFileName+'.png',dpi=300) + # slice the epi results into before forecast and in forecast + + for epiconf in config['Epidemiology']['Epi']: + + outfile = epiconf["infectionRasterFileName"]+'_progression.csv' + + fn_seasonsofar = epiconf["infectionRasterFileName"]+'_seasonsofar.csv' + fn_weekahead = epiconf["infectionRasterFileName"]+'_weekahead.csv' + + # load the full epi results + df_full = read_csv(outfile,header=[0],index_col=[0,1]) + column_date_fmt = f"X{config['StartTimeShort']}_X%Y%m%d%H%M" + df_full_dates = to_datetime(df_full.columns.astype('str'),format=column_date_fmt) + + # determine date to cut with + # plus 1 minute so midnight is associated with preceding day + date_to_cut = datetime.datetime.strptime(config['StartString']+'0001','%Y%m%d%H%M') + dates_after_cut = df_full_dates >= date_to_cut + idx = argmax(dates_after_cut)-1 + + # build seasonsofar dataframe (only need the last date) + df_seasonsofar = df_full.iloc[:,idx] + + # check column name is defined as expected + # from epi start time to forecast start time + column_name = f"X{config['StartTimeShort']}_X{config['StartString']}0000" + assert df_seasonsofar.name == column_name + + # save to csv + df_seasonsofar.to_csv(fn_seasonsofar,header=True,index=True) + + # build weekahead dataframe and save to csv + df_fc_start = df_full.iloc[:,idx] + df_fc_start_name = df_fc_start.name.split('_')[-1] + + df_fc_end = df_full.iloc[:,-1] + df_fc_end_name = 
df_fc_end.name.split('_')[-1] + + df_weekahead = df_fc_end - df_fc_start + + # defined column name + df_weekahead.name = '_'.join([df_fc_start_name,df_fc_end_name]) + + # save to csv + df_weekahead.to_csv(fn_weekahead,header=True,index=True) + return def do_nothing(*args, **kwargs): @@ -1868,7 +1934,6 @@ def process_EWS_plotting_dep(jobPath,config): return EWSPlottingOutputs -#TODO: def process_EWS_plotting_epi(jobPath,config): '''Returns a list of output files for transfer.''' @@ -1899,16 +1964,26 @@ def process_EWS_plotting_epi(jobPath,config): python_script = config['Epidemiology']['EWS-Plotting']['PythonScript'] - run_config = config['Epidemiology']['EWS-Plotting']['RunConfig'] + run_config = config['Epidemiology']['EWS-Plotting']['RunConfig_seasonsofar'] + + chart_config = config['Epidemiology']['EWS-Plotting']['ChartConfig'] # hard wired for now - sys_config = f"{plot_path}/python/data/json_config/SYS_CONFIG_PINE.json" + sys_config = f"{plot_path}/python/data/json_config/sys/SYS_CONFIG_PINE.json" # use the first matching epi formulation # TODO: Is there a more efficient way to select? 
- epi_filename = [ce['infectionRasterFileName']+'.csv' for ce in config['Epidemiology']['Epi'] if ce['model']==epi_case_operational][0] + epi_filename = [ce['infectionRasterFileName'] for ce in config['Epidemiology']['Epi'] if ce['model']==epi_case_operational][0] + + dep_regionnames = ['SouthAsia','Ethiopia'] - deposition_dir = f"{config['WorkspacePath']}DEPOSITION_{start_string}/WR_NAME_SouthAsia_{start_string}/" + # TODO get deposition_dir from config['Epidemiology']['Deposition']['PathTemplate'] + dep_regionname = 'Ethiopia' #SouthAsia + + deposition_dir = f"{config['WorkspacePath']}DEPOSITION_{start_string}/WR_NAME_{dep_regionname}_{start_string}/" + + # TODO: handle multiple diseases and regions in Processor as a loop, or in the config + deposition_disease_name = [disease_latin_name_dict[disease]+'_DEPOSITION' for disease in diseases][0] ews_plot_dir = f"{jobPath}/plotting/" @@ -1927,28 +2002,58 @@ def process_EWS_plotting_epi(jobPath,config): disease_to_add = disease.replace('Rust','') epi_filename = epi_filename.replace(disease_to_drop,disease_to_add) - # prepare command + # prepare command for seasonsofar # TODO: Is output unable to distinguish multiple diseases? 
- plot_command = [ + plot_command_1 = [ '/storage/app/EWS/General/EWS-python/runpy_ews_plotting_env.sh', python_script, - '-epi',epi_filename, + '-csv',epi_filename+'_seasonsofar.csv', '-dt',disease_short, + '-mt','epi-seasonsofar', '-ws',deposition_dir, + '-wsdn',deposition_disease_name, '-o',ews_plot_dir, '-d',start_string, '-sc',sys_config, + '-cc',chart_config, '-rc',run_config] - logger.info(f"Running EWS-Plotting command:\n'{' '.join(plot_command)}'") + logger.info(f"Running EWS-Plotting command:\n'{' '.join(plot_command_1)}'") - description_short = 'EWS-Plotting' + description_short = 'EWS-Plotting seasonsofar' description_long = description_short - subprocess_and_log(plot_command, description_short, description_long,env=env_map) + subprocess_and_log(plot_command_1, description_short, description_long,env=env_map) + + # prepare command for weekahead + + run_config = config['Epidemiology']['EWS-Plotting']['RunConfig_weekahead'] + + # TODO: Is output unable to distinguish multiple diseases? 
+ plot_command_2 = [ + '/storage/app/EWS/General/EWS-python/runpy_ews_plotting_env.sh', + python_script, + '-csv',epi_filename+'_weekahead.csv', + '-dt',disease_short, + '-mt','epi-weekahead', + '-ws',deposition_dir, + '-wsdn',deposition_disease_name, + '-o',ews_plot_dir, + '-d',start_string, + '-sc',sys_config, + '-cc',chart_config, + '-rc',run_config] + + logger.info(f"Running EWS-Plotting command:\n'{' '.join(plot_command_2)}'") + + description_short = 'EWS-Plotting weekahead' + description_long = description_short + subprocess_and_log(plot_command_2, description_short, description_long,env=env_map) + + region_name_lower = config['RegionName'].lower() # check the output - EWSPlottingOutputDir = f"{ews_plot_dir}/epi/images/" - EWSPlottingOutputGlobs += [f"{EWSPlottingOutputDir}infection_{config['RegionName'].lower()}_{disease_short}*.png"] + EWSPlottingOutputDir = f"{ews_plot_dir}/images/" + EWSPlottingOutputGlobs += [f"{EWSPlottingOutputDir}infection_{region_name_lower}_{disease_short}*.png"] EWSPlottingOutputGlobs = get_only_existing_globs(EWSPlottingOutputGlobs,inplace=False) -- GitLab