diff --git a/coordinator/ProcessorEpidemiology.py b/coordinator/ProcessorEpidemiology.py index b969a383bf65f21bdeac017e1c1694df4f0c3b66..7ce7581c5e706e55900eb7543bc6b69c0b8454a4 100644 --- a/coordinator/ProcessorEpidemiology.py +++ b/coordinator/ProcessorEpidemiology.py @@ -10,7 +10,7 @@ import os import shutil from typing import List -from numpy import argmax, unique +from numpy import all, any, argmax, unique, where from pandas import read_csv, DataFrame, to_datetime from rasterio import open as rio_open @@ -176,6 +176,64 @@ def raster_to_csv(raster_fn,csv_fn): return +def get_model_divided_by_host_fraction( + dfm, + hostCSV, + model_colns=None, + **kwargs): + """when model_infection pressure has units of [ha_infected/ha_cell] + we want [ha_infected/ha_wheat] to compare with surveys + (because surveys only sample the wheat covered landscape) + so we must load the host raster and divide all model results by it.""" + + print('Converting units of prediction from ha_infected/ha_cell to ha_infect/ha_wheat') + + # load host raster + host_fn = hostCSV + host_df = read_csv(host_fn,index_col=0,header=[0,1]) + num_host_dates = len(host_df.index) + if num_host_dates == 1: + # parse static host map + index_name = host_df.index[0] + host_df.rename(index={index_name:'host_frac'},inplace=True) + + else: + # TODO: parse dynamic host map + raise Exception + + # conform the structure with infection dataframe + host_df = host_df.T + host_df['latitude'] = host_df.index.get_level_values(0).astype('float') + host_df['longitude'] = host_df.index.get_level_values(1).astype('float') + host_df = host_df.sort_values(by=['longitude','latitude'],ascending=[True,False],axis='rows') + dfm2 = dfm.merge(host_df,how='left',on=['longitude','latitude']) + + if model_colns is None: + model_colns = [coln for coln in dfm2 if ('model_' in coln) & (coln not in ['model_date','model_date_offset'])] + + print('Applying unit conversion to the following columns:\n'+'\n'.join(model_colns)) + + # loop over each column that needs converting + for coln in model_colns: + + # ensure prediction is zero wherever there is no host + #assert all(dfm2[dfm2['host_frac']==0][coln]==0.) + # divide model results by host raster in-place + # use np where to handle cases where denominator is zero + dfm2[coln] = where(dfm2['host_frac']==0,dfm2['host_frac'],dfm2[coln] / dfm2['host_frac']) + + # check for anomalously large values + where_too_big = dfm2[coln] > 1.00001 + if any(where_too_big): + msg = 'ERROR: Unit conversion failed, host area seems to be smaller than predicted infection area in a cell' + print(msg) + raise Exception + + # clip any values that are above 1 + dfm2[coln].clip(0.,1.,inplace=True) + + return dfm2 + def process_in_job_epi(jobPath,status,config,component): logger.info('started process_in_job_epi()') @@ -442,14 +500,37 @@ def process_in_job_epi(jobPath,status,config,component): outfile = epiconf["infectionRasterFileName"]+'_progression.csv' - fn_seasonsofar = epiconf["infectionRasterFileName"]+'_seasonsofar.csv' - fn_weekahead = epiconf["infectionRasterFileName"]+'_weekahead.csv' - # load the full epi results df_full = read_csv(outfile,header=[0],index_col=[0,1]) column_date_fmt = f"X{config['StartTimeShort']}_X%Y%m%d%H%M" df_full_dates = to_datetime(df_full.columns.astype('str'),format=column_date_fmt) + unit_description = '' + + if epiconf['rescale_output_by_host_raster'] is True: + + unit_description = '_per_ha_wheat' + + model_colns = df_full.columns + + # convert units from ha_infected/ha_cell to ha_infected/ha_wheat + + df_full = get_model_divided_by_host_fraction( + df_full, + config_epi['Host']['HostCSV'], + model_colns = model_colns) + + # reshape structure for consistency with original infectionRaster + df_full.set_index(['longitude','latitude'],inplace=True) + df_full.drop('host_frac',axis='columns',inplace=True) + + # save to csv + outfile_hawheat = f"{epiconf['infectionRasterFileName']}{unit_description}_progression.csv" + df_full.to_csv(outfile_hawheat,header=True,index=True) + + outfile_hawheat_final = f"{epiconf['infectionRasterFileName']}{unit_description}.csv" + df_full.iloc[:,-1].to_csv(outfile_hawheat_final,header=True,index=True) + # determine date to cut with # plus 1 minute so midnight is associated with preceding day date_to_cut = datetime.datetime.strptime(config['StartString']+'0001','%Y%m%d%H%M') @@ -469,6 +550,7 @@ def process_in_job_epi(jobPath,status,config,component): assert df_seasonsofar.name == column_name # save to csv + fn_seasonsofar = f"{epiconf['infectionRasterFileName']}{unit_description}_seasonsofar.csv" df_seasonsofar.to_csv(fn_seasonsofar,header=True,index=True) # build weekahead dataframe and save to csv @@ -481,6 +563,7 @@ def process_in_job_epi(jobPath,status,config,component): df_weekahead = df_fc_end - df_fc_start # defined column name + fn_weekahead = f"{epiconf['infectionRasterFileName']}{unit_description}_weekahead.csv" df_weekahead.name = '_'.join([df_fc_start_name,df_fc_end_name]) # save to csv @@ -556,7 +639,9 @@ def process_EWS_plotting_epi(jobPath,config): else: plotting_region_name_lower = config['Epidemiology']['EWS-Plotting']['PlottingRegionName'].lower() - epi_seasonsofar_fn = epi_filename+'_seasonsofar.csv' + epi_seasonsofar_fn = epi_filename+'_per_ha_wheat_seasonsofar.csv' + + epi_seasonincforecast_fn = epi_filename+'_per_ha_wheat.csv' seasonsofar_run_config = config['Epidemiology']['EWS-Plotting'].get('RunConfig_seasonsofar',None) @@ -589,7 +674,7 @@ def process_EWS_plotting_epi(jobPath,config): epi_processor_2.set_param_config_files(sys_params_file_arg=sys_config, chart_params_file_arg=chart_config, run_params_file_arg=run_config, - epi_input_csv_arg=epi_filename+'.csv', # for seasonplusforecast + epi_input_csv_arg=epi_seasonincforecast_fn, # for seasonplusforecast #epi_input_csv_arg=epi_filename+'_weekahead.csv', # for weekahead disease_type_arg=disease_short+'_seasonincforecast', issue_date_arg=start_string, diff --git a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json index 02fabc843e95c18fd1edf3ed3753e7fc4e7ad7c5..d6c8276e7bf238a994aa3aa8c34a1793efbb47ec 100644 --- a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json +++ b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json @@ -178,6 +178,7 @@ "modelArguments" : {}, "infectionRasterFileName" : "?", "description": "env. suitability", + "rescale_output_by_host_raster": false, "analysis" : { "vmin" : 0.0e+0, "vmax" : 1.5e+1, @@ -196,6 +197,7 @@ }, "infectionRasterFileName" : "?", "description": "SE[beta*log10(alpha*D)+gamma*P]", + "rescale_output_by_host_raster": true, "analysis" : { "vmin" : 0.0e+0, "vmax" : 1,