FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects
Commit fb1d2c9c authored by J.W. Smith's avatar J.W. Smith
Browse files

feat: Unit conversion of epi output

This is required for the standard epi model output, which has units of
ha_infected per ha_cell, because the model risk thresholds are fitted
for units of ha_infected per ha_wheat.
parent ebb3413b
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,7 @@ import os
import shutil
from typing import List
from numpy import argmax, unique
from numpy import all, any, argmax, unique, where
from pandas import read_csv, DataFrame, to_datetime
from rasterio import open as rio_open
......@@ -176,6 +176,64 @@ def raster_to_csv(raster_fn,csv_fn):
return
def get_model_divided_by_host_fraction(
dfm,
hostCSV,
model_colns=None,
**kwargs):
"""when model_infection pressure has units of [ha_infected/ha_cell]
we want [ha_infected/ha_wheat] to compare with surveys
(because surveys only sample the wheat covered landscape)
so we must load the host raster and divide all model results by it."""
print('Converting units of prediction from ha_infected/ha_cell to ha_infect/ha_wheat')
# load host raster
host_fn = hostCSV
host_df = read_csv(host_fn,index_col=0,header=[0,1])
num_host_dates = len(host_df.index)
if num_host_dates == 1:
# parse static host map
index_name = host_df.index[0]
host_df.rename(index={index_name:'host_frac'},inplace=True)
else:
# TODO: parse dynamic host map
raise Exception
# conform the structure with infection dataframe
host_df = host_df.T
host_df['latitude'] = host_df.index.get_level_values(0).astype('float')
host_df['longitude'] = host_df.index.get_level_values(1).astype('float')
host_df = host_df.sort_values(by=['longitude','latitude'],ascending=[True,False],axis='rows')
dfm2 = dfm.merge(host_df,how='left',on=['longitude','latitude'])
if model_colns is None:
model_colns = [coln for coln in dfm2 if ('model_' in coln) & (coln not in ['model_date','model_date_offset'])]
print('Applying unit conversion to the following columns:\n'+'\n'.join(model_colns))
# loop over each column that needs converting
for coln in model_colns:
# ensure prediction is zero wherever there is no host
#assert all(dfm2[dfm2['host_frac']==0][coln]==0.)
# divide model results by host raster in-place
# use np where to handle cases where denominator is zero
dfm2[coln] = where(dfm2['host_frac']==0,dfm2['host_frac'],dfm2[coln] / dfm2['host_frac'])
# check for anomalously large values
where_too_big = dfm2[coln] > 1.00001
if any(where_too_big):
msg = 'ERROR: Unit conversion failed, host area seems to be smaller than predicted infection area in a cell'
print(msg)
raise Exception
# clip any values that are above 1
dfm2[coln].clip(0.,1.,inplace=True)
return dfm2
def process_in_job_epi(jobPath,status,config,component):
logger.info('started process_in_job_epi()')
......@@ -442,14 +500,37 @@ def process_in_job_epi(jobPath,status,config,component):
outfile = epiconf["infectionRasterFileName"]+'_progression.csv'
fn_seasonsofar = epiconf["infectionRasterFileName"]+'_seasonsofar.csv'
fn_weekahead = epiconf["infectionRasterFileName"]+'_weekahead.csv'
# load the full epi results
df_full = read_csv(outfile,header=[0],index_col=[0,1])
column_date_fmt = f"X{config['StartTimeShort']}_X%Y%m%d%H%M"
df_full_dates = to_datetime(df_full.columns.astype('str'),format=column_date_fmt)
unit_description = ''
if epiconf['rescale_output_by_host_raster'] is True:
unit_description = '_per_ha_wheat'
model_colns = df_full.columns
# convert units from ha_infected/ha_cell to ha_infected/ha_wheat
df_full = get_model_divided_by_host_fraction(
df_full,
config_epi['Host']['HostCSV'],
model_colns = model_colns)
# reshape structure for consistency with original infectionRaster
df_full.set_index(['longitude','latitude'],inplace=True)
df_full.drop('host_frac',axis='columns',inplace=True)
# save to csv
outfile_hawheat = f"{epiconf['infectionRasterFileName']}{unit_description}_progression.csv"
df_full.to_csv(outfile_hawheat,header=True,index=True)
outfile_hawheat_final = f"{epiconf['infectionRasterFileName']}{unit_description}.csv"
df_full.iloc[:,-1].to_csv(outfile_hawheat_final,header=True,index=True)
# determine date to cut with
# plus 1 minute so midnight is associated with preceding day
date_to_cut = datetime.datetime.strptime(config['StartString']+'0001','%Y%m%d%H%M')
......@@ -469,6 +550,7 @@ def process_in_job_epi(jobPath,status,config,component):
assert df_seasonsofar.name == column_name
# save to csv
fn_seasonsofar = f"{epiconf['infectionRasterFileName']}{unit_description}_seasonsofar.csv"
df_seasonsofar.to_csv(fn_seasonsofar,header=True,index=True)
# build weekahead dataframe and save to csv
......@@ -481,6 +563,7 @@ def process_in_job_epi(jobPath,status,config,component):
df_weekahead = df_fc_end - df_fc_start
# defined column name
fn_weekahead = f"{epiconf['infectionRasterFileName']}{unit_description}_weekahead.csv"
df_weekahead.name = '_'.join([df_fc_start_name,df_fc_end_name])
# save to csv
......@@ -556,7 +639,9 @@ def process_EWS_plotting_epi(jobPath,config):
else:
plotting_region_name_lower = config['Epidemiology']['EWS-Plotting']['PlottingRegionName'].lower()
epi_seasonsofar_fn = epi_filename+'_seasonsofar.csv'
epi_seasonsofar_fn = epi_filename+'_per_ha_wheat_seasonsofar.csv'
epi_seasonincforecast_fn = epi_filename+'_per_ha_wheat.csv'
seasonsofar_run_config = config['Epidemiology']['EWS-Plotting'].get('RunConfig_seasonsofar',None)
......@@ -589,7 +674,7 @@ def process_EWS_plotting_epi(jobPath,config):
epi_processor_2.set_param_config_files(sys_params_file_arg=sys_config,
chart_params_file_arg=chart_config,
run_params_file_arg=run_config,
epi_input_csv_arg=epi_filename+'.csv', # for seasonplusforecast
epi_input_csv_arg=epi_seasonincforecast_fn, # for seasonplusforecast
#epi_input_csv_arg=epi_filename+'_weekahead.csv', # for weekahead
disease_type_arg=disease_short+'_seasonincforecast',
issue_date_arg=start_string,
......
......@@ -178,6 +178,7 @@
"modelArguments" : {},
"infectionRasterFileName" : "?",
"description": "env. suitability",
"rescale_output_by_host_raster": false,
"analysis" : {
"vmin" : 0.0e+0,
"vmax" : 1.5e+1,
......@@ -196,6 +197,7 @@
},
"infectionRasterFileName" : "?",
"description": "SE[beta*log10(alpha*D)+gamma*P]",
"rescale_output_by_host_raster": true,
"analysis" : {
"vmin" : 0.0e+0,
"vmax" : 1,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment