feat: Unit conversion of epi output

This is required for the standard epi model output, which has units of ha_infected per ha_cell, because the model risk thresholds are fitted for units of ha_infected per ha_wheat.

feat: Unit conversion of epi output
This is required for the standard epi model output, which has units of ha_infected per ha_cell, because the model risk thresholds are fitted for units of ha_infected per ha_wheat.
fb1d2c9c · J.W. Smith · ebb3413b · fb1d2c9c · fb1d2c9c
Commit fb1d2c9c authored 2 years ago by J.W. Smith
--- a/coordinator/ProcessorEpidemiology.py
+++ b/coordinator/ProcessorEpidemiology.py
@@ -10,7 +10,7 @@ import os
 import shutil
 from typing import List

-from numpy import argmax, unique
+from numpy import all, any, argmax, unique, where
 from pandas import read_csv, DataFrame, to_datetime
 from rasterio import open as rio_open

@@ -176,6 +176,64 @@ def raster_to_csv(raster_fn,csv_fn):

    return

+def get_model_divided_by_host_fraction( 
+            dfm, 
+            hostCSV, 
+            model_colns=None, 
+            **kwargs): 
+    """when model_infection pressure has units of [ha_infected/ha_cell] 
+    we want [ha_infected/ha_wheat] to compare with surveys 
+    (because surveys only sample the wheat covered landscape) 
+    so we must load the host raster and divide all model results by it.""" 
+  
+    print('Converting units of prediction from ha_infected/ha_cell to ha_infect/ha_wheat') 
+  
+    # load host raster 
+    host_fn = hostCSV 
+    host_df = read_csv(host_fn,index_col=0,header=[0,1]) 
+    num_host_dates = len(host_df.index) 
+    if num_host_dates == 1: 
+        # parse static host map 
+        index_name = host_df.index[0] 
+        host_df.rename(index={index_name:'host_frac'},inplace=True) 
+  
+    else: 
+        # TODO: parse dynamic host map 
+        raise Exception 
+  
+    # conform the structure with infection dataframe 
+    host_df = host_df.T 
+    host_df['latitude'] = host_df.index.get_level_values(0).astype('float') 
+    host_df['longitude'] = host_df.index.get_level_values(1).astype('float') 
+    host_df = host_df.sort_values(by=['longitude','latitude'],ascending=[True,False],axis='rows') 
+    dfm2 = dfm.merge(host_df,how='left',on=['longitude','latitude']) 
+  
+    if model_colns is None: 
+        model_colns = [coln for coln in dfm2 if ('model_' in coln) & (coln not in ['model_date','model_date_offset'])] 
+  
+    print('Applying unit conversion to the following columns:\n'+'\n'.join(model_colns)) 
+  
+    # loop over each column that needs converting 
+    for coln in model_colns: 
+
+        # ensure prediction is zero wherever there is no host
+        #assert all(dfm2[dfm2['host_frac']==0][coln]==0.)
+        # divide model results by host raster in-place
+        # use np where to handle cases where denominator is zero
+        dfm2[coln] = where(dfm2['host_frac']==0,dfm2['host_frac'],dfm2[coln] / dfm2['host_frac'])
+
+        # check for anomalously large values 
+        where_too_big = dfm2[coln] > 1.00001 
+        if any(where_too_big): 
+            msg = 'ERROR: Unit conversion failed, host area seems to be smaller than predicted infection area in a cell' 
+            print(msg) 
+            raise Exception 
+ 
+        # clip any values that are above 1 
+        dfm2[coln].clip(0.,1.,inplace=True) 
+ 
+    return dfm2 
+
 def process_in_job_epi(jobPath,status,config,component):
    logger.info('started process_in_job_epi()')

@@ -442,14 +500,37 @@ def process_in_job_epi(jobPath,status,config,component):

            outfile = epiconf["infectionRasterFileName"]+'_progression.csv'

-            fn_seasonsofar = epiconf["infectionRasterFileName"]+'_seasonsofar.csv'
-            fn_weekahead = epiconf["infectionRasterFileName"]+'_weekahead.csv'
-
            # load the full epi results
            df_full = read_csv(outfile,header=[0],index_col=[0,1])
            column_date_fmt = f"X{config['StartTimeShort']}_X%Y%m%d%H%M"
            df_full_dates = to_datetime(df_full.columns.astype('str'),format=column_date_fmt)

+            unit_description = ''
+
+            if epiconf['rescale_output_by_host_raster'] is True:
+
+                unit_description = '_per_ha_wheat'
+
+                model_colns = df_full.columns
+
+                # convert units from ha_infected/ha_cell to ha_infected/ha_wheat
+           
+                df_full = get_model_divided_by_host_fraction(
+                        df_full,
+                        config_epi['Host']['HostCSV'],
+                        model_colns = model_colns)
+
+                # reshape structure for consistency with original infectionRaster
+                df_full.set_index(['longitude','latitude'],inplace=True)
+                df_full.drop('host_frac',axis='columns',inplace=True)
+
+                # save to csv
+                outfile_hawheat = f"{epiconf['infectionRasterFileName']}{unit_description}_progression.csv"
+                df_full.to_csv(outfile_hawheat,header=True,index=True)
+
+                outfile_hawheat_final = f"{epiconf['infectionRasterFileName']}{unit_description}.csv"
+                df_full.iloc[:,-1].to_csv(outfile_hawheat_final,header=True,index=True)
+
            # determine date to cut with
            # plus 1 minute so midnight is associated with preceding day
            date_to_cut = datetime.datetime.strptime(config['StartString']+'0001','%Y%m%d%H%M')
@@ -469,6 +550,7 @@ def process_in_job_epi(jobPath,status,config,component):
            assert df_seasonsofar.name == column_name

            #  save to csv
+            fn_seasonsofar = f"{epiconf['infectionRasterFileName']}{unit_description}_seasonsofar.csv"
            df_seasonsofar.to_csv(fn_seasonsofar,header=True,index=True)

            # build weekahead dataframe and save to csv
@@ -481,6 +563,7 @@ def process_in_job_epi(jobPath,status,config,component):
            df_weekahead = df_fc_end - df_fc_start

            # defined column name
+            fn_weekahead = f"{epiconf['infectionRasterFileName']}{unit_description}_weekahead.csv"
            df_weekahead.name = '_'.join([df_fc_start_name,df_fc_end_name])

            # save to csv
@@ -556,7 +639,9 @@ def process_EWS_plotting_epi(jobPath,config):
        else:
            plotting_region_name_lower = config['Epidemiology']['EWS-Plotting']['PlottingRegionName'].lower()

-        epi_seasonsofar_fn = epi_filename+'_seasonsofar.csv'
+        epi_seasonsofar_fn = epi_filename+'_per_ha_wheat_seasonsofar.csv'
+
+        epi_seasonincforecast_fn = epi_filename+'_per_ha_wheat.csv'

        seasonsofar_run_config = config['Epidemiology']['EWS-Plotting'].get('RunConfig_seasonsofar',None)

@@ -589,7 +674,7 @@ def process_EWS_plotting_epi(jobPath,config):
        epi_processor_2.set_param_config_files(sys_params_file_arg=sys_config,
                                        chart_params_file_arg=chart_config,
                                        run_params_file_arg=run_config,
-                                        epi_input_csv_arg=epi_filename+'.csv', # for seasonplusforecast
+                                        epi_input_csv_arg=epi_seasonincforecast_fn, # for seasonplusforecast
                                        #epi_input_csv_arg=epi_filename+'_weekahead.csv', # for weekahead
                                        disease_type_arg=disease_short+'_seasonincforecast',
                                        issue_date_arg=start_string,

--- a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json
+++ b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json
@@ -178,6 +178,7 @@
                "modelArguments" : {},
                "infectionRasterFileName" : "?",
                "description": "env. suitability",
+                "rescale_output_by_host_raster": false,
                "analysis" : {
                    "vmin" : 0.0e+0,
                    "vmax" : 1.5e+1,
@@ -196,6 +197,7 @@
                },
                "infectionRasterFileName" : "?",
                "description": "SE[beta*log10(alpha*D)+gamma*P]",
+                "rescale_output_by_host_raster": true,
                "analysis" : {
                    "vmin" : 0.0e+0,
                    "vmax" : 1,