From 552cc1eb8c1eb14b1ec56a88de7f47f077ec9149 Mon Sep 17 00:00:00 2001
From: Jake Smith <jws52@cam.ac.uk>
Date: Fri, 6 May 2022 16:53:10 +0100
Subject: [PATCH] feat: Revised Epidemiology timespan and plotting

* The configurable timespan can now specify a fixed date in the past,
  given as a string <YYYYMMDD>.

* Epi plotting now generates two figures: one for integrated epi risk
  over past dates (from the start of CalculationSpanDays until the
  --start-date date), and a second for integrated epi risk over future
  dates (from the --start-date date until the end of the configured
  CalculationSpanDays).
---
 ProcessorComponents.py | 143 +++++++++++++++++++++++++++++++++++------
 1 file changed, 124 insertions(+), 19 deletions(-)

diff --git a/ProcessorComponents.py b/ProcessorComponents.py
index 98bcee3..360f6fd 100644
--- a/ProcessorComponents.py
+++ b/ProcessorComponents.py
@@ -49,6 +49,12 @@ short_name = {
         'Survey' : 'SURVEYDATA',
         }
 
+disease_latin_name_dict = {
+        'StemRust' : 'P_GRAMINIS',
+        'StripeRust' : 'P_STRIIFORMIS',
+        'LeafRust' : 'P_RECONDITA',
+        'WheatBlast' : 'M_ORYZAE'}
+
 def process_pre_job_survey(input_args):
     '''Returns a boolean as to whether the job is ready for full processing.'''
     logger.info('started process_pre_job_survey(), nothing to do')
@@ -130,9 +136,6 @@ def calc_epi_date_range(init_str,span_days=[0,6]):
 
     init_date = datetime.datetime.strptime(init_str,'%Y%m%d')
 
-    date_shift0 = datetime.timedelta(days=span_days[0])
-    date_shift1 = datetime.timedelta(days=span_days[1])
-
     # note that filename date represents preceding 3 hours, so day's data
     #  starts at file timestamp 0300 UTC
     threehour_shift = datetime.timedelta(hours=3)
@@ -140,9 +143,24 @@ def calc_epi_date_range(init_str,span_days=[0,6]):
     # add 24hrs so that final day is fully included
     day_shift = datetime.timedelta(days=1)
 
-    start_date = init_date + date_shift0 + threehour_shift
+    # if more than 999 days
+    if len(str(span_days[0]))>3: 
+        # assume it is a date string
+        start_date = datetime.datetime.strptime(span_days[0]+'0300','%Y%m%d%H%M')
+    else:
+        date_shift0 = datetime.timedelta(days=span_days[0])
+    
+        start_date = init_date + date_shift0 + threehour_shift
+
+    if len(str(span_days[1]))>3: 
+        # assume it is a date string
+        end_date = datetime.datetime.strptime(span_days[1]+'0000','%Y%m%d%H%M')
+        
+        end_date = end_date + day_shift
+    else:
+        date_shift1 = datetime.timedelta(days=span_days[1])
 
-    end_date = init_date + date_shift1 +day_shift
+        end_date = init_date + date_shift1 +day_shift
 
     return start_date, end_date
 
@@ -1618,6 +1636,7 @@ def process_in_job_epi(jobPath,status,config,component):
             json.dump(config_epi,write_file,indent=4)
 
         # run epi model
+        
         try:
             EpiModel.run_epi_model(f"{case_specific_path}/{config_filename}.json")
         except:
@@ -1656,6 +1675,8 @@ def process_in_job_epi(jobPath,status,config,component):
                 logger.debug('Saving tif output as png for easier viewing')
                 plotRaster.save_raster_as_png(outfile)
 
+        # comparison figure
+
         # TODO: make this plot configurable? with function or args?
         #logger.info('Plotting epi output alongside contributing components')
         # figure_func = getattr(EpiAnalysis,'plot_compare_host_env_dep_infection')
@@ -1665,7 +1686,6 @@ def process_in_job_epi(jobPath,status,config,component):
         # isolate the config for this function, in case of modifications
         config_epi_for_comparison = config_epi.copy()
 
-        # comparison figure
         fig,axes,cases = figure_func(
                 config_epi_for_comparison,
                 start_str = start_string,
@@ -1675,6 +1695,52 @@ def process_in_job_epi(jobPath,status,config,component):
 
         fig.savefig(SaveFileName+'.png',dpi=300)
 
+        # slice the epi results into before forecast and in forecast
+
+        for epiconf in config['Epidemiology']['Epi']:
+
+            outfile = epiconf["infectionRasterFileName"]+'_progression.csv'
+
+            fn_seasonsofar = epiconf["infectionRasterFileName"]+'_seasonsofar.csv'
+            fn_weekahead = epiconf["infectionRasterFileName"]+'_weekahead.csv'
+
+            # load the full epi results
+            df_full = read_csv(outfile,header=[0],index_col=[0,1])
+            column_date_fmt = f"X{config['StartTimeShort']}_X%Y%m%d%H%M"
+            df_full_dates = to_datetime(df_full.columns.astype('str'),format=column_date_fmt)
+
+            # determine date to cut with
+            # plus 1 minute so midnight is associated with preceding day
+            date_to_cut = datetime.datetime.strptime(config['StartString']+'0001','%Y%m%d%H%M')
+            dates_after_cut = df_full_dates >= date_to_cut
+            idx = argmax(dates_after_cut)-1
+
+            # build seasonsofar dataframe (only need the last date)
+            df_seasonsofar = df_full.iloc[:,idx]
+
+            # check column name is defined as expected
+            # from epi start time to forecast start time
+            column_name = f"X{config['StartTimeShort']}_X{config['StartString']}0000"
+            assert df_seasonsofar.name == column_name
+            
+            #  save to csv
+            df_seasonsofar.to_csv(fn_seasonsofar,header=True,index=True)
+
+            # build weekahead dataframe and save to csv
+            df_fc_start = df_full.iloc[:,idx]
+            df_fc_start_name = df_fc_start.name.split('_')[-1]
+
+            df_fc_end = df_full.iloc[:,-1]
+            df_fc_end_name = df_fc_end.name.split('_')[-1]
+
+            df_weekahead = df_fc_end - df_fc_start
+
+            # defined column name
+            df_weekahead.name = '_'.join([df_fc_start_name,df_fc_end_name])
+            
+            # save to csv
+            df_weekahead.to_csv(fn_weekahead,header=True,index=True)
+
     return
 
 def do_nothing(*args, **kwargs):
@@ -1868,7 +1934,6 @@ def process_EWS_plotting_dep(jobPath,config):
 
     return EWSPlottingOutputs
 
-#TODO:
 def process_EWS_plotting_epi(jobPath,config):
     '''Returns a list of output files for transfer.'''
 
@@ -1899,16 +1964,26 @@ def process_EWS_plotting_epi(jobPath,config):
 
     python_script = config['Epidemiology']['EWS-Plotting']['PythonScript']
 
-    run_config = config['Epidemiology']['EWS-Plotting']['RunConfig']
+    run_config = config['Epidemiology']['EWS-Plotting']['RunConfig_seasonsofar']
+
+    chart_config = config['Epidemiology']['EWS-Plotting']['ChartConfig']
 
     # hard wired for now
-    sys_config = f"{plot_path}/python/data/json_config/SYS_CONFIG_PINE.json"
+    sys_config = f"{plot_path}/python/data/json_config/sys/SYS_CONFIG_PINE.json"
 
     # use the first matching epi formulation
     # TODO: Is there a more efficient way to select?
-    epi_filename = [ce['infectionRasterFileName']+'.csv' for ce in config['Epidemiology']['Epi'] if ce['model']==epi_case_operational][0]
+    epi_filename = [ce['infectionRasterFileName'] for ce in config['Epidemiology']['Epi'] if ce['model']==epi_case_operational][0]
+
+    dep_regionnames = ['SouthAsia','Ethiopia']
 
-    deposition_dir = f"{config['WorkspacePath']}DEPOSITION_{start_string}/WR_NAME_SouthAsia_{start_string}/"
+    # TODO get deposition_dir from config['Epidemiology']['Deposition']['PathTemplate']
+    dep_regionname = 'Ethiopia' #SouthAsia
+
+    deposition_dir = f"{config['WorkspacePath']}DEPOSITION_{start_string}/WR_NAME_{dep_regionname}_{start_string}/"
+
+    # TODO: handle multiple diseases and regions in Processor as a loop, or in the config 
+    deposition_disease_name = [disease_latin_name_dict[disease]+'_DEPOSITION' for disease in diseases][0]
 
     ews_plot_dir = f"{jobPath}/plotting/"
 
@@ -1927,28 +2002,58 @@ def process_EWS_plotting_epi(jobPath,config):
         disease_to_add = disease.replace('Rust','')
         epi_filename = epi_filename.replace(disease_to_drop,disease_to_add)
 
-        # prepare command
+        # prepare command for seasonsofar
         # TODO: Is output unable to distinguish multiple diseases?
-        plot_command = [
+        plot_command_1 = [
                 '/storage/app/EWS/General/EWS-python/runpy_ews_plotting_env.sh',
                 python_script,
-                '-epi',epi_filename,
+                '-csv',epi_filename+'_seasonsofar.csv',
                 '-dt',disease_short,
+                '-mt','epi-seasonsofar',
                 '-ws',deposition_dir,
+                '-wsdn',deposition_disease_name,
                 '-o',ews_plot_dir,
                 '-d',start_string,
                 '-sc',sys_config,
+                '-cc',chart_config,
                 '-rc',run_config]
 
-        logger.info(f"Running EWS-Plotting command:\n'{' '.join(plot_command)}'")
+        logger.info(f"Running EWS-Plotting command:\n'{' '.join(plot_command_1)}'")
 
-        description_short = 'EWS-Plotting'
+        description_short = 'EWS-Plotting seasonsofar'
         description_long = description_short
-        subprocess_and_log(plot_command, description_short, description_long,env=env_map)
+        subprocess_and_log(plot_command_1, description_short, description_long,env=env_map)
+    
+        # prepare command for weekahead
+        
+        run_config = config['Epidemiology']['EWS-Plotting']['RunConfig_weekahead']
+
+        # TODO: Is output unable to distinguish multiple diseases?
+        plot_command_2 = [
+                '/storage/app/EWS/General/EWS-python/runpy_ews_plotting_env.sh',
+                python_script,
+                '-csv',epi_filename+'_weekahead.csv',
+                '-dt',disease_short,
+                '-mt','epi-weekahead',
+                '-ws',deposition_dir,
+                '-wsdn',deposition_disease_name,
+                '-o',ews_plot_dir,
+                '-d',start_string,
+                '-sc',sys_config,
+                '-cc',chart_config,
+                '-rc',run_config]
+
+        logger.info(f"Running EWS-Plotting command:\n'{' '.join(plot_command_2)}'")
+
+        description_short = 'EWS-Plotting weekahead'
+        description_long = description_short
+        subprocess_and_log(plot_command_2, description_short, description_long,env=env_map)
+
+        region_name_lower = config['RegionName'].lower()
 
         # check the output
-        EWSPlottingOutputDir = f"{ews_plot_dir}/epi/images/"
-        EWSPlottingOutputGlobs += [f"{EWSPlottingOutputDir}infection_{config['RegionName'].lower()}_{disease_short}*.png"]
+        EWSPlottingOutputDir = f"{ews_plot_dir}/images/"
+        EWSPlottingOutputGlobs += [f"{EWSPlottingOutputDir}infection_{region_name_lower}_{disease_short}*.png"]
 
         EWSPlottingOutputGlobs = get_only_existing_globs(EWSPlottingOutputGlobs,inplace=False)
 
-- 
GitLab