diff --git a/coordinator/ProcessorEpidemiology.py b/coordinator/ProcessorEpidemiology.py
index 59e7cf9bf72f3aade5fa9f57f7597fa2833f9d56..31656accdae4bccc738ed7c9174ebab011f65988 100644
--- a/coordinator/ProcessorEpidemiology.py
+++ b/coordinator/ProcessorEpidemiology.py
@@ -312,6 +312,20 @@ class ProcessorEpidemiology(Processor):
 
         # TODO: Some of this is modifying config before epi model is run. Determine
         # how to account for that
+
+        # If this job is continuing the results of a past epi run, there are
+        # several additional requirements:
+
+        # - The epi model will be expected to run from the reference date to the
+        # end date, and the start date to reference date defines the range of the
+        # job that is being continued.
+
+        # - Each model named under config['Epidemiology']['Epi'] will also need a
+        # corresponding set of arguments for reading in as for depo and env, e.g.
+        # config['Epidemiology']['model_name'].
+
+        is_continue = config['Epidemiology'].get('continue',False)
+
         # initialise any needed variables
 
         reference_date_str = config['StartString']
@@ -329,6 +343,7 @@ class ProcessorEpidemiology(Processor):
         config['StartTimeShort'] = start_string_short
         config['EndTime'] = end_string
 
+        # Used to get the last jobs to continue or update from
         yesterday_date = datetime.datetime.strptime(reference_date_str,'%Y%m%d') - datetime.timedelta(days=1)
         yesterday_string = yesterday_date.strftime('%Y%m%d')
 
@@ -472,10 +487,10 @@ class ProcessorEpidemiology(Processor):
 
         config_epi['Host']['FileNamePrepared'] = dst_host_csv
 
         # Preparing any continue-run files
-        for ci in config['Epidemiology']['Epi']:
-
-            if ci.get('continue',False) is True:
+        if is_continue is True:
+            for ci in config_epi['Epi']:
+
                 model_name = ci['model']
 
                 # Get results of last day and prepare as input
@@ -503,7 +518,12 @@ class ProcessorEpidemiology(Processor):
             if k not in short_name.keys():
                 config_epi[k]=v
 
-        self.logger.debug('Incremental configuration looks like:')
+        if is_continue:
+            # This will be used in the epi model and not the data preparation
+            continue_start_date = reference_date+datetime.timedelta(hours=3)
+            config_epi['StartTime'] = continue_start_date.strftime('%Y-%m-%d-%H%M')
+            config_epi['StartTimeShort'] = continue_start_date.strftime('%Y%m%d%H%M')
+
         def print_item(item):
             self.logger.debug(f"Item {item}")
             self.logger.debug(json.dumps(item,indent=2))
@@ -514,6 +534,9 @@ class ProcessorEpidemiology(Processor):
                     iterate(item)
                 else:
                     print_item(item)
+
+        self.logger.debug('Incremental configuration looks like:')
+        iterate(config_epi)
 
         self.logger.debug('Complete configuration looks like:')
 
@@ -542,7 +565,7 @@ class ProcessorEpidemiology(Processor):
         def calc_mean(arr):
             return 'mean', arr.mean()
 
-        for epiconf in config['Epidemiology']['Epi']:
+        for epiconf in config_epi['Epi']:
 
             outfile = epiconf["infectionRasterFileName"]
 
@@ -585,25 +608,25 @@ class ProcessorEpidemiology(Processor):
 
         # slice the epi results into before forecast and in forecast
 
-        for epiconf in config['Epidemiology']['Epi']:
+        for epiconf in config_epi['Epi']:
 
             outfile = epiconf["infectionRasterFileName"]+'_progression.csv'
 
             # load the full epi results
             df_full = read_csv(outfile,header=[0],index_col=[0,1])
 
-            column_date_fmt = f"X{config['StartTimeShort']}_X%Y%m%d%H%M"
+            column_date_fmt = f"X{config_epi['StartTimeShort']}_X%Y%m%d%H%M"
             df_full_dates = to_datetime(df_full.columns.astype('str'),format=column_date_fmt)
 
             unit_description = ''
 
+            # convert units from ha_infected/ha_cell to ha_infected/ha_wheat
+            if epiconf['rescale_output_by_host_raster'] is True:
 
                 unit_description = '_per_ha_wheat'
 
                 model_colns = df_full.columns
 
-            # convert units from ha_infected/ha_cell to ha_infected/ha_wheat
-
                 df_full = self.get_model_divided_by_host_fraction(
                     df_full,
                     config_epi['Host']['HostCSV'],
@@ -618,7 +641,7 @@ class ProcessorEpidemiology(Processor):
 
             # determine date to cut with
             # plus 1 minute so midnight is associated with preceding day
-            date_to_cut = datetime.datetime.strptime(config['StartString']+'0001','%Y%m%d%H%M')
+            date_to_cut = datetime.datetime.strptime(config_epi['StartString']+'0001','%Y%m%d%H%M')
 
             dates_after_cut = df_full_dates >= date_to_cut
             idx = argmax(dates_after_cut)-1
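For reference, the continue-run path introduced above expects roughly the following shape of job config. This is only an illustrative sketch: the 'continue' flag, the 'Epi' list whose entries name a model via 'model', the per-model section keyed by the model name (as for depo and env), and the StartTime formats are taken from the diff; every other key and value is a placeholder, not the coordinator's actual schema.

import datetime

# Hypothetical config fragment; keys and values not shown in the diff are placeholders.
config = {
    'StartString': '20240115',  # reference date of the job (YYYYMMDD)
    'Epidemiology': {
        # opt in to continuing yesterday's run; read with .get('continue', False)
        'continue': True,
        # each entry names an epi model via its 'model' key
        'Epi': [
            {'model': 'model_name'},
        ],
        # per-model read-in arguments, keyed by the model name itself,
        # mirroring the depo and env sections (contents omitted here)
        'model_name': {},
    },
}

is_continue = config['Epidemiology'].get('continue', False)

# When continuing, the epi model's start time is shifted 3 hours past the
# reference date and formatted as in the diff.
reference_date = datetime.datetime.strptime(config['StartString'], '%Y%m%d')
if is_continue:
    continue_start_date = reference_date + datetime.timedelta(hours=3)
    print(continue_start_date.strftime('%Y-%m-%d-%H%M'))  # 2024-01-15-0300
    print(continue_start_date.strftime('%Y%m%d%H%M'))     # 202401150300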