From dd06ec1b891d9c2d86e204bb7789ac2de638db0a Mon Sep 17 00:00:00 2001
From: jws52 <jws52@cam.ac.uk>
Date: Tue, 10 Oct 2023 11:07:21 +0100
Subject: [PATCH] refactor: Move some epi prep to EpiPrep submodule

---
 coordinator/ProcessorEpidemiology.py | 198 +++++++--------------------
 1 file changed, 53 insertions(+), 145 deletions(-)

diff --git a/coordinator/ProcessorEpidemiology.py b/coordinator/ProcessorEpidemiology.py
index e823a2e..59e7cf9 100644
--- a/coordinator/ProcessorEpidemiology.py
+++ b/coordinator/ProcessorEpidemiology.py
@@ -21,7 +21,7 @@ from EpiModel import ( # created by rs481
     EpiModel,
     plotRaster
 )
-from EpiModel.EpiPrep import lister, loader, prep, updater
+from EpiModel.EpiPrep import prep
 from Processor import Processor
 from ews_postprocessing.epi.epi_post_processor import EPIPostPostProcessor
 
@@ -319,8 +319,6 @@ class ProcessorEpidemiology(Processor):
 
         start_date, end_date = self.calc_epi_date_range(reference_date_str,config['Epidemiology']['CalculationSpanDays'])
 
-        date_diff = end_date - start_date
-
         start_string = start_date.strftime('%Y-%m-%d-%H%M')
         start_string_short = start_date.strftime('%Y%m%d%H%M')
         end_string = end_date.strftime('%Y-%m-%d-%H%M')
@@ -336,114 +334,6 @@ class ProcessorEpidemiology(Processor):
 
         diseases = config['Epidemiology']['DiseaseNames']
 
-        def gather_dependent_models(
-                config_epi,
-                config,
-                variable_name,
-                start_date,
-                reference_date,
-                end_date,
-                jobDataPath,
-                lastjobDataPath,
-                status,
-                component='Deposition'):
-
-
-            # This function is only prepared for components in this list
-            assert component in ['Deposition','Environment','Epidemiology']
-
-            # TODO: Simplify the set of required arguments. Check if config is necessary.
-
-            config_epi[component]['VariableName'] = variable_name # disease_latin_name_dict[disease]+'_DEPOSITION'
-
-            config_epi[component]['FileNamePrepared'] = f"{jobDataPath}/data_input_{component.lower()}.csv"
-
-            config_epi[component]['LastFileNamePrepared'] = f"{lastjobDataPath}/data_input_{component.lower()}.csv"
-
-            # Use config-defined file lister
-            file_lister_name = config_epi[component]['FileListerFunction']
-
-            file_lister_func = getattr(lister,file_lister_name)
-
-            config_for_lister = config.copy()
-            config_for_lister.update(config_epi)
-
-            lister_kwargs = {}
-            lister_kwargs['reference_date']=config['ReferenceTime']
-
-            loader_kwargs= {}
-
-            loader_dict = {
-                'Deposition' : loader.load_NAME_file,
-                'Environment' : loader.load_env_file,
-                'Epidemiology' : loader.load_and_restructure_epi_file,
-            }
-
-            # Use config-defined file loader
-
-            if 'FileLoaderFunction' in config_epi[component]:
-            
-                loader_func_name = config_epi[component]['FileLoaderFunction']
-                loader_func = getattr(loader,loader_func_name)
-            
-            else:
-        
-                # Use pre-defined file loader
-
-                assert component in loader_dict.keys()
-
-                loader_func = loader_dict.get(component)
-
-            # Provide component-specific variables
-            if component == 'Deposition':
-
-                loader_kwargs['VariableName']= config_for_lister[component].get('VariableName')
-                loader_kwargs['VariableNameAlternative']= config_for_lister[component].get('VariableNameAlternative')
-
-            try:
-                assert component in ['Deposition','Environment']
-                # Make use of data prepared yesterday
-                updater.update_input(
-                        config_for_lister,
-                        reference_date,
-                        end_date,
-                        component=component,
-                        file_lister=file_lister_func,
-                        file_loader=loader_func,
-                        lister_kwargs=lister_kwargs,
-                        update_period_days=3,
-                        **loader_kwargs)
-
-                assert os.path.isfile(config_epi[component]['FileNamePrepared'])
-
-            except AssertionError:
-
-                self.logger.exception(f"Unexpected error in {component} data preparation (updater)")
-
-                # Performa a fresh load of the full time series
-
-                try:
-
-                    prep.prep_input(
-                            config_for_lister,
-                            start_date,
-                            end_date,
-                            component=component,
-                            file_lister=file_lister_func,
-                            file_loader=loader_func,
-                            lister_kwargs=lister_kwargs,
-                            **loader_kwargs)
-
-                    assert os.path.isfile(config_epi[component]['FileNamePrepared'])
-
-                except:
-
-                    self.logger.exception(f"Unexpected error in {component} data preparation (full load)")
-                    status.reset('ERROR')
-                    endJob(status,premature=True)
-
-            return
-
         # get list of variable names to be loaded from deposition input
         depo_variable_names =  config['Epidemiology']['Deposition']['VariableNames']
         assert len(depo_variable_names) == len(diseases)
@@ -492,37 +382,49 @@ class ProcessorEpidemiology(Processor):
 
                 variable_name = depo_variable_names[disease_idx]
 
-                gather_dependent_models(
-                        config_epi,
-                        config,
-                        variable_name,
-                        start_date,
-                        reference_date,
-                        end_date,
-                        jobDataPath,
-                        lastjobDataPath,
-                        status,
-                        component='Deposition')
-
-            # configure filename of prepared deposition data
+                try:
+                    prep.gather_dependent_models(
+                            config_epi,
+                            config,
+                            variable_name,
+                            start_date,
+                            reference_date,
+                            end_date,
+                            jobDataPath,
+                            lastjobDataPath,
+                            component='Deposition')
+                except:
+                    # 'component' was a parameter of the removed nested helper; name the stage literally
+                    self.logger.exception("Unexpected error in Deposition data preparation")
+                    status.reset('ERROR')
+                    endJob(status,premature=True)
+            
+        # configure filename of prepared deposition data
 
             if 'Environment' in config_epi:
 
                 self.logger.info('Preparing environmental suitability data')
 
-                gather_dependent_models(
-                        config_epi,
-                        config,
-                        variable_name,
-                        start_date,
-                        reference_date,
-                        end_date,
-                        jobDataPath,
-                        lastjobDataPath,
-                        status,
-                        component='Environment')
+                try:
+                    prep.gather_dependent_models(
+                            config_epi,
+                            config,
+                            variable_name,
+                            start_date,
+                            reference_date,
+                            end_date,
+                            jobDataPath,
+                            lastjobDataPath,
+                            component='Environment')
+
+                except:
+                    # 'component' was a parameter of the removed nested helper; name the stage literally
+                    self.logger.exception("Unexpected error in Environment data preparation")
+                    status.reset('ERROR')
+                    endJob(status,premature=True)
 
-            # prepare a copy of the host data
+        
+        # prepare a copy of the host data
 
             self.logger.info('Preparing a copy of the host raster data')
 
@@ -569,16 +471,17 @@ class ProcessorEpidemiology(Processor):
             config_epi['Host']['HostCSV'] = dst_host_csv
             config_epi['Host']['FileNamePrepared'] = dst_host_csv
 
-            # Preparing any continue-run files
-            for ci in config['Epidemiology']['Epi']:
-                
-                if ci.get('continue',False) is True:
+        # Preparing any continue-run files
+        for ci in config['Epidemiology']['Epi']:
+            
+            if ci.get('continue',False) is True:
 
-                    model_name = ci['model']
+                model_name = ci['model']
 
-                    # Get results of last day and prepare as input
-                        
-                    gather_dependent_models(
+                # Get results of last day and prepare as input
+                
+                try:
+                    prep.gather_dependent_models(
                             config_epi,
                             config,
                             variable_name,
@@ -587,8 +490,13 @@ class ProcessorEpidemiology(Processor):
                             end_date,
                             jobDataPath,
                             lastjobDataPath,
-                            status,
                             component=model_name)
+                except:
+                    
+                    self.logger.exception(f"Unexpected error in {model_name} data preparation")
+                    status.reset('ERROR')
+                    endJob(status,premature=True)
+
 
             # provide fundamental config elements to config_epi
             for k,v in config.items():
-- 
GitLab