From 08f2dc2e8d03a8e86a2d14e5e9a23cb276059333 Mon Sep 17 00:00:00 2001
From: jws52 <jws52@cam.ac.uk>
Date: Thu, 21 Mar 2024 15:38:43 +0000
Subject: [PATCH] refactor: Remove unused call to R-based source calc

---
 coordinator/ProcessorSurveys.py             | 91 +------------------
 .../configs/config_EastAfrica_fc_live.json  |  3 +-
 2 files changed, 2 insertions(+), 92 deletions(-)

diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py
index f609856..1a30d17 100644
--- a/coordinator/ProcessorSurveys.py
+++ b/coordinator/ProcessorSurveys.py
@@ -271,100 +271,11 @@ class ProcessorSurveys(Processor):
         date = datetime.datetime.now()
 
         # prepare environment for clustering calc
-        call_R = False
 
         upload_directory = f"{jobPath}/upload"
         Path(upload_directory).mkdir(parents=True, exist_ok=True)
 
-        if call_R:
-
-            cluster_calc_path = "/storage/app/EWS_prod/code/wheat_source_generation/"
-
-            # clear old output
-            old_clustering_output_glob = f"{cluster_calc_path}/output/sources_*"
-            old_clustering_outputs = glob(old_clustering_output_glob)
-
-            self.logger.info('About to unlink old output from clustering calculation')
-            for path in old_clustering_outputs:
-                self.logger.info(f"unlinking {path}")
-                Path(path).unlink()
-
-
-            RPath = '/usr/local/R/bin/Rscript'
-
-            clustering_script = f"{cluster_calc_path}/code/R/clustering.R"
-
-            clustering_env = {
-                    **os.environ,
-                    'R_LIBS':'/home/ewsmanager/R-packages-EWS-clustering/x86_64-pc-linux-gnu-library/3.5',
-                    'PROJ_LIB' : '/usr/share/proj/', # conda env breaks the automatic assignment of PROJ_LIB
-                    }
-
-            clustering_config = config['Survey']['SourcesConfigFilename']
-            assert os.path.isfile(clustering_config)
-
-            clustering_calc = [RPath,
-                    '--no-init-file',
-                    clustering_script,
-                    processed_surveys_filepath,
-                    config['StartString'],
-                    '-2',
-                    '7',
-                    config['Survey']['SourcesConfigFilename']]
-
-            self.logger.debug('Performing clustering calculation')
-
-            description_short = 'wheat-source-generation'
-            description_long = 'source calculation on processed surveys'
-
-            try:
-                subprocess_and_log(clustering_calc, description_short, description_long, env=clustering_env)
-            except:
-                status.reset('ERROR')
-                endJob(status,premature=True)
-
-            self.logger.debug('Checking output of clustering calculation')
-
-            try:
-                self.logger.debug('Trying to copy the dataset processed for clustering')
-
-                clustering_proc_path_glob = f"{cluster_calc_path}/output/survey_data_processed_{config['Survey']['SourcesRegionName']}_{date.strftime('%Y-%m-%d')}_*.csv"
-                clustering_proc_path_list = glob(clustering_proc_path_glob)
-                if len(clustering_proc_path_list) == 0:
-                    self.logger.debug(f"No processed files produced from clustering in {clustering_proc_path_glob}")
-                    raise Exception
-
-                elif len(clustering_proc_path_list) > 1:
-                    self.logger.debug(f"Multiple processed files produced from clustering in {clustering_proc_path_glob}")
-                    raise Exception
-
-                else:
-                    self.logger.debug('Found 1 processed file, placing copy of result in job directory')
-
-                    proc_filename = f"survey_data_processed_{config['StartString']}.csv"
-                    proc_path = f"{output_directory}/{proc_filename}"
-
-                    self.logger.debug(f"as {proc_path}")
-
-                    copyfile(clustering_proc_path_list[0], proc_path)
-
-            except:
-                self.logger.debug('Failed to get a copy of the dataset processed for clustering')
-
-            clustering_output_path_glob = f"{cluster_calc_path}/output/sources_{config['Survey']['SourcesRegionName']}_{date.strftime('%Y-%m-%d')}_*.csv"
-            clustering_output_path_list = glob(clustering_output_path_glob)
-            if len(clustering_output_path_list) == 0:
-                self.logger.error(f"No output produced from clustering in {clustering_output_path_glob}")
-                status.reset('ERROR')
-                endJob(status,premature=True)
-            if len(clustering_output_path_list) > 1:
-                self.logger.error(f"Multiple outputs produced from clustering in {clustering_output_path_glob}")
-                status.reset('ERROR')
-                endJob(status,premature=True)
-
-            sources_path = clustering_output_path_list[0]
-
-        elif 'Groups' in config['Survey']:
+        if 'Groups' in config['Survey']:
             # if 'Groups' is defined in the config, create grouped survey files and run python version
             self.logger.info('Preparing grouped survey files')
 
diff --git a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json
index c7ae97d..9f067de 100644
--- a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json
+++ b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json
@@ -41,9 +41,8 @@
         },
         "GroupBy" : ["Origin"],
         "GroupsToIgnore" : ["ODK-server", "kobo-server", "newODK", "newODK2", "CSV-CAM"],
-        "SourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/wheat_source_generation/configs/config_EastAfrica_mapspam2017.R",
         "SourcesRegionName" : "EastAfrica",
-        "pySourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/source_gen/configs/config_EastAfrica_mapspam2017.json"
+        "SourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/source_gen/configs/config_EastAfrica_mapspam2017.json"
     },
     "Environment" : {
         "ServerPathTemplate" : "/storage/sftp/metofficeupload/upload/Ethiopia/fromMO/daily_name/",
-- 
GitLab