From 08f2dc2e8d03a8e86a2d14e5e9a23cb276059333 Mon Sep 17 00:00:00 2001
From: jws52 <jws52@cam.ac.uk>
Date: Thu, 21 Mar 2024 15:38:43 +0000
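Subject: [PATCH] refactor: Remove unused call to R-based source calc

The R clustering branch in ProcessorSurveys was guarded by `call_R`,
which was hard-coded to False, so it was never executed. Remove the
flag and the dead branch; the 'Groups' check becomes the first branch.

In the EastAfrica test config, drop the R `SourcesConfigFilename`
entry and rename `pySourcesConfigFilename` to `SourcesConfigFilename`,
which now points at the JSON source-generation config.
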
---
 coordinator/ProcessorSurveys.py               | 91 +------------------
 .../configs/config_EastAfrica_fc_live.json    |  3 +-
 2 files changed, 2 insertions(+), 92 deletions(-)

diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py
index f609856..1a30d17 100644
--- a/coordinator/ProcessorSurveys.py
+++ b/coordinator/ProcessorSurveys.py
@@ -271,100 +271,11 @@ class ProcessorSurveys(Processor):
         date = datetime.datetime.now()
 
         # prepare environment for clustering calc
-        call_R = False
 
         upload_directory = f"{jobPath}/upload"
         Path(upload_directory).mkdir(parents=True, exist_ok=True)
 
-        if call_R:
-
-            cluster_calc_path = "/storage/app/EWS_prod/code/wheat_source_generation/"
-
-            # clear old output
-            old_clustering_output_glob = f"{cluster_calc_path}/output/sources_*"
-            old_clustering_outputs = glob(old_clustering_output_glob)
-
-            self.logger.info('About to unlink old output from clustering calculation')
-            for path in old_clustering_outputs:
-                self.logger.info(f"unlinking {path}")
-                Path(path).unlink()
-
-
-            RPath = '/usr/local/R/bin/Rscript'
-
-            clustering_script = f"{cluster_calc_path}/code/R/clustering.R"
-
-            clustering_env = {
-                    **os.environ,
-                    'R_LIBS':'/home/ewsmanager/R-packages-EWS-clustering/x86_64-pc-linux-gnu-library/3.5',
-                    'PROJ_LIB' : '/usr/share/proj/', # conda env breaks the automatic assignment of PROJ_LIB
-                    }
-
-            clustering_config = config['Survey']['SourcesConfigFilename']
-            assert os.path.isfile(clustering_config)
-
-            clustering_calc = [RPath,
-                    '--no-init-file',
-                    clustering_script,
-                    processed_surveys_filepath,
-                    config['StartString'],
-                    '-2',
-                    '7',
-                    config['Survey']['SourcesConfigFilename']]
-
-            self.logger.debug('Performing clustering calculation')
-
-            description_short = 'wheat-source-generation'
-            description_long = 'source calculation on processed surveys'
-
-            try:
-                subprocess_and_log(clustering_calc, description_short, description_long, env=clustering_env)
-            except:
-                status.reset('ERROR')
-                endJob(status,premature=True)
-
-            self.logger.debug('Checking output of clustering calculation')
-
-            try:
-                self.logger.debug('Trying to copy the dataset processed for clustering')
-
-                clustering_proc_path_glob = f"{cluster_calc_path}/output/survey_data_processed_{config['Survey']['SourcesRegionName']}_{date.strftime('%Y-%m-%d')}_*.csv"
-                clustering_proc_path_list = glob(clustering_proc_path_glob)
-                if len(clustering_proc_path_list) == 0:
-                    self.logger.debug(f"No processed files produced from clustering in {clustering_proc_path_glob}")
-                    raise Exception
-
-                elif len(clustering_proc_path_list) > 1:
-                    self.logger.debug(f"Multiple processed files produced from clustering in {clustering_proc_path_glob}")
-                    raise Exception
-
-                else:
-                    self.logger.debug('Found 1 processed file, placing copy of result in job directory')
-
-                    proc_filename = f"survey_data_processed_{config['StartString']}.csv"
-                    proc_path = f"{output_directory}/{proc_filename}"
-
-                    self.logger.debug(f"as {proc_path}")
-
-                    copyfile(clustering_proc_path_list[0], proc_path)
-
-            except:
-                self.logger.debug('Failed to get a copy of the dataset processed for clustering')
-
-            clustering_output_path_glob = f"{cluster_calc_path}/output/sources_{config['Survey']['SourcesRegionName']}_{date.strftime('%Y-%m-%d')}_*.csv"
-            clustering_output_path_list = glob(clustering_output_path_glob)
-            if len(clustering_output_path_list) == 0:
-                self.logger.error(f"No output produced from clustering in {clustering_output_path_glob}")
-                status.reset('ERROR')
-                endJob(status,premature=True)
-            if len(clustering_output_path_list) > 1:
-                self.logger.error(f"Multiple outputs produced from clustering in {clustering_output_path_glob}")
-                status.reset('ERROR')
-                endJob(status,premature=True)
-
-            sources_path = clustering_output_path_list[0]
-
-        elif 'Groups' in config['Survey']:
+        if 'Groups' in config['Survey']:
             # if 'Groups' is defined in the config, create grouped survey files and run python version
 
             self.logger.info('Preparing grouped survey files')
diff --git a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json
index c7ae97d..9f067de 100644
--- a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json
+++ b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json
@@ -41,9 +41,8 @@
         },
         "GroupBy" : ["Origin"],
         "GroupsToIgnore" : ["ODK-server", "kobo-server", "newODK", "newODK2", "CSV-CAM"],
-        "SourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/wheat_source_generation/configs/config_EastAfrica_mapspam2017.R",
         "SourcesRegionName" : "EastAfrica",
-        "pySourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/source_gen/configs/config_EastAfrica_mapspam2017.json"
+        "SourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/source_gen/configs/config_EastAfrica_mapspam2017.json"
     },
     "Environment" : {
         "ServerPathTemplate" : "/storage/sftp/metofficeupload/upload/Ethiopia/fromMO/daily_name/",
-- 
GitLab