diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py index f60985650a4fa1403f87ea96a2319e4eedb230a8..1a30d173a93cf0a5ae0e4da435559e2dccc8d47e 100644 --- a/coordinator/ProcessorSurveys.py +++ b/coordinator/ProcessorSurveys.py @@ -271,100 +271,11 @@ class ProcessorSurveys(Processor): date = datetime.datetime.now() # prepare environment for clustering calc - call_R = False upload_directory = f"{jobPath}/upload" Path(upload_directory).mkdir(parents=True, exist_ok=True) - if call_R: - - cluster_calc_path = "/storage/app/EWS_prod/code/wheat_source_generation/" - - # clear old output - old_clustering_output_glob = f"{cluster_calc_path}/output/sources_*" - old_clustering_outputs = glob(old_clustering_output_glob) - - self.logger.info('About to unlink old output from clustering calculation') - for path in old_clustering_outputs: - self.logger.info(f"unlinking {path}") - Path(path).unlink() - - - RPath = '/usr/local/R/bin/Rscript' - - clustering_script = f"{cluster_calc_path}/code/R/clustering.R" - - clustering_env = { - **os.environ, - 'R_LIBS':'/home/ewsmanager/R-packages-EWS-clustering/x86_64-pc-linux-gnu-library/3.5', - 'PROJ_LIB' : '/usr/share/proj/', # conda env breaks the automatic assignment of PROJ_LIB - } - - clustering_config = config['Survey']['SourcesConfigFilename'] - assert os.path.isfile(clustering_config) - - clustering_calc = [RPath, - '--no-init-file', - clustering_script, - processed_surveys_filepath, - config['StartString'], - '-2', - '7', - config['Survey']['SourcesConfigFilename']] - - self.logger.debug('Performing clustering calculation') - - description_short = 'wheat-source-generation' - description_long = 'source calculation on processed surveys' - - try: - subprocess_and_log(clustering_calc, description_short, description_long, env=clustering_env) - except: - status.reset('ERROR') - endJob(status,premature=True) - - self.logger.debug('Checking output of clustering calculation') - - try: - self.logger.debug('Trying to copy the dataset processed for clustering') - - clustering_proc_path_glob = f"{cluster_calc_path}/output/survey_data_processed_{config['Survey']['SourcesRegionName']}_{date.strftime('%Y-%m-%d')}_*.csv" - clustering_proc_path_list = glob(clustering_proc_path_glob) - if len(clustering_proc_path_list) == 0: - self.logger.debug(f"No processed files produced from clustering in {clustering_proc_path_glob}") - raise Exception - - elif len(clustering_proc_path_list) > 1: - self.logger.debug(f"Multiple processed files produced from clustering in {clustering_proc_path_glob}") - raise Exception - - else: - self.logger.debug('Found 1 processed file, placing copy of result in job directory') - - proc_filename = f"survey_data_processed_{config['StartString']}.csv" - proc_path = f"{output_directory}/{proc_filename}" - - self.logger.debug(f"as {proc_path}") - - copyfile(clustering_proc_path_list[0], proc_path) - - except: - self.logger.debug('Failed to get a copy of the dataset processed for clustering') - - clustering_output_path_glob = f"{cluster_calc_path}/output/sources_{config['Survey']['SourcesRegionName']}_{date.strftime('%Y-%m-%d')}_*.csv" - clustering_output_path_list = glob(clustering_output_path_glob) - if len(clustering_output_path_list) == 0: - self.logger.error(f"No output produced from clustering in {clustering_output_path_glob}") - status.reset('ERROR') - endJob(status,premature=True) - if len(clustering_output_path_list) > 1: - self.logger.error(f"Multiple outputs produced from clustering in {clustering_output_path_glob}") - status.reset('ERROR') - endJob(status,premature=True) - - sources_path = clustering_output_path_list[0] - - elif 'Groups' in config['Survey']: + if 'Groups' in config['Survey']: # if 'Groups' is defined in the config, create grouped survey files and run python version self.logger.info('Preparing grouped survey files') diff --git a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json index c7ae97dad44d0cf8f1d480c7cbf1b715116187e0..9f067de00edc542a5534a91e0c7077e0d09b47e5 100644 --- a/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json +++ b/tests/test_data/test_deployment/regions/EastAfrica/resources/coordinator/configs/config_EastAfrica_fc_live.json @@ -41,9 +41,8 @@ }, "GroupBy" : ["Origin"], "GroupsToIgnore" : ["ODK-server", "kobo-server", "newODK", "newODK2", "CSV-CAM"], - "SourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/wheat_source_generation/configs/config_EastAfrica_mapspam2017.R", "SourcesRegionName" : "EastAfrica", - "pySourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/source_gen/configs/config_EastAfrica_mapspam2017.json" + "SourcesConfigFilename" : "../../test_data/test_deployment/regions/EastAfrica/resources/source_gen/configs/config_EastAfrica_mapspam2017.json" }, "Environment" : { "ServerPathTemplate" : "/storage/sftp/metofficeupload/upload/Ethiopia/fromMO/daily_name/",