diff --git a/ENVDataPostProcessor.py b/ENVDataPostProcessor.py
new file mode 100755
index 0000000000000000000000000000000000000000..a61acf890efcb8e7bfdd4bedbd85ad76b7f556bd
--- /dev/null
+++ b/ENVDataPostProcessor.py
@@ -0,0 +1,140 @@
+#ENVDataPostProcessor.py
+'''Gathers RIE_value data from any parts into one file, for EPI model to run.
+
+This will be needed if any more parts-based calculations of environmental
+suitability are run without the ews_plotting routine.
+
+Note that gather_RIE_values_v1_1() is only needed for dates missing from
+/storage/app/EWS/Ethiopia/Plotting/output .
+
+gather_RIE_values_v1_3() is the version used operationally.
+'''
+from pathlib import Path
+import os
+import argparse
+from glob import glob
+
+import pandas as pd
+import numpy as np
+
+# Create the parser
+my_parser = argparse.ArgumentParser(description='Different optional arguments')
+
+# Add the arguments. All three are required, so no defaults are declared:
+# argparse never falls back to a default for a required option.
+my_parser.add_argument(
+        '-inpath','--inpath',
+        metavar = 'path',
+        type = str,
+        required = True,
+        help = 'the directory path to all of the separate part calcs')
+
+my_parser.add_argument(
+        '-outpath','--outpath',
+        metavar = 'path',
+        type = str,
+        required = True,
+        help = 'the directory path to gather separate part calcs')
+
+my_parser.add_argument(
+        '-rusttype','--rusttype',
+        choices = ['stripe','stem','leaf'],
+        type = str,
+        required = True,
+        help = 'the rust type whose part calcs are gathered')
+
+args = my_parser.parse_args()
+
+print(f"input arguments are:\n{args}")
+
+# report a missing input directory as a usage error; an assert would be
+# stripped when python runs with -O
+if not os.path.exists(args.inpath):
+    my_parser.error(f'inpath does not exist: {args.inpath}')
+
+# make output directory
+outdir = f'{args.outpath}/{args.rusttype.title()}Rust'
+
+Path(outdir).mkdir(parents=True, exist_ok=True)
+
+def gather_RIE_values_v1_1(envpath=args.inpath,outpath=args.outpath,rusttype=args.rusttype):
+    '''Concatenate the part_*/*/RIE_value.csv files found under envpath.
+
+    The combined table is written to {outpath}/{rusttype}/test_RIE_value.csv
+    without its index, and returned with an outer index level naming the part.
+    '''
+
+    pattern = envpath+'part_*/*/RIE_value.csv'
+    envparts = sorted(glob(pattern))
+    if not envparts:
+        raise ValueError(f'no RIE_value.csv files match {pattern}')
+
+    pdparts = [pd.read_csv(fn) for fn in envparts]
+
+    # label the parts by position so any number of parts can be gathered
+    pdall = pd.concat(
+            pdparts,
+            ignore_index=False,
+            keys=[f'part{i}' for i in range(1,len(pdparts)+1)])
+
+    pdall.rename({'Unnamed: 0':'X'},axis=1,inplace=True)
+
+    part_outdir = Path(outpath) / rusttype
+    part_outdir.mkdir(parents=True, exist_ok=True)
+    pdall.to_csv(part_outdir / 'test_RIE_value.csv',index=False)
+
+    return pdall
+
+def gather_RIE_values_v1_3(envpath=args.inpath,outpath=args.outpath,rusttype=args.rusttype):
+    '''Concatenate the Region*/*0000/RIE_value.csv files found under envpath.
+
+    The combined table, indexed by the first (unnamed) csv column, is written
+    to {outpath}/{rusttype.title()}Rust/RIE_value.csv and returned.
+    '''
+
+    pattern = envpath+'Region*/*0000/RIE_value.csv'
+    envparts = sorted(glob(pattern))
+    if not envparts:
+        raise ValueError(f'no RIE_value.csv files match {pattern}')
+
+    pdparts = [pd.read_csv(fn) for fn in envparts]
+
+    # no per-part keys: set_index below replaces the index anyway
+    pdall = pd.concat(pdparts, ignore_index=False)
+
+    pdall.set_index('Unnamed: 0',inplace=True)
+
+    print(pdall)
+
+    pdall.index.name = None
+
+    # derive the output directory from the arguments, not the module globals
+    part_outdir = Path(outpath) / f'{rusttype.title()}Rust'
+    part_outdir.mkdir(parents=True, exist_ok=True)
+    pdall.to_csv(part_outdir / 'RIE_value.csv')
+
+    return pdall
+
+
+def test_case():
+    inpath_default = '/home/jws52/projects/py-coordination/ENVIRONMENT_20200115/env_suit_wheatrust_20200115/StripeRustOutput/'
+    outpath_default = '/home/jws52/projects/py-coordination/ENVIRONMENT_20200115/EPI/ENV'
+    rusttype_default = 'stripe'
+
+    # NOTE(review): no gather_RIE_values() exists in this module; v1_1 is
+    # assumed here - confirm against the reference file below
+    df2 = gather_RIE_values_v1_1(inpath_default,outpath_default,rusttype_default)
+
+    # example case to reproduce
+    fn1 = '/storage/app/EWS/Ethiopia/Workspace/ENVIRONMENT_20200115/EPI/ENVIRONMENT/Stripe/RIE_value.csv'
+    df1 = pd.read_csv(fn1)
+
+    print('testing')
+    assert np.allclose(df1,df2)
+
+if __name__ == '__main__':
+
+    df2 = gather_RIE_values_v1_3(args.inpath,args.outpath,args.rusttype)
+
+print("Finished!")
+
diff --git a/ENVDataProcessor.pl b/ENVDataProcessor.pl index 8cdb8c5c811bcd863033be043cd21ba66f6048c3..f4beb3bb220f76a085dee61a3f84de02fec4c007 100755 --- a/ENVDataProcessor.pl +++ b/ENVDataProcessor.pl @@ -313,11 +313,26 @@ unless(-d $todayOutputDir) { my $necessaryOutputsGlob = get_env_expected_inputs_glob(); my @necessaryOutputs = glob($todayOutputDir.$necessaryOutputsGlob); 
+if($debugOutput) { + print("DEBUG: ".$debugTimeString." Building calls to gather part files\n"); +} +my @gather_opts = (); +my @rusttypes_to_gather = get_gather_parts_py_cmds(); +foreach my $rusttype (@rusttypes_to_gather) { + push @gather_opts, '--inpath '.$todayOutputDir.'/'."\u$rusttype".'Rust_Ethiopia/ '. + '--outpath '.$todayFolderPath.'/processed'. + ' --rusttype '.lc($rusttype); +} - - - - +# process part files +if($debugOutput) { + print("DEBUG: ".$debugTimeString." Calling gather commands\n"); +} +foreach my $gather_opt (@gather_opts) { + system('/storage/app/EWS/General/EWS-Coordinator/run_EnvPostProcess.sh '.$gather_opt) and handleError("ERROR: Failed to gather parts"); +} +if($debugOutput) { + print("DEBUG: ".$debugTimeString." Gather commands complete\n"); } my $plotPath = $todayFolderPath."/plotting/"; @@ -332,8 +347,6 @@ if($debugNoPlot) { # no more work to do, so clear up in-progress file #Successful run, no plotting expected at beginning of Feb 2020. - #Successful run, no plotting expected at beginning of Feb 2020. - #Successful run, no plotting expected at beginning of Feb 2020. 
# placeholder file state this my $noplotFilePath = $todayFolderPath."/".$jobIDString."_NO_PLOTTING.txt"; open(my $noplotFH, ">", $noplotFilePath) or handleError("ERROR: Unable to open confirmation file for writing: ".$doneFilePath); @@ -412,9 +425,6 @@ unless($debugNoUpload) { #TODO: Bung todays workspace folder onto the RCS -#No longer a need to gather parsed output csvs to be used by the EPI model, because EWS-plotting does this -# look in $plotPath/plotting/*/input_csvs/ - #Flag overall success of operation: open(my $successFH, ">", $doneFilePath) or handleError("ERROR: Unable to open confirmation file for writing: ".$doneFilePath); print $successFH "Completed: ".getTimestampNow()."\n"; diff --git a/ProjectConfig_Ethiopia.pl b/ProjectConfig_Ethiopia.pl index b313b22f86b2479f7a0cb06251a8a49fd6051481..4fd84f21b353fd4f639ab2dd58e2c1744ad076a3 100644 --- a/ProjectConfig_Ethiopia.pl +++ b/ProjectConfig_Ethiopia.pl @@ -27,6 +27,7 @@ sub get_expected_available_minute { return 0 }; sub get_ENV_directory_prefix { return "WR_EnvSuit_Ethiopia_" }; sub get_env_expected_inputs_glob { return "{Stem,Stripe}Rust_Ethiopia/Region*/*0000/RIE_value.csv" }; # v1.3 output #sub get_env_expected_inputs_glob { return "?/RIE_value.csv" }; # v2.0 output +sub get_gather_parts_py_cmds { return ('Stripe', 'Stem') }; sub get_env_plotting_cmds { return ( "/storage/app/EWS/General/EWS-Plotting-Temp/python/ethiopia/run_eth_env_plotting_pine.sh",) }; sub get_env_plotting_output_glob {return "ethiopia/images/{Daily,Weekly}" }; 
diff --git a/run_EnvPostProcess.sh b/run_EnvPostProcess.sh
new file mode 100755
index 0000000000000000000000000000000000000000..4c8e5c9848051995b5816eeeb2b5bc33a0c41f9c
--- /dev/null
+++ b/run_EnvPostProcess.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# provide custom python packages so they can be imported
+
+# activate conda environment of python modules so they can be imported
+source /storage/app/miniconda3/bin/activate /storage/app/EWS/General/EWS-python/py3EWSepi
+
+# run the gather script and keep its exit status: the deactivate step below
+# would otherwise decide what this wrapper returns to its caller
+python /storage/app/EWS/General/EWS-Coordinator/ENVDataPostProcessor.py "$@"
+status=$?
+
+# deactivate conda environment
+source /storage/app/miniconda3/bin/deactivate /storage/app/EWS/General/EWS-python/py3EWSepi
+
+exit "$status"