FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects
Commit 6c8a1573 authored by Richard Stutt's avatar Richard Stutt
Browse files

Survey data handles errors in ODK database and additional data sources

parent 571f95ca
No related branches found
No related tags found
No related merge requests found
SubmissionDate,start,end,today,deviceid,subscriberid,imei,phonenumber,username,surveyor_infromation-country,surveyor_infromation-surveyor_name,surveyor_infromation-institution,survey_infromation-location_name,survey_infromation-location-Latitude,survey_infromation-location-Longitude,survey_infromation-location-Altitude,survey_infromation-location-Accuracy,survey_infromation-survey_date,site_information-survey_site,site_information-crop,site_information-field_area,site_information-variety,site_information-growth_stage,other_crop,stem_rust-stemrust_incidence,stem_rust-Stemrust_severity,stem_rust-stemrust_host_plant_reaction,leaf_rust-leafrust_incidence,leaf_rust-leafrust_severity,leaf_rust-leafrust_host_plant_reaction,yellow_rust-yellowrust_incidence,yellow_rust-yellowrust_severity,yellow_rust-yellowrust_host_plant_reaction,septoria-septoria_incidence,septoria-septoria_severity,other_diseases_group-other_diseases,score_diseases_count,SET-OF-score_diseases,samples_collected,samples_type,sample_size-number_stemrust_live,sample_size-number_stemrust_dead_dna,sample_size-number_yellowrust_live,sample_size-number_yellowrust_dead,sample_size-number_leafrust_live,sample_size-using_barcode,live_stemrust_samples_count,SET-OF-live_stemrust_samples,dead_stemrust_samples_count,SET-OF-dead_stemrust_samples,live_yellowrust_samples_count,SET-OF-live_yellowrust_samples,dead_yellowrust_samples_count,SET-OF-dead_yellowrust_samples,live_leafrust_samples_count,SET-OF-live_leafrust_samples,comment,meta-instanceID,KEY
06-Aug-2018 13:43:06,13-Jun-2018 12:26:53,06-Aug-2018 13:42:21,13-Jun-2018,FC:19:10:89:E2:6B,"",FC:19:10:89:E2:6B,"",mekele,Ethiopia,tesfay gebrekirstos,Tigray Agricultural Research Institute,Ilala,13.5235519900,39.5033560800,1991.0000000000,5.0000000000,06-Aug-2018,farmer_field,bread_wheat,2.0000000000,mekele-1,tillering,"",high,20,mr,low,10,mr,high,50,s,medium,55,glume_blotch loose_smut pythium_root_rot scab_fusarium_head_blight take_all tan_spot,6,uuid:58cb6a0f-8e47-4339-8080-0e320234415c/score_diseases,no,"","","","","","","","","","","","","","","","","",There is potential inoculum for the future.,uuid:58cb6a0f-8e47-4339-8080-0e320234415c,uuid:58cb6a0f-8e47-4339-8080-0e320234415c
# Survey data preprocessor.
#
# Reads the raw survey export, removes every row that also appears in the
# "rows to delete" file, and writes the cleaned table out as CSV.
#
# Usage:
#   Rscript SurveyDataPreprocessor.R <surveyData.csv> <rowsToDelete.csv> [<rowsToAdd.csv>] <output.csv>
#
# <rowsToAdd.csv> is accepted but not used yet (see the TODO below).
options(stringsAsFactors = FALSE)
library(dplyr)

args = commandArgs(TRUE)
if (length(args) < 3) {
  stop("Usage: Rscript SurveyDataPreprocessor.R <surveyData.csv> <rowsToDelete.csv> [<rowsToAdd.csv>] <output.csv>")
}

defaultDataFileName = args[1]
deleteRowsFileName = args[2]
if (length(args) == 3) {
  # An empty additional-data file name interpolated into a command string
  # contributes no argument at all, so the output file name arrives in
  # position 3. Accept that form rather than ending up with an NA output path.
  addRowsFileName = ""
  outputFileName = args[3]
} else {
  addRowsFileName = args[3]
  outputFileName = args[4]
}

# Reason for using colClasses is that otherwise it gets the type inference wrong
# for the deleteData table, which has much less data for it to work with,
# and therefore dplyr can't match between the two tables.
defaultData = read.csv(defaultDataFileName, header = TRUE, colClasses = "character")

# Remove unwanted rows: anti_join keeps only the rows of defaultData that have
# no match in deleteData (matching on all columns the two tables share).
deleteData = read.csv(deleteRowsFileName, header = TRUE, colClasses = "character")
cleanData = anti_join(defaultData, deleteData)
# TODO: Check and see if each row from deleteData was removed from defaultData - warn if a row not removed

# Add extra rows:
# Until the additional data source exists, the output is simply the cleaned
# data. joinedData must be defined here: it was previously only assigned in
# the commented-out code below, so the write step failed with
# "object 'joinedData' not found".
joinedData = cleanData
# TODO: enable once data available
#extraData = read.csv(addRowsFileName, header = TRUE, colClasses = "character")
#joinedData = rbind(cleanData, extraData)

# Write output:
# NOTE(review): quote = FALSE leaves values that contain commas unquoted
# (e.g. a free-text comment column) - confirm the consumer copes with that.
write.csv(joinedData, file = outputFileName, row.names = FALSE, quote = FALSE)
......@@ -20,8 +20,10 @@ my $debugNoUpload = 0;
#Directory layout and external tool locations for this deployment.
#NOTE(review): $workspacePath is not used in the visible part of the script - confirm what lives there.
my $workspacePath = "/storage/app/Ethiopia-EWS/Ethiopia-EWS-Workspace";
#Holds the coordinator's own resources: the R preprocessing script, the CSV of survey rows to remove and the ssh key used for uploads.
my $coordinatorPath = "/storage/app/Ethiopia-EWS/Ethiopia-EWS-Coordinator";
#Rscript executable used to launch the R scripts.
my $RPath = "/usr/local/R/bin/Rscript";
#Maintainer e-mail addresses (presumably the recipients of error/warning notifications - confirm against handleError).
my @maintainers = ("ca500\@cam.ac.uk", "ds603\@cam.ac.uk", "rs481\@cam.ac.uk");
#@maintainers = ("rs481\@cam.ac.uk");print STDERR "DEBUG: MAINTAINERS LIST\n";
#NOTE(review): the debug override below is active, so the full list above is replaced by a single address
#and a DEBUG line is written to STDERR on every run - confirm this is intended before deploying.
@maintainers = ("rs481\@cam.ac.uk");print STDERR "DEBUG: MAINTAINERS LIST\n";
#Today's local date as YYYYMMDD, used to name the dated output folders and files.
my $todayString = strftime("%Y%m%d", localtime());
......@@ -131,13 +133,21 @@ unless(-d $serverOutputDir) {
#Folder inside today's folder that the survey data is exported to as CSV.
my $csvOutputDir = $todayFolderPath."/ExportCSV";
#File name of the raw survey export.
my $csvFileName = "SurveyData.csv";
#Path of the preprocessed survey data (same folder, "Processed_" prefix); this is the file handed to the clustering step.
my $processedSurveyDataFileName = $csvOutputDir."/Processed_".$csvFileName;
#CSV listing the survey rows to remove from the raw export; passed to SurveyDataPreprocessor.R.
my $surveyDataErrorsToRemoveFileName = $coordinatorPath."/SurveyDataErrorsToRemove.csv";
#CSV of additional survey rows for the preprocessor (presumably rows to append - confirm); empty for now.
#NOTE(review): an empty string spliced into a single command string contributes no argument, so the
#arguments after it shift left by one - confirm how the preprocessor call passes this value.
my $surveyDataAdditionalDataFileName = "";
unless($serverFailure) {
my $dataToCSVCmd = "java -jar ".$serverJar." --export --form_id ".$serverFormID." --storage_directory ".$serverOutputDir." --export_directory ".$csvOutputDir." --export_filename ".$csvFileName;
system($dataToCSVCmd);
#Check we got the folder we were expecting
unless(-d $csvOutputDir) {
if (-d $csvOutputDir) {
#Do any necessary modifications to the survey data (remove known-bad rows, add extra rows).
#The arguments are passed as a list rather than as one interpolated string so that each value
#reaches the R script as exactly one argument: an empty $surveyDataAdditionalDataFileName would
#otherwise vanish from the command line and shift the output file name into its place, and any
#path containing whitespace would be split.
my $preprocessRet = system($RPath, $coordinatorPath."/SurveyDataPreprocessor.R", $csvOutputDir."/".$csvFileName, $surveyDataErrorsToRemoveFileName, $surveyDataAdditionalDataFileName, $processedSurveyDataFileName);
if($preprocessRet != 0) {
#Without the processed file the clustering step has nothing to read, so treat this like a failed export
print STDERR "WARNING: SurveyDataPreprocessor.R failed (system returned ".$preprocessRet."), no processed survey data at: ".$processedSurveyDataFileName."\n";
$serverFailure = 1;
}
} else {
#TODO: If this fails, we should either retry after a few minutes, or just put up yesterdays data
if($debugOutput) {
print("DEBUG: ".$debugTimeString." csv Files not extracted: ".$csvOutputDir);
......@@ -146,9 +156,9 @@ unless($serverFailure) {
#handleError("Failure: Didn't manage to convert to csv in output directory: ".$csvOutputDir);
$serverFailure = 1;
}
}
my $localOutputFolderName = $todayFolderPath."/".$jobIDString."_".$todayString."_0000";
if(!$serverFailure) {
......@@ -169,7 +179,7 @@ if(!$serverFailure) {
#Just in case the server was *very* slow to send the data
my $todayAndrewString = strftime("%Y-%m-%d", localtime());
my $clusteringCmd = "/usr/local/R/bin/Rscript code/R/clustering.R ".$csvOutputDir."/".$csvFileName." ".$todayString." -2 7 --plot";
my $clusteringCmd = $RPath." code/R/clustering.R ".$processedSurveyDataFileName." ".$todayString." -2 7 --plot";
system($clusteringCmd);
#Find the output file:
......@@ -195,11 +205,6 @@ if(!$serverFailure) {
my $sourcesUploadFileName = $localOutputFolderName."/sources_".$todayString.".csv";
fcopy($clusteringOutputFile, $sourcesUploadFileName) or handleError("ERROR: Unable to copy output file ".$clusteringOutputFile." to the output upload staging folder ".$sourcesUploadFileName);
unless($debugNoUpload) {
#Transfer the output to public server for others to be able to pull it down:
system("scp -i ".$coordinatorPath."/ssh_key_willow"." -r ".$localOutputFolderName." willow.csx.cam.ac.uk:/var/www/html/Ethiopia/");
}
} else {
#Server was not able to provide us with the latest data, so we need to look back in time until we find one that succeeded
......@@ -233,10 +238,6 @@ if(!$serverFailure) {
dircopy($lastSuccessfulClusteringOutputPath, $localOutputFolderName);
my $ret = system("scp -i ".$coordinatorPath."/ssh_key_willow"." -r ".$localOutputFolderName." willow.csx.cam.ac.uk:/var/www/html/Ethiopia/");
print "Transfer output: ".$ret."\n";
my $warningTimeStamp = getTimestampNow();
my $warningString = $staleClusteringDataWarningString;
......@@ -252,6 +253,12 @@ if(!$serverFailure) {
}
#Upload the survey data:
#Skipped entirely when $debugNoUpload is set.
unless($debugNoUpload) {
#Transfer the output to public server for others to be able to pull it down:
#Copies the whole local output folder recursively with scp, authenticating with the key kept in the coordinator folder.
#NOTE(review): the return value of system() is not checked, so a failed upload is not reported here.
system("scp -i ".$coordinatorPath."/ssh_key_willow"." -r ".$localOutputFolderName." willow.csx.cam.ac.uk:/var/www/html/Ethiopia/");
}
#Successful exit, remove the in progress file:
#handleError is called if the lock file cannot be deleted.
unlink($inProgressFilePath) or handleError("ERROR: Unable to remove in-progress lock file: ".$inProgressFilePath);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment