diff --git a/ProcessorSurveys.py b/ProcessorSurveys.py index 10d62989349d8be28bd6281210343c3673a32e8c..1a2fc1bfb15e052693551357f09607d9815fdae5 100644 --- a/ProcessorSurveys.py +++ b/ProcessorSurveys.py @@ -796,7 +796,7 @@ def process_in_job_survey(jobPath,status,config,component): for form_name,form_fn in csv_filenames.items(): # some define column types, hardwired for now - col_types = {'comment':'str'} + col_types = {'comment':'str','KEY':'str'} form_df = read_csv(form_fn,dtype=col_types) @@ -901,7 +901,7 @@ def process_in_job_survey(jobPath,status,config,component): keys_to_rm = df_rm['KEY'] # check that all of the keys to remove exist in the original data - rm_keys_found = df_rm['KEY'].apply(lambda cell: cell in dfm['KEY'].values) + rm_keys_found = df_rm['KEY'].isin(dfm['KEY']) n_rm_keys_found = rm_keys_found.sum() n_rm_keys = rm_keys_found.size if not np_all(rm_keys_found): @@ -911,6 +911,11 @@ def process_in_job_survey(jobPath,status,config,component): rm_keys_not_found = df_rm[~rm_keys_found] logger.debug(f"Erroneous entries not found are:\n{rm_keys_not_found}") + logger.debug(f"Type of keys that can be found include:\n{dfm['KEY'].dtype}") + + dfm_short_keys = [val for val in dfm['KEY'].values if len(str(val)) <10] + logger.debug(f"Keys that can be found include:\n{dfm_short_keys}") + # identify which surveys to remove idx_to_rm = dfm['KEY'].apply(lambda cell: cell in keys_to_rm.values)