FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects
Commit c530dc08 authored by Dr T. Mona's avatar Dr T. Mona
Browse files

fix: Read KEY column as string.

The KEY column in the WRSIS download is an integer, which caused a problem in the survey-data removal subprocess. The solution is to always read the KEY column as a string.
parent 45e1877e
No related branches found
No related tags found
No related merge requests found
...@@ -796,7 +796,7 @@ def process_in_job_survey(jobPath,status,config,component): ...@@ -796,7 +796,7 @@ def process_in_job_survey(jobPath,status,config,component):
for form_name,form_fn in csv_filenames.items(): for form_name,form_fn in csv_filenames.items():
# some define column types, hardwired for now # some define column types, hardwired for now
col_types = {'comment':'str'} col_types = {'comment':'str','KEY':'str'}
form_df = read_csv(form_fn,dtype=col_types) form_df = read_csv(form_fn,dtype=col_types)
...@@ -901,7 +901,7 @@ def process_in_job_survey(jobPath,status,config,component): ...@@ -901,7 +901,7 @@ def process_in_job_survey(jobPath,status,config,component):
keys_to_rm = df_rm['KEY'] keys_to_rm = df_rm['KEY']
# check that all of the keys to remove exist in the original data # check that all of the keys to remove exist in the original data
rm_keys_found = df_rm['KEY'].apply(lambda cell: cell in dfm['KEY'].values) rm_keys_found = df_rm['KEY'].isin(dfm['KEY'])
n_rm_keys_found = rm_keys_found.sum() n_rm_keys_found = rm_keys_found.sum()
n_rm_keys = rm_keys_found.size n_rm_keys = rm_keys_found.size
if not np_all(rm_keys_found): if not np_all(rm_keys_found):
...@@ -911,6 +911,11 @@ def process_in_job_survey(jobPath,status,config,component): ...@@ -911,6 +911,11 @@ def process_in_job_survey(jobPath,status,config,component):
rm_keys_not_found = df_rm[~rm_keys_found] rm_keys_not_found = df_rm[~rm_keys_found]
logger.debug(f"Erroneous entries not found are:\n{rm_keys_not_found}") logger.debug(f"Erroneous entries not found are:\n{rm_keys_not_found}")
logger.debug(f"Type of keys that can be found include:\n{dfm['KEY'].dtype}")
dfm_short_keys = [val for val in dfm['KEY'].values if len(str(val)) <10]
logger.debug(f"Keys that can be found include:\n{dfm_short_keys}")
# identify which surveys to remove # identify which surveys to remove
idx_to_rm = dfm['KEY'].apply(lambda cell: cell in keys_to_rm.values) idx_to_rm = dfm['KEY'].apply(lambda cell: cell in keys_to_rm.values)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment