def parse_dtype(series, name_out, dtype, fillna=None):
    """Convert *series* to *dtype* and return it renamed to *name_out*.

    This parser is registered in ``func_dict`` under the key
    ``'parse_dtype'``.

    Parameters
    ----------
    series : pandas.Series
        Input values. The caller's object is left untouched.
    name_out : str
        Name given to the returned series.
    dtype : type or str
        Target dtype, passed straight to ``Series.astype``.
    fillna : scalar, optional
        If not ``None``, missing values are replaced with this value
        before the conversion. Empty strings are not affected by this
        step; they are always turned into ``None`` afterwards.

    Returns
    -------
    pandas.Series
        A new series named *name_out* with the requested dtype.

    Notes
    -----
    Any exception raised by ``Series.astype`` (for example when a missing
    value cannot be represented in an integer dtype) propagates unchanged.
    """
    # Handle nans explicitly. Work on a new object rather than filling in
    # place, so the caller's series (and any DataFrame it belongs to) is
    # not modified as a side effect.
    if fillna is not None:
        series = series.fillna(fillna)

    # '' cannot be handled by astype, so map it to None first.
    # The dict form is deliberate: ``replace('', None)`` is interpreted by
    # some pandas versions as "pad with the previous value", which would
    # silently copy the preceding row's answer into the blank cell.
    series_out = series.replace({'': None}).astype(dtype)

    # Renaming series
    return series_out.rename(name_out)