From e67d20602d7b13f28c3b3812c74b3ffc0f3f52b3 Mon Sep 17 00:00:00 2001
From: tm689 <tm689@cam.ac.uk>
Date: Tue, 26 Mar 2024 11:34:55 +0000
Subject: [PATCH] Feat: add dtype convert function

---
 coordinator/ProcessorSurveyUtils.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/coordinator/ProcessorSurveyUtils.py b/coordinator/ProcessorSurveyUtils.py
index 8d8b888..92be9e7 100644
--- a/coordinator/ProcessorSurveyUtils.py
+++ b/coordinator/ProcessorSurveyUtils.py
@@ -74,6 +74,34 @@ def parse_date(series,name_out='date',fmt_in = '%Y-%m-%d',fmt_out= '%b %d, %Y'):
 
     return s_out
 
+def parse_dtype(series, name_out, dtype, fillna = None):
+    """Convert a series to ``dtype`` and rename it to ``name_out``.
+
+    The input series is left unmodified; a new series is returned.
+
+    :param series: pandas Series to convert.
+    :param name_out: name given to the returned series.
+    :param dtype: target dtype, passed on to ``Series.astype``.
+    :param fillna: optional value used to fill missing entries before the
+        conversion; when None, missing entries are left as they are.
+    :return: new Series converted to ``dtype`` and named ``name_out``.
+    """
+
+    # Handle nans explicitly, without mutating the caller's series
+    if fillna is not None:
+        series = series.fillna(fillna)
+
+    # Replacing '' with None and converting dtype
+    # '' cannot be handled by astype
+    # The dict form is used because replace('', None) forward-fills
+    # instead of inserting None on pandas versions before 1.4
+    series_out = series.replace({'': None}).astype(dtype)
+
+    # Renaming series
+    series_out = series_out.rename(name_out)
+
+    return series_out
+
 def parse_cases(series, name_out, cases, dtype = None, fillna = None):
 
     if dtype is None:
@@ -97,7 +125,8 @@ def parse_cases(series, name_out, cases, dtype = None, fillna = None):
 func_dict = {
     'parse_date' : parse_date,
     'parse_location_kobotoolbox' : parse_location_kobotoolbox,
-    'parse_cases' : parse_cases
+    'parse_cases' : parse_cases,
+    'parse_dtype' : parse_dtype
 }
 
 def parse_columns(df_in,coln_parser_dict):
-- 
GitLab