From 66fff212139e33ce1e7c1f55edbdec65352017cf Mon Sep 17 00:00:00 2001 From: jws52 <jws52@cam.ac.uk> Date: Thu, 10 Aug 2023 15:54:36 +0100 Subject: [PATCH] feat: More robust survey preprocessing --- coordinator/ProcessorSurveyUtils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/coordinator/ProcessorSurveyUtils.py b/coordinator/ProcessorSurveyUtils.py index 4da2ecc..23462b7 100644 --- a/coordinator/ProcessorSurveyUtils.py +++ b/coordinator/ProcessorSurveyUtils.py @@ -74,10 +74,17 @@ def parse_date(series,name_out='date',fmt_in = '%Y-%m-%d',fmt_out= '%b %d, %Y'): return s_out -def parse_cases(series, name_out, cases, dtype = None): +def parse_cases(series, name_out, cases, dtype = None, fillna = None): + if dtype is None: dtype = series.dtype + + # Handle nans explicitly + if fillna is not None: + series.fillna(fillna,inplace=True) + # Converting entries according to a cases dictionary + # nans cannot be handled by map series_out = series.map(cases).astype(dtype) # Renaming series -- GitLab