From e67d20602d7b13f28c3b3812c74b3ffc0f3f52b3 Mon Sep 17 00:00:00 2001
From: tm689 <tm689@cam.ac.uk>
Date: Tue, 26 Mar 2024 11:34:55 +0000
Subject: [PATCH] Feat: add dtype convert function

---
 coordinator/ProcessorSurveyUtils.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/coordinator/ProcessorSurveyUtils.py b/coordinator/ProcessorSurveyUtils.py
index 8d8b888..92be9e7 100644
--- a/coordinator/ProcessorSurveyUtils.py
+++ b/coordinator/ProcessorSurveyUtils.py
@@ -74,6 +74,21 @@ def parse_date(series,name_out='date',fmt_in = '%Y-%m-%d',fmt_out= '%b %d, %Y'):
 
     return s_out
 
+def parse_dtype(series, name_out, dtype, fillna = None):
+    """Return a copy of `series` cast to `dtype` and renamed to `name_out`.
+
+    If `fillna` is not None, missing values are replaced with it before the
+    cast. The caller's series is left unmodified.
+    """
+    # Rebind instead of inplace=True so the caller's series is not mutated
+    if fillna is not None:
+        series = series.fillna(fillna)
+
+    # '' cannot be handled by astype, so swap it for None before converting
+    series_out = series.replace('',None).astype(dtype)
+
+    return series_out.rename(name_out)
+
 def parse_cases(series, name_out, cases, dtype = None, fillna = None):
     
     if dtype is None:
@@ -97,7 +112,8 @@ def parse_cases(series, name_out, cases, dtype = None, fillna = None):
 func_dict = {
     'parse_date' : parse_date,
     'parse_location_kobotoolbox' : parse_location_kobotoolbox,
-    'parse_cases' : parse_cases
+    'parse_cases' : parse_cases,
+    'parse_dtype' : parse_dtype,  # casts a series to a requested dtype
 }
 
 def parse_columns(df_in,coln_parser_dict):
-- 
GitLab