From a3dadc21cc13b9d4da37dafb7b05586ca4bef6e3 Mon Sep 17 00:00:00 2001 From: tm689 <tm689@cam.ac.uk> Date: Fri, 1 Mar 2024 11:29:02 +0000 Subject: [PATCH] doc: updating documentation for the advanced grouping --- coordinator/ProcessorSurveys.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py index e489732..f609856 100644 --- a/coordinator/ProcessorSurveys.py +++ b/coordinator/ProcessorSurveys.py @@ -3,19 +3,23 @@ Group surveys and run source gen on each groups: Grouping is only run if it is defined in the config (otherwise it use all available surveys). - -Example: - "Groups" : { - "PROD" : ["ODK-server", "kobo-server", "CSV-CAM"], - "WRT" : ["ODK", "CSV"] - } - -It is based on the 'Origin' column of the surveys. -All unique 'Origin' will create its own group alongside the ones defined in the config. -Groups can be ignored by defining 'GroupsToIgnore' in the config (this has no effect on 'Origin' types). - +Each group needs a name, at least one column key, and a list of the unique elements in that column to group by. Example: - "GroupsToIgnore" : ["ODK-server", "kobo-server", "CSV-CAM", "ODK", "CSV", "PV"] + "Groups" : { + "PROD" : { + "Origin" : ["CSV_01", "CSV_02", "ODK_01", "ODK_02"], + "PublishedLevel" : ["Unpubl", "Publ", "Unknown"] + }, + "WRT-Publ" : { + "Origin" : ["CSV_01", "CSV_02", "ODK_01", "ODK_02"], + "PublishedLevel" : ["Publ"] + } + }, + "GroupBy" : ["Origin", "PublishedLevel"], + "GroupsToIgnore" : ["CSV_01", "CSV_02", "ODK_01", "ODK_02", "Unpubl", "Publ"], + +Alongside the defined groups, an individual group is created for each unique element in the columns listed in "GroupBy". +Groups can be ignored by listing them in "GroupsToIgnore" in the config. ''' import csv -- GitLab