diff --git a/coordinator/ProcessorSurveys.py b/coordinator/ProcessorSurveys.py index e48973218fd2258462d29810654fff5875adbea2..f60985650a4fa1403f87ea96a2319e4eedb230a8 100644 --- a/coordinator/ProcessorSurveys.py +++ b/coordinator/ProcessorSurveys.py @@ -3,19 +3,23 @@ Group surveys and run source gen on each groups: Grouping is only run if it is defined in the config (otherwise it use all available surveys). - -Example: - "Groups" : { - "PROD" : ["ODK-server", "kobo-server", "CSV-CAM"], - "WRT" : ["ODK", "CSV"] - } - -It is based on the 'Origin' column of the surveys. -All unique 'Origin' will create its own group alongside the ones defined in the config. -Groups can be ignored by defining 'GroupsToIgnore' in the config (this has no effect on 'Origin' types). - +Each group needs a name and at least one column key, each mapped to the list of unique elements in that column to group by. Example: - "GroupsToIgnore" : ["ODK-server", "kobo-server", "CSV-CAM", "ODK", "CSV", "PV"] + "Groups" : { + "PROD" : { + "Origin" : ["CSV_01", "CSV_02", "ODK_01", "ODK_02"], + "PublishedLevel" : ["Unpubl", "Publ", "Unknown"] + }, + "WRT-Publ" : { + "Origin" : ["CSV_01", "CSV_02", "ODK_01", "ODK_02"], + "PublishedLevel" : ["Publ"] + } + }, + "GroupBy" : ["Origin", "PublishedLevel"], + "GroupsToIgnore" : ["CSV_01", "CSV_02", "ODK_01", "ODK_02", "Unpubl", "Publ"], + +Alongside the defined groups, an individual group is created for each unique element in the columns listed in "GroupBy". +Groups can be ignored by defining "GroupsToIgnore" in the config. ''' import csv