From 07ec5eebe1c29d614160c6f2bf0e76c0d0689468 Mon Sep 17 00:00:00 2001
From: Rich Wareham <rjw57@cam.ac.uk>
Date: Thu, 2 May 2019 15:52:49 +0100
Subject: [PATCH] allow configuration of API batch size and inter-batch delay

Add configuration parameters to tweak how we call the Google API. Allow
customisation of API batch size and the delay between calls to the API.
This allows us to work around some Google API rate limits.

Closes #2
---
 configuration-example.yaml |  8 ++++++++
 gsuitesync/sync.py         | 12 +++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/configuration-example.yaml b/configuration-example.yaml
index 0d0e106..7d42c5e 100644
--- a/configuration-example.yaml
+++ b/configuration-example.yaml
@@ -10,6 +10,14 @@ sync:
   # completely outside of this script.
   ignore_google_org_unit_path_regex: '^/Service Accounts$'
 
+  # Delay in seconds applied before each batch request sent to Google. This is
+  # useful to avoid hitting Google rate limits. Default: 5.
+  inter_batch_delay: 5
+
+  # Number of requests batched together into one Google API call. The
+  # Directory API supports a maximum batch size of 1000. Default: 50.
+  batch_size: 50
+
 # Configure limits defining maximum scope of changes.
 limits:
   # The abort_... settings below are safety limits and will abort the run if the
diff --git a/gsuitesync/sync.py b/gsuitesync/sync.py
index be99317..2f0e9c8 100644
--- a/gsuitesync/sync.py
+++ b/gsuitesync/sync.py
@@ -6,8 +6,10 @@ import crypt
 import dataclasses
 import itertools
 import logging
+import numbers
 import re
 import secrets
+import time
 import typing
 
 from googleapiclient import discovery
@@ -42,6 +44,13 @@ class Configuration(config.ConfigurationDataclassMixin):
     # are managed completely outside of this script.
     ignore_google_org_unit_path_regex: typing.Union[str, None] = None
 
+    # Delay in seconds slept before each batch request, to avoid hitting Google rate limits.
+    inter_batch_delay: numbers.Real = 5
+
+    # Number of requests batched into one Google API call. The Directory API supports a maximum
+    # batch size of 1000.
+    batch_size: int = 50
+
 
 def sync(configuration, *, read_only=True):
     """Perform sync given configuration dictionary."""
@@ -281,7 +290,7 @@ def sync(configuration, *, read_only=True):
     # Make an chunked iterator of requests to the directory API. The Directory API supports a
     # maximum batch size of 1000. See:
     # https://developers.google.com/admin-sdk/directory/v1/guides/batch
-    for request_batch in _grouper(api_requests(), n=1000):
+    for request_batch in _grouper(api_requests(), n=sync_config.batch_size):
         # Form batch request.
         batch = directory_service.new_batch_http_request()
         for request in request_batch:
@@ -290,6 +299,7 @@ def sync(configuration, *, read_only=True):
         # Execute the batch request if not in read only mode. Otherwise log that we would have.
         if not read_only:
             LOG.info('Issuing batch request to Google.')
+            time.sleep(sync_config.inter_batch_delay)
             batch.execute()
         else:
             LOG.info('Not issuing batch request in read-only mode.')
-- 
GitLab