diff --git a/Dockerfile b/Dockerfile
index 694d30c0f6eea52bb60036e7c30e40a6bbfbbe01..d210cce417d1552a2eee66d210eabadc023745cf 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 # This Dockerfile is intended only to support the Auto-DevOps pipeline on GitLab.
 # It's not intended to package the application.
 
-FROM uisautomation/python:3.7-alpine
+FROM registry.gitlab.developers.cam.ac.uk/uis/devops/infra/dockerimages/python:3.7-alpine
 
 WORKDIR /usr/src/app
 
diff --git a/README.md b/README.md
index 9ceb4cc74ea7f9a61d365dda64e6fdbae1089c69..798956fd5326f474ace3464489677bb2ec34c0c3 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,10 @@ file in the following places in the following order:
 
 The first located file is used.
 
+> **Note:** The `crypt` Python library behaves differently on macOS than on Linux
+> and fails to create acceptable passwords for new Google accounts.
+> Build the Docker image and run gsuitesync via it to overcome this.
+
 ## Installation
 
 The command-line tool can be installed directly from the git repository:
diff --git a/configuration-example.yaml b/configuration-example.yaml
index dba8141fb3eb255ec2511ec3a7cee68ce5d5a308..c8d8004749dea384b97d1e81a5682a653dbbff72 100644
--- a/configuration-example.yaml
+++ b/configuration-example.yaml
@@ -151,6 +151,9 @@ ldap:
   # use SSL when connecting to the LDAP server, and will attempt to
   # authenticate with these credentials.
   #
+  # Username needs to be the full DN of the group, e.g.
+  #   groupid=123456,ou=groups,o=example-corps,dc=example,dc=com
+  #
   # The username and password properties should _not_ be specified when running
   # the sync tool inside the CUDN (which includes running in the CI pipeline).
   username: null
diff --git a/gsuitesync/config.py b/gsuitesync/config.py
deleted file mode 100644
index 4f51965719b94eccfcc40a527e21cdf087f4885f..0000000000000000000000000000000000000000
--- a/gsuitesync/config.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""
-Utilities for parsing configuration files.
-
-"""
-import dataclasses
-import logging
-import os
-
-import yaml
-
-LOG = logging.getLogger(__name__)
-
-
-class ConfigurationError(RuntimeError):
-    """
-    Base class for all configuration errors.
-
-    """
-
-
-class ConfigurationNotFound(ConfigurationError):
-    """
-    A suitable configuration could not be located.
-
-    """
-    def __init__(self):
-        return super().__init__('Could not find any configuration file')
-
-
-def load_configuration(location=None):
-    """
-    Load configuration and return a :py:class:`Configuration` instance. Pass a non-None location to
-    override the default search path.
-
-    :raises: ConfigurationError if the configuration could not be loaded.
-
-    """
-    if location is not None:
-        paths = [location]
-    else:
-        if 'GSUITESYNC_CONFIGURATION' in os.environ:
-            paths = [os.environ['GSUITESYNC_CONFIGURATION']]
-        else:
-            paths = []
-        paths.extend([
-            os.path.join(os.getcwd(), 'gsuitesync.yaml'),
-            os.path.expanduser('~/.gsuitesync/configuration.yaml'),
-            '/etc/gsuitesync/configuration.yaml'
-        ])
-
-    valid_paths = [path for path in paths if os.path.isfile(path)]
-
-    if len(valid_paths) == 0:
-        LOG.error('Could not find configuration file. Tried:')
-        for path in paths:
-            LOG.error('"%s"', path)
-        raise ConfigurationNotFound()
-
-    with open(valid_paths[0]) as f:
-        return yaml.safe_load(f)
-
-
-class ConfigurationDataclassMixin:
-    """
-    Mixin class for dataclass which adds a "from_dict" member which will construct an instance from
-    a dictionary. Fields which have no default value become required fields.
-
-    """
-
-    @classmethod
-    def from_dict(cls, dict_):
-        """
-        Construct an instance from a dict.
-
-        """
-        field_names = {field.name for field in dataclasses.fields(cls)}
-        required_field_names = {
-            field.name for field in dataclasses.fields(cls)
-            if field.default is dataclasses.MISSING
-        }
-
-        for key in dict_.keys():
-            if key not in field_names:
-                raise ValueError(f'Unknown configuration key: {key}')
-
-        for key in required_field_names:
-            if key not in dict_:
-                raise ValueError(f'{key}: required field not set')
-
-        return cls(**dict_)
diff --git a/gsuitesync/config/__init__.py b/gsuitesync/config/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1cc685e8a4f4bf9b05de88f7c67c6db27e7d610
--- /dev/null
+++ b/gsuitesync/config/__init__.py
@@ -0,0 +1,5 @@
+"""
+Configuration definitions
+
+"""
+from .utils import load_configuration, parse_configuration  # noqa: F401
diff --git a/gsuitesync/config/exceptions.py b/gsuitesync/config/exceptions.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6cb9b50a7ae57eeae7e3095886e3526320df0a5
--- /dev/null
+++ b/gsuitesync/config/exceptions.py
@@ -0,0 +1,20 @@
+"""
+Configuration Exceptions
+
+"""
+
+
+class ConfigurationError(RuntimeError):
+    """
+    Base class for all configuration errors.
+
+    """
+
+
+class ConfigurationNotFound(ConfigurationError):
+    """
+    A suitable configuration could not be located.
+
+    """
+    def __init__(self):
+        return super().__init__('Could not find any configuration file')
diff --git a/gsuitesync/config/gapiauth.py b/gsuitesync/config/gapiauth.py
new file mode 100644
index 0000000000000000000000000000000000000000..11b62c2c72bff7004c25e6f63f52f0ac384b4f6e
--- /dev/null
+++ b/gsuitesync/config/gapiauth.py
@@ -0,0 +1,27 @@
+"""
+Google API authentication.
+
+"""
+import dataclasses
+import logging
+import typing
+
+from .mixin import ConfigurationDataclassMixin
+
+
+LOG = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass
+class Configuration(ConfigurationDataclassMixin):
+    """
+    Configuration of Google API access credentials.
+
+    """
+    # Path to on-disk JSON credentials used when accessing the API.
+    credentials: str
+
+    # Path to on-disk JSON credentials used when accessing the API in "read-only" mode. Use this if
+    # you want to have a separate "safe" service account which can only read data. If null, use the
+    # same credentials for reading and writing.
+    read_only_credentials: typing.Union[str, None] = None
diff --git a/gsuitesync/gapidomain.py b/gsuitesync/config/gapidomain.py
similarity index 95%
rename from gsuitesync/gapidomain.py
rename to gsuitesync/config/gapidomain.py
index a4dc18c3d12bf712f4ed9c7cc252104eb0aa51eb..60389b07a7a7c9dc724779a68ec7928930310057 100644
--- a/gsuitesync/gapidomain.py
+++ b/gsuitesync/config/gapidomain.py
@@ -5,7 +5,7 @@ Google Domain management.
 import dataclasses
 import typing
 
-from .config import ConfigurationDataclassMixin
+from .mixin import ConfigurationDataclassMixin
 
 
 @dataclasses.dataclass
diff --git a/gsuitesync/config/ldap.py b/gsuitesync/config/ldap.py
new file mode 100644
index 0000000000000000000000000000000000000000..75fb5372670066e3c96bfa71c4b5a6deaa7c4182
--- /dev/null
+++ b/gsuitesync/config/ldap.py
@@ -0,0 +1,39 @@
+"""
+Retrieving user information from an LDAP directory.
+
+"""
+import dataclasses
+import typing
+
+from .mixin import ConfigurationDataclassMixin
+
+
+@dataclasses.dataclass
+class Configuration(ConfigurationDataclassMixin):
+    """
+    Configuration for accessing the LDAP directory.
+
+    """
+    host: str
+
+    user_search_base: str
+
+    group_search_base: str
+
+    inst_search_base: str
+
+    eligible_user_filter: str
+
+    eligible_group_filter: str
+
+    eligible_inst_filter: str
+
+    username: str = None
+
+    password: str = None
+
+    managed_user_filter: typing.Union[str, None] = None
+
+    managed_group_filter: typing.Union[str, None] = None
+
+    managed_inst_filter: typing.Union[str, None] = None
diff --git a/gsuitesync/limits.py b/gsuitesync/config/limits.py
similarity index 97%
rename from gsuitesync/limits.py
rename to gsuitesync/config/limits.py
index d3fa24c798545861af5a66966edf1ff214e6bdf4..980c430e24e4d4c8b8bbab527f65cfc79a1846e8 100644
--- a/gsuitesync/limits.py
+++ b/gsuitesync/config/limits.py
@@ -6,11 +6,11 @@ import dataclasses
 import numbers
 import typing
 
-from . import config
+from .mixin import ConfigurationDataclassMixin
 
 
 @dataclasses.dataclass
-class Configuration(config.ConfigurationDataclassMixin):
+class Configuration(ConfigurationDataclassMixin):
     """
     Configuration for synchronisation limits.
 
diff --git a/gsuitesync/config/mixin.py b/gsuitesync/config/mixin.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d54dadf2ed044edd36b4f7c3a4b41a4c2a733cb
--- /dev/null
+++ b/gsuitesync/config/mixin.py
@@ -0,0 +1,30 @@
+"""
+Mixin class for dataclass which adds a "from_dict" member which will construct an instance from
+a dictionary. Fields which have no default value become required fields.
+
+"""
+import dataclasses
+
+
+class ConfigurationDataclassMixin:
+    @classmethod
+    def from_dict(cls, dict_):
+        """
+        Construct an instance from a dict.
+
+        """
+        field_names = {field.name for field in dataclasses.fields(cls)}
+        required_field_names = {
+            field.name for field in dataclasses.fields(cls)
+            if field.default is dataclasses.MISSING
+        }
+
+        for key in dict_.keys():
+            if key not in field_names:
+                raise ValueError(f'Unknown configuration key: {key}')
+
+        for key in required_field_names:
+            if key not in dict_:
+                raise ValueError(f'{key}: required field not set')
+
+        return cls(**dict_)
diff --git a/gsuitesync/config/sync.py b/gsuitesync/config/sync.py
new file mode 100644
index 0000000000000000000000000000000000000000..f19771a17ca84a65361279f2c56779cf1e82f915
--- /dev/null
+++ b/gsuitesync/config/sync.py
@@ -0,0 +1,56 @@
+"""
+Synchronisation configuration.
+
+"""
+import dataclasses
+import numbers
+import typing
+
+from .mixin import ConfigurationDataclassMixin
+
+
+@dataclasses.dataclass
+class Configuration(ConfigurationDataclassMixin):
+    # A regular expression which is used to match the organization unit path for Google users who
+    # should be excluded from the list returned by Google. Those users do not exist for the
+    # purposes of the rest of the sync and so if they appear in the list of managed users this
+    # script will attempt to re-add them and fail in the process. Use this setting for users who
+    # are managed completely outside of this script.
+    ignore_google_org_unit_path_regex: typing.Union[str, None] = None
+
+    # The organization unit path in which new accounts are placed
+    new_user_org_unit_path: str = '/'
+
+    # Suffix appended to the names of groups created in Google. The Google group name will be
+    # "{groupName}{group_name_suffix}", where {groupName} is the Lookup group name.
+    group_name_suffix: str = ' from lookup.cam.ac.uk'
+
+    # Settings to be applied to groups in Google. These settings are applied to both new and
+    # existing groups imported from Lookup.
+    # See https://developers.google.com/admin-sdk/groups-settings/v1/reference/groups#json
+    group_settings: dict = dataclasses.field(default_factory=lambda: {
+      'whoCanJoin': 'INVITED_CAN_JOIN',
+      'whoCanViewMembership': 'ALL_IN_DOMAIN_CAN_VIEW',
+      'whoCanViewGroup': 'ALL_MEMBERS_CAN_VIEW',
+      'whoCanPostMessage': 'ALL_IN_DOMAIN_CAN_POST',
+      'allowWebPosting': 'false',
+      'messageModerationLevel': 'MODERATE_ALL_MESSAGES',
+      'includeInGlobalAddressList': 'true',
+      'whoCanLeaveGroup': 'NONE_CAN_LEAVE',
+      'whoCanContactOwner': 'ALL_MANAGERS_CAN_CONTACT',
+      'whoCanModerateMembers': 'OWNERS_ONLY',
+      'whoCanDiscoverGroup': 'ALL_IN_DOMAIN_CAN_DISCOVER',
+    })
+
+    # Inter-batch delay in seconds. This is useful to avoid hitting Google rate limits.
+    inter_batch_delay: numbers.Real = 5
+
+    # Batch size for Google API calls. Google supports batching requests together into one API
+    # call.
+    batch_size: int = 50
+
+    # Number of times to retry HTTP requests if an HTTP failure response is received
+    http_retries: int = 5
+
+    # Delay in seconds between retying a request that has failed
+    http_retry_delay: numbers.Real = 5
diff --git a/gsuitesync/config/utils.py b/gsuitesync/config/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..a12d50b293ce87dad6c6ac5ce84c91a5dfb8b075
--- /dev/null
+++ b/gsuitesync/config/utils.py
@@ -0,0 +1,59 @@
+import logging
+import os
+import yaml
+
+from .exceptions import ConfigurationNotFound
+
+# Configuration declarations
+from . import gapiauth, gapidomain, ldap, limits, sync
+
+LOG = logging.getLogger(__name__)
+
+
+def load_configuration(location=None):
+    """
+    Load configuration and return a :py:class:`Configuration` instance. Pass a non-None location to
+    override the default search path.
+
+    :raises: ConfigurationError if the configuration could not be loaded.
+
+    """
+    if location is not None:
+        paths = [location]
+    else:
+        if 'GSUITESYNC_CONFIGURATION' in os.environ:
+            paths = [os.environ['GSUITESYNC_CONFIGURATION']]
+        else:
+            paths = []
+        paths.extend([
+            os.path.join(os.getcwd(), 'gsuitesync.yaml'),
+            os.path.expanduser('~/.gsuitesync/configuration.yaml'),
+            '/etc/gsuitesync/configuration.yaml'
+        ])
+
+    valid_paths = [path for path in paths if os.path.isfile(path)]
+
+    if len(valid_paths) == 0:
+        LOG.error('Could not find configuration file. Tried:')
+        for path in paths:
+            LOG.error('"%s"', path)
+        raise ConfigurationNotFound()
+
+    with open(valid_paths[0]) as f:
+        return yaml.safe_load(f)
+
+
+def parse_configuration(configuration):
+    """
+    Parses the multiple parts of configuration using appropriate Configuration classes.
+    Returns a dict containing parsed parts of configuration.
+
+    """
+    return {
+        'sync': sync.Configuration.from_dict(configuration.get('sync', {})),
+        'gapi_domain': gapidomain.Configuration.from_dict(configuration.get('google_domain', {})),
+        'ldap': ldap.Configuration.from_dict(configuration.get('ldap', {})),
+        'limits': limits.Configuration.from_dict(configuration.get('limits', {})),
+        'gapi_auth': gapiauth.Configuration.from_dict(
+            configuration.get('google_api', {}).get('auth', {})),
+    }
diff --git a/gsuitesync/gapiauth.py b/gsuitesync/gapiauth.py
deleted file mode 100644
index 436e7696a9562658888d450f37b2ba1dd3bd74ba..0000000000000000000000000000000000000000
--- a/gsuitesync/gapiauth.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""
-Google API authentication.
-
-"""
-import dataclasses
-import logging
-import typing
-
-from google.oauth2 import service_account
-
-from .config import ConfigurationDataclassMixin
-
-
-LOG = logging.getLogger(__name__)
-
-
-@dataclasses.dataclass
-class Configuration(ConfigurationDataclassMixin):
-    """
-    Configuration of Google API access credentials.
-
-    """
-    # Path to on-disk JSON credentials used when accessing the API.
-    credentials: str
-
-    # Path to on-disk JSON credentials used when accessing the API in "read-only" mode. Use this if
-    # you want to have a separate "safe" service account which can only read data. If null, use the
-    # same credentials for reading and writing.
-    read_only_credentials: typing.Union[str, None] = None
-
-    def load_credentials(self, *, read_only=True):
-        """
-        Create a Google credentials object from the configuration. Use *read_only* to indicate if
-        read-only credentials are preferred.
-
-        """
-        credentials = self.credentials
-        if read_only and self.read_only_credentials is not None:
-            credentials = self.read_only_credentials
-            LOG.info('Using read-only credentials.')
-
-        LOG.info('Loading Google account credentials from "%s"', credentials)
-        return service_account.Credentials.from_service_account_file(credentials)
diff --git a/gsuitesync/gapiutil.py b/gsuitesync/gapiutil.py
index abe46bfac11ebf6e2d5ed53864c91e36d35566e7..94f43854ebddfacc2cbf4ebd88bc613c7f176a74 100644
--- a/gsuitesync/gapiutil.py
+++ b/gsuitesync/gapiutil.py
@@ -3,6 +3,7 @@ Utility functions which should have been part of the Google API client.
 
 """
 import logging
+import itertools
 from googleapiclient.errors import HttpError
 from time import sleep
 
@@ -55,7 +56,7 @@ def list_all_in_list(directory_service, list_cb, *, item_ids=[], id_key='key', b
     the "list_cb" Google API method for each item in the "item_ids" list, repeatedly fetching
     pages of results for each item and merging them together. The key used to identify the
     original items in Google is specified by the "id_key" argument. Returns a dictionary mapping
-    the orginal item IDs to the merged "items" arrays from the responses for each item.
+    the original item IDs to the merged "items" arrays from the responses for each item.
 
     This is equivalent to calling list_all() for each item in the "item_ids" list, and collecting
     all the results in a dictionary, except that it uses the Google batch processing API to reduce
@@ -179,3 +180,79 @@ def get_all_in_list(directory_service, get_cb, *, item_ids=[], id_key='key', bat
             break
 
     return resources
+
+
+def process_requests(service, requests, sync_config, read_only=True):
+    """
+    Process an iterable of requests to the specified Google service in batches.
+    These APIs support a maximum batch size of 1000. See:
+    https://developers.google.com/admin-sdk/directory/v1/guides/batch
+
+    """
+    for request_batch in _grouper(requests, n=sync_config.batch_size):
+        # Form batch request.
+        batch = service.new_batch_http_request()
+        for request in request_batch:
+            batch.add(request, callback=_handle_batch_response)
+
+        # Execute the batch request if not in read only mode. Otherwise log that we would
+        # have.
+        if not read_only:
+            LOG.info('Issuing batch request to Google.')
+            sleep(sync_config.inter_batch_delay)
+            retries = sync_config.http_retries
+            while True:
+                try:
+                    batch.execute()
+                except HttpError as err:
+                    if (err.resp.status == 503 and retries > 0):
+                        retries -= 1
+                        LOG.warn('503: Service unavailable - retrying')
+                        sleep(sync_config.http_retry_delay)
+                        continue
+                    if retries == 0:
+                        LOG.error('503: Service unavailable - retry count exceeded')
+                    raise
+                break
+        else:
+            LOG.info('Not issuing batch request in read-only mode.')
+
+
+def _handle_batch_response(request_id, response, exception):
+    if exception is not None:
+        LOG.error('Error performing request: %s', exception)
+        LOG.error('Response: %r', response)
+
+
+def _grouper(iterable, *, n):
+    """
+    Group an iterable into chunks of at most *n* elements. A generator which yields iterables
+    representing slices of *iterable*.
+
+    >>> [list(i) for i in _grouper('ABCDEFGH', n=3)]
+    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G', 'H']]
+    >>> def generator(stop):
+    ...     for x in range(stop):
+    ...         yield x
+    >>> [list(i) for i in _grouper(generator(10), n=3)]
+    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
+    >>> [list(i) for i in _grouper(generator(12), n=3)]
+    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
+
+    The implementation of this function attempts to be efficient; the chunks are iterables which
+    are generated on demand rather than being constructed first. Hence this function can deal with
+    iterables which would fill memory if intermediate chunks were stored.
+
+    >>> i = _grouper(generator(100000000000000000000), n=1000000000000000)
+    >>> next(next(i))
+    0
+
+    """
+    it = iter(iterable)
+    while True:
+        next_chunk_it = itertools.islice(it, n)
+        try:
+            first = next(next_chunk_it)
+        except StopIteration:
+            return
+        yield itertools.chain((first,), next_chunk_it)
diff --git a/gsuitesync/ldap.py b/gsuitesync/ldap.py
deleted file mode 100644
index 6a493dacd355b71cb8bee78206adc0ccad3216ff..0000000000000000000000000000000000000000
--- a/gsuitesync/ldap.py
+++ /dev/null
@@ -1,203 +0,0 @@
-"""
-Retrieving user information from an LDAP directory.
-
-"""
-import collections
-import dataclasses
-import typing
-
-import ldap3
-
-from .config import ConfigurationDataclassMixin
-
-
-# User and group information we need to populate the Google user directory.
-UserEntry = collections.namedtuple('UserEntry', 'uid cn sn displayName givenName')
-GroupEntry = collections.namedtuple('GroupEntry', 'groupID groupName description uids')
-
-
-@dataclasses.dataclass
-class Configuration(ConfigurationDataclassMixin):
-    """
-    Configuration for accessing the LDAP directory.
-
-    """
-    host: str
-
-    user_search_base: str
-
-    group_search_base: str
-
-    inst_search_base: str
-
-    eligible_user_filter: str
-
-    eligible_group_filter: str
-
-    eligible_inst_filter: str
-
-    username: str = None
-
-    password: str = None
-
-    managed_user_filter: typing.Union[str, None] = None
-
-    managed_group_filter: typing.Union[str, None] = None
-
-    managed_inst_filter: typing.Union[str, None] = None
-
-    def get_eligible_uids(self):
-        """
-        Return a set containing all uids who are eligible to have a Google account.
-
-        """
-        return {
-            e['attributes']['uid'][0]
-            for e in self._search(
-                search_base=self.user_search_base, search_filter=self.eligible_user_filter,
-                attributes=['uid']
-            )
-        }
-
-    def get_eligible_groupIDs(self):
-        """
-        Return a set containing all groupIDs that are eligible for Google.
-
-        """
-        return {
-            e['attributes']['groupID'][0]
-            for e in self._search(
-                search_base=self.group_search_base, search_filter=self.eligible_group_filter,
-                attributes=['groupID']
-            )
-        }
-
-    def get_eligible_instIDs(self):
-        """
-        Return a set containing all instIDs that are eligible for Google.
-
-        """
-        return {
-            e['attributes']['instID'][0]
-            for e in self._search(
-                search_base=self.inst_search_base, search_filter=self.eligible_inst_filter,
-                attributes=['instID']
-            )
-        }
-
-    def get_managed_user_entries(self):
-        """
-        Return a list containing all managed user entries as UserEntry instances.
-
-        """
-        search_filter = (
-            self.managed_user_filter
-            if self.managed_user_filter is not None
-            else self.eligible_user_filter
-        )
-        return [
-            UserEntry(
-                uid=_extract(e, 'uid'), cn=_extract(e, 'cn'), sn=_extract(e, 'sn'),
-                displayName=_extract(e, 'displayName'), givenName=_extract(e, 'givenName')
-            )
-            for e in self._search(
-                search_base=self.user_search_base, search_filter=search_filter,
-                attributes=['uid', 'cn', 'sn', 'displayName', 'givenName']
-            )
-        ]
-
-    def get_managed_group_entries(self):
-        """
-        Return a list containing all managed group entries as GroupEntry instances.
-
-        """
-        search_filter = (
-            self.managed_group_filter
-            if self.managed_group_filter is not None
-            else self.eligible_group_filter
-        )
-        return [
-            GroupEntry(
-                groupID=_extract(e, 'groupID'), groupName=_extract(e, 'groupName'),
-                description=_extract(e, 'description'), uids=set(e['attributes'].get('uid', []))
-            )
-            for e in self._search(
-                search_base=self.group_search_base, search_filter=search_filter,
-                attributes=['groupID', 'groupName', 'description', 'uid']
-            )
-        ]
-
-    def get_managed_inst_entries(self):
-        """
-        Return a list containing all managed institution entries as GroupEntry instances.
-
-        Note that we return GroupEntry instances here since Lookup institutions become groups in
-        Google, and this simplifies the sync code by allowing us to handle institutions in the same
-        way as groups. The GroupEntry's groupID and groupName fields will be the institution's
-        instID and ou (name) respectively. Since Lookup institutions don't have descriptions, we
-        set the description field to the institution's name as well (in Google, the description
-        allows longer strings, and so will not truncate the name).
-
-        """
-        # This requires 2 LDAP queries. First find the managed institutions.
-        search_filter = (
-            self.managed_inst_filter
-            if self.managed_inst_filter is not None
-            else self.eligible_inst_filter
-        )
-        managed_insts = [
-            GroupEntry(
-                groupID=_extract(e, 'instID'), groupName=_extract(e, 'ou'),
-                description=_extract(e, 'ou'), uids=set(),
-            )
-            for e in self._search(
-                search_base=self.inst_search_base, search_filter=search_filter,
-                attributes=['instID', 'ou']
-            )
-        ]
-        managed_insts_by_instID = {g.groupID: g for g in managed_insts}
-
-        # Then get each eligible user's list of institutions and use that data to populate each
-        # institution's uid list.
-        eligible_users = self._search(
-            search_base=self.user_search_base, search_filter=self.eligible_user_filter,
-            attributes=['uid', 'instID']
-        )
-        for e in eligible_users:
-            uid = e['attributes']['uid'][0]
-            for instID in e['attributes']['instID']:
-                if instID in managed_insts_by_instID:
-                    managed_insts_by_instID[instID].uids.add(uid)
-
-        return managed_insts
-
-    def _search(self, *, search_base, search_filter, attributes):
-        # Use SSL to access the LDAP server when authentication credentials
-        # have been configured
-        use_ssl = self.username and self.password
-        ldap_server = ldap3.Server(self.host, use_ssl=use_ssl)
-
-        # Keyword arguments to pass to ldap3.Connection
-        connection_kwargs = {
-            'auto_bind': True
-        }
-
-        # Add authentication credentials if configured
-        if self.username:
-            connection_kwargs['username'] = self.username
-        if self.password:
-            connection_kwargs['password'] = self.password
-
-        # Connect to the LDAP server and perform the query
-        with ldap3.Connection(ldap_server, **connection_kwargs) as conn:
-            return conn.extend.standard.paged_search(
-                search_base, search_filter, paged_size=1000, attributes=attributes)
-
-
-def _extract(entry, attr, *, default=''):
-    vs = entry['attributes'].get(attr, [])
-    if len(vs) == 0:
-        return default
-    if isinstance(vs, str):
-        return vs
-    return vs[0]
diff --git a/gsuitesync/sync.py b/gsuitesync/sync.py
deleted file mode 100644
index d0d3ecb442a333f4af4628dad3e5fd7acbb216ad..0000000000000000000000000000000000000000
--- a/gsuitesync/sync.py
+++ /dev/null
@@ -1,928 +0,0 @@
-"""
-Synchronise Google Directory with a local LDAP directory.
-
-"""
-import crypt
-import dataclasses
-import itertools
-import logging
-import numbers
-import re
-import secrets
-import time
-import typing
-
-from googleapiclient import discovery, errors
-
-from . import config
-from . import gapiauth
-from . import gapidomain
-from . import gapiutil
-from . import ldap
-from . import limits
-from . import naming
-
-LOG = logging.getLogger(__name__)
-
-# Scopes required to perform read-only actions.
-READ_ONLY_SCOPES = [
-    'https://www.googleapis.com/auth/admin.directory.user.readonly',
-    'https://www.googleapis.com/auth/admin.directory.group.readonly',
-    'https://www.googleapis.com/auth/admin.directory.group.member.readonly',
-    'https://www.googleapis.com/auth/apps.groups.settings'
-]
-
-# Scopes *in addition to READ_ONLY_SCOPES* required to perform a full update.
-WRITE_SCOPES = [
-    'https://www.googleapis.com/auth/admin.directory.user',
-    'https://www.googleapis.com/auth/admin.directory.group',
-    'https://www.googleapis.com/auth/admin.directory.group.member'
-]
-
-
-@dataclasses.dataclass
-class Configuration(config.ConfigurationDataclassMixin):
-    # A regular expression which is used to match the organization unit path for Google users who
-    # should be excluded from the list returned by Google. Those users do not exist for the
-    # purposes of the rest of the sync and so if they appear in the list of managed users this
-    # script will attempt to re-add them and fail in the process. Use this setting for users who
-    # are managed completely outside of this script.
-    ignore_google_org_unit_path_regex: typing.Union[str, None] = None
-
-    # The organization unit path in which new accounts are placed
-    new_user_org_unit_path: str = '/'
-
-    # Suffix appended to the names of groups created in Google. The Google group name will be
-    # "{groupName}{group_name_suffix}", where {groupName} is the Lookup group name.
-    group_name_suffix: str = ' from lookup.cam.ac.uk'
-
-    # Settings to be applied to groups in Google. These settings are applied to both new and
-    # existing groups imported from Lookup.
-    # See https://developers.google.com/admin-sdk/groups-settings/v1/reference/groups#json
-    group_settings: dict = dataclasses.field(default_factory=lambda: {
-      'whoCanJoin': 'INVITED_CAN_JOIN',
-      'whoCanViewMembership': 'ALL_IN_DOMAIN_CAN_VIEW',
-      'whoCanViewGroup': 'ALL_MEMBERS_CAN_VIEW',
-      'whoCanPostMessage': 'ALL_IN_DOMAIN_CAN_POST',
-      'allowWebPosting': 'false',
-      'messageModerationLevel': 'MODERATE_ALL_MESSAGES',
-      'includeInGlobalAddressList': 'true',
-      'whoCanLeaveGroup': 'NONE_CAN_LEAVE',
-      'whoCanContactOwner': 'ALL_MANAGERS_CAN_CONTACT',
-      'whoCanModerateMembers': 'OWNERS_ONLY',
-      'whoCanDiscoverGroup': 'ALL_IN_DOMAIN_CAN_DISCOVER',
-    })
-
-    # Inter-batch delay in seconds. This is useful to avoid hitting Google rate limits.
-    inter_batch_delay: numbers.Real = 5
-
-    # Batch size for Google API calls. Google supports batching requests together into one API
-    # call.
-    batch_size: int = 50
-
-    # Number of times to retry HTTP requests if a HTTP failure response is received
-    http_retries: int = 5
-
-    # Delay in seconds between retying a request that has failed
-    http_retry_delay: numbers.Real = 5
-
-
-def sync(configuration, *, read_only=True, group_settings=False, just_users=False):
-    """Perform sync given configuration dictionary."""
-    if read_only:
-        LOG.info('Performing synchronisation in READ ONLY mode.')
-    else:
-        LOG.info('Performing synchronisation in WRITE mode.')
-
-    # Parse configuration
-    sync_config = Configuration.from_dict(configuration.get('sync', {}))
-    gapi_auth_config = gapiauth.Configuration.from_dict(
-        configuration.get('google_api', {}).get('auth', {}))
-    gapi_domain_config = gapidomain.Configuration.from_dict(
-        configuration.get('google_domain', {}))
-    ldap_config = ldap.Configuration.from_dict(configuration.get('ldap', {}))
-    limits_config = limits.Configuration.from_dict(configuration.get('limits', {}))
-
-    # Load appropriate Google credentials.
-    creds = (
-        gapi_auth_config.load_credentials(read_only=read_only)
-        .with_scopes(READ_ONLY_SCOPES + ([] if read_only else WRITE_SCOPES))
-    )
-    # Use admin_user if using service account with Domain-Wide Delegation
-    if gapi_domain_config.admin_user:
-        creds = creds.with_subject(gapi_domain_config.admin_user)
-
-    # Secondary domain for Google groups that come from Lookup groups
-    groups_domain = (
-        gapi_domain_config.groups_domain
-        if gapi_domain_config.groups_domain is not None
-        else gapi_domain_config.name
-    )
-
-    # Secondary domain for Google groups that come from Lookup institutions
-    insts_domain = (
-        gapi_domain_config.insts_domain
-        if gapi_domain_config.insts_domain is not None
-        else gapi_domain_config.name
-    )
-
-    # Functions to translate the unique identifiers of users, groups and institutions in Lookup
-    # (uids, groupIDs and instIDs) to and from the unique identifiers used in Google (email
-    # addresses).
-    #
-    # For users:   {uid}     <-> {uid}@{domain}
-    # For groups:  {groupID} <-> {groupID}@{groups_domain}
-    # For insts:   {instID}  <-> {instID.lower()}@{insts_domain}  (local part must be lowercase)
-    #
-    # Additionally, valid uids (CRSids) match the regex [a-z][a-z0-9]{3,7}, valid groupIDs match
-    # the regex [0-9]{6,8} and valid instIDs match the regex [A-Z][A-Z0-9]+.
-    #
-    # Since Lookup institutions become groups in Google, we use common code to sync all Google
-    # groups, regardless of whether they were groups or institutions in Lookup. In all the code
-    # that follows, we use "gid" to refer to the unique identifier of the group or institution in
-    # Lookup (i.e., gid may be either a Lookup groupID or instID).
-    user_email_regex = re.compile('^[a-z][a-z0-9]{3,7}@.*$')
-    groupID_regex = re.compile('^[0-9]{6,8}$')
-    instID_regex = re.compile('^[A-Z][A-Z0-9]+$')
-
-    def uid_to_email(uid):
-        return f'{uid}@{gapi_domain_config.name}'
-
-    def email_to_uid(email):
-        return email.split('@')[0] if user_email_regex.match(email) else None
-
-    def gid_to_email(gid):
-        return (
-            f'{gid}@{groups_domain}' if groupID_regex.match(gid) else
-            f'{gid.lower()}@{insts_domain}' if instID_regex.match(gid) else None
-        )
-
-    def email_to_gid(email):
-        gid = email.split('@')[0]
-        return (
-            gid if groupID_regex.match(gid) else
-            gid.upper() if instID_regex.match(gid.upper()) else None
-        )
-
-    # --------------------------------------------------------------------------------------------
-    # Load current user, group and institution data from Lookup.
-    # --------------------------------------------------------------------------------------------
-
-    # Get a set containing all CRSids. These are all the people who are eligible to be in our
-    # GSuite instance. If a user is in GSuite and is *not* present in this list then they are
-    # suspended.
-    LOG.info('Reading eligible user entries from LDAP')
-    eligible_uids = ldap_config.get_eligible_uids()
-    LOG.info('Total LDAP user entries: %s', len(eligible_uids))
-
-    # Sanity check: there are some eligible users (else LDAP lookup failure?)
-    if len(eligible_uids) == 0:
-        raise RuntimeError('Sanity check failed: no users in eligible set')
-
-    if just_users:
-        eligible_gids = set()
-    else:
-        # Get a set containing all groupIDs. These are all the groups that are eligible to be in
-        # our GSuite instance. If a group is in GSuite and is *not* present in this list then it
-        # is deleted.
-        LOG.info('Reading eligible group entries from LDAP')
-        eligible_groupIDs = ldap_config.get_eligible_groupIDs()
-        LOG.info('Total LDAP group entries: %s', len(eligible_groupIDs))
-
-        # Get a set containing all instIDs. These are all the institutions that are eligible to be
-        # in our GSuite instance. If an institution is in GSuite and is *not* present in this list
-        # then the corresponding group is deleted.
-        LOG.info('Reading eligible institution entries from LDAP')
-        eligible_instIDs = ldap_config.get_eligible_instIDs()
-        LOG.info('Total LDAP institution entries: %s', len(eligible_instIDs))
-
-        # Add these sets together to form the set of all gids (the IDs of all eligible groups and
-        # institutions).
-        eligible_gids = eligible_groupIDs | eligible_instIDs
-        LOG.info('Total combined LDAP group and institution entries: %s', len(eligible_gids))
-
-    # Get a list of managed users. These are all the people who match the "managed_user_filter" in
-    # the LDAP settings.
-    LOG.info('Reading managed user entries from LDAP')
-    managed_user_entries = ldap_config.get_managed_user_entries()
-
-    # Form a mapping from uid to managed user.
-    managed_user_entries_by_uid = {u.uid: u for u in managed_user_entries}
-
-    # Form a set of all *managed user* uids
-    managed_user_uids = set(managed_user_entries_by_uid.keys())
-    LOG.info('Total managed user entries: %s', len(managed_user_uids))
-
-    # Sanity check: the managed users should be a subset of the eligible ones.
-    if len(managed_user_uids - eligible_uids) != 0:
-        raise RuntimeError('Sanity check failed: some managed uids were not in the eligible set')
-
-    if just_users:
-        managed_group_entries = []
-        managed_group_entries_by_gid = dict()
-    else:
-        # Get a list of managed groups. These are all the groups that match the
-        # "managed_group_filter" in the LDAP settings.
-        LOG.info('Reading managed group entries from LDAP')
-        managed_group_entries = ldap_config.get_managed_group_entries()
-
-        # Form a mapping from groupID to managed group.
-        managed_group_entries_by_groupID = {g.groupID: g for g in managed_group_entries}
-
-        # Form a set of all *managed group* groupIDs
-        managed_group_groupIDs = set(managed_group_entries_by_groupID.keys())
-        LOG.info('Total managed group entries: %s', len(managed_group_groupIDs))
-        LOG.info(
-            'Total managed group members: %s',
-            sum([len(g.uids) for g in managed_group_entries])
-        )
-
-        # Get a list of managed institutions. These are all the institutions that match the
-        # "managed_inst_filter" in the LDAP settings.
-        LOG.info('Reading managed institution entries from LDAP')
-        managed_inst_entries = ldap_config.get_managed_inst_entries()
-
-        # Form a mapping from instID to managed institution.
-        managed_inst_entries_by_instID = {i.groupID: i for i in managed_inst_entries}
-
-        # Form a set of all *managed institution* instIDs
-        managed_inst_instIDs = set(managed_inst_entries_by_instID.keys())
-        LOG.info('Total managed institution entries: %s', len(managed_inst_instIDs))
-        LOG.info(
-            'Total managed institution members: %s',
-            sum([len(i.uids) for i in managed_inst_entries])
-        )
-
-        # Add the collections of managed institutions to the collections of managed groups.
-        managed_group_entries += managed_inst_entries
-        managed_group_entries_by_gid = {
-            **managed_group_entries_by_groupID, **managed_inst_entries_by_instID
-        }
-        managed_group_gids = managed_group_groupIDs | eligible_instIDs
-        LOG.info(
-            'Total combined managed group and institution entries: %s', len(managed_group_gids)
-        )
-        LOG.info(
-            'Total combined managed group and institution members: %s',
-            sum([len(g.uids) for g in managed_group_entries])
-        )
-
-        # Sanity check: the managed groups should be a subset of the eligible ones.
-        if len(managed_group_gids - eligible_gids) != 0:
-            raise RuntimeError(
-                'Sanity check failed: some managed gids were not in the eligible set'
-            )
-
-    # --------------------------------------------------------------------------------------------
-    # Load current user, group and institution data from Google.
-    # --------------------------------------------------------------------------------------------
-
-    # Build the directory service using Google API discovery.
-    directory_service = discovery.build('admin', 'directory_v1', credentials=creds)
-
-    # Also build the groupssettings service, which is a parallel API to manage group settings
-    groupssettings_service = discovery.build('groupssettings', 'v1', credentials=creds)
-
-    # Retrieve information on all users excluding domain admins.
-    LOG.info('Getting information on Google domain users')
-    fields = [
-        'id', 'isAdmin', 'orgUnitPath', 'primaryEmail', 'suspended', 'suspensionReason',
-        'name(givenName, familyName)',
-    ]
-    all_google_users = gapiutil.list_all(
-        directory_service.users().list, items_key='users', domain=gapi_domain_config.name,
-        query='isAdmin=false', fields='nextPageToken,users(' + ','.join(fields) + ')',
-        retries=sync_config.http_retries, retry_delay=sync_config.http_retry_delay,
-    )
-
-    # Function to fetch Google group information from the specified domain
-    def fetch_groups(domain):
-        fields = ['id', 'email', 'name', 'description']
-        return gapiutil.list_all(
-            directory_service.groups().list, items_key='groups', domain=domain,
-            fields='nextPageToken,groups(' + ','.join(fields) + ')',
-            retries=sync_config.http_retries, retry_delay=sync_config.http_retry_delay,
-        )
-
-    if just_users:
-        # pretend there are no google groups
-        all_google_groups = []
-    else:
-        # Retrieve information on all Google groups that come from Lookup groups
-        LOG.info('Getting information on Google domain groups')
-        all_google_groups = [
-            g for g in fetch_groups(groups_domain)
-            if groupID_regex.match(g['email'].split('@')[0])
-        ]
-
-        # Append information on all Google groups that come from Lookup institutions
-        LOG.info('Getting information on Google domain institutions')
-        all_google_groups.extend([
-            g for g in fetch_groups(insts_domain)
-            if instID_regex.match(g['email'].split('@')[0].upper())
-        ])
-
-    # Strip any "to be ignored" users out of the results.
-    if sync_config.ignore_google_org_unit_path_regex is not None:
-        LOG.info(
-            'Ignoring users whose organization unit path matches %r',
-            sync_config.ignore_google_org_unit_path_regex)
-        # Check that all users have an orgUnitPath
-        missing_org = [
-            u for u in all_google_users if 'orgUnitPath' not in u
-        ]
-        if len(missing_org) != 0:
-            LOG.error('User entries missing orgUnitPath: %s (starting with %s)',
-                      len(missing_org),
-                      missing_org[0]['primaryEmail'] if 'primaryEmail' in missing_org[0]
-                      else 'user with blank email')
-            raise RuntimeError('Sanity check failed: at least one user is missing orgUnitPath')
-        # Remove users matching regex
-        regex = re.compile(sync_config.ignore_google_org_unit_path_regex)
-        all_google_users = [
-            u for u in all_google_users if not regex.match(u['orgUnitPath'])
-        ]
-
-    # Strip out any users with uids (extracted from the local-part of the email address) that
-    # aren't valid CRSids. These users can't have come from Lookup, and so should not be managed
-    # (suspended) by this script.
-    all_google_users = [u for u in all_google_users if email_to_uid(u['primaryEmail'])]
-
-    # Strip out any groups whose email addresses don't match the pattern for groups created
-    # from Lookup groupIDs or instIDs, and which therefore should not be managed (deleted) by
-    # this script.
-    all_google_groups = [g for g in all_google_groups if email_to_gid(g['email'])]
-
-    # Sanity check. There should be no admins in the returned results.
-    if any(u.get('isAdmin', False) for u in all_google_users):
-        raise RuntimeError('Sanity check failed: admin users in user list')
-
-    # Form mappings from uid/gid to Google user/group.
-    all_google_users_by_uid = {email_to_uid(u['primaryEmail']): u for u in all_google_users}
-    all_google_groups_by_gid = {email_to_gid(g['email']): g for g in all_google_groups}
-
-    # Form sets of all Google-side uids and gids. The all_google_uids set is all users including
-    # the suspended ones and the suspended_google_uids set is only the suspended users. Non
-    # suspended users are therefore all_google_uids - suspended_google_uids. The all_google_gids
-    # set includes both groupIDs and instIDs. Groups in Google do not have any concept of being
-    # suspended.
-    all_google_uids = set(all_google_users_by_uid.keys())
-    all_google_gids = set(all_google_groups_by_gid.keys())
-    suspended_google_uids = {uid for uid, u in all_google_users_by_uid.items() if u['suspended']}
-
-    # Sanity check. We should not have lost anything. (I.e. the uids and gids should be unique.)
-    if len(all_google_uids) != len(all_google_users):
-        raise RuntimeError('Sanity check failed: user list changed length')
-    if len(all_google_gids) != len(all_google_groups):
-        raise RuntimeError('Sanity check failed: group list changed length')
-
-    if group_settings and not just_users:
-        # Retrieve all Google group settings.
-        fields = ['email', *[k for k in sync_config.group_settings.keys()]]
-        all_google_group_settings = gapiutil.get_all_in_list(
-            groupssettings_service, groupssettings_service.groups().get,
-            item_ids=[g['email'] for g in all_google_groups], id_key='groupUniqueId',
-            batch_size=sync_config.batch_size, fields=','.join(fields),
-            retries=sync_config.http_retries, retry_delay=sync_config.http_retry_delay,
-        )
-
-        # Form a mapping from gid to Google group settings.
-        all_google_group_settings_by_gid = {
-            email_to_gid(g['email']): g for g in all_google_group_settings
-        }
-
-        # Sanity check. We should have settings for each managed group.
-        if len(all_google_group_settings_by_gid) != len(all_google_groups):
-            raise RuntimeError(
-                'Sanity check failed: group settings list does not match group list'
-            )
-
-    # Retrieve all Google group memberships. This is a mapping from internal Google group ids to
-    # lists of member resources, corresponding to both Lookup groups and institutions.
-    if just_users:
-        all_google_members = dict()
-    else:
-        fields = ['id', 'email']
-        all_google_members = gapiutil.list_all_in_list(
-            directory_service, directory_service.members().list,
-            item_ids=[g['id'] for g in all_google_groups], id_key='groupKey',
-            batch_size=sync_config.batch_size, items_key='members',
-            fields='nextPageToken,members(' + ','.join(fields) + ')',
-            retries=sync_config.http_retries, retry_delay=sync_config.http_retry_delay,
-        )
-
-        # Santiy check. We should have a group members list for each managed group.
-        if len(all_google_members) != len(all_google_groups):
-            raise RuntimeError(
-                'Sanity check failed: groups in members map do not match group list')
-
-    # Log some stats.
-    LOG.info('Total Google users: %s', len(all_google_uids))
-    LOG.info(
-        'Suspended Google users: %s', sum(1 if u['suspended'] else 0 for u in all_google_users))
-    if not just_users:
-        LOG.info('Total Google groups: %s', len(all_google_gids))
-        LOG.info(
-            'Total Google group members: %s', sum([len(m) for g, m in all_google_members.items()])
-        )
-
-    # --------------------------------------------------------------------------------------------
-    # Compute differences between the Lookup and Google data.
-    # --------------------------------------------------------------------------------------------
-
-    # For each user which exists in Google or the managed user set which is eligible, determine if
-    # they need updating/creating. If so, record a patch/insert for the user.
-    LOG.info('Calculating updates...')
-    google_user_updates = {}
-    google_user_creations = {}
-    for uid, managed_user_entry in managed_user_entries_by_uid.items():
-        # Heuristically determine the given and family names.
-        names = naming.get_names(
-            uid=uid, display_name=managed_user_entry.displayName, cn=managed_user_entry.cn,
-            sn=managed_user_entry.sn, given_name=managed_user_entry.givenName)
-
-        # Form expected user resource fields.
-        expected_google_user = {
-            'name': {
-                'givenName': names.given_name,
-                'familyName': names.family_name,
-            },
-        }
-
-        # Find existing Google user (if any).
-        existing_google_user = all_google_users_by_uid.get(uid)
-
-        if existing_google_user is not None:
-            # See if we need to change the existing user
-            # Unless anything needs changing, the patch is empty.
-            patch = {}
-
-            # Determine how to patch user's name.
-            google_user_name = existing_google_user.get('name', {})
-            patch_name = {}
-            if google_user_name.get('givenName') != expected_google_user['name']['givenName']:
-                patch_name['givenName'] = names.given_name
-            if google_user_name.get('familyName') != expected_google_user['name']['familyName']:
-                patch_name['familyName'] = names.family_name
-            if len(patch_name) > 0:
-                patch['name'] = patch_name
-
-            # Only record non-empty patches.
-            if len(patch) > 0:
-                google_user_updates[uid] = patch
-        else:
-            # No existing Google user. Record the new resource. Generate a new user password and
-            # send Google the hash. It doesn't matter what this password is since we never have the
-            # user log in with it. For password-only applications the user can make use of an
-            # application-specific password.
-            new_user = {
-                'primaryEmail': uid_to_email(uid),
-                **expected_google_user,
-            }
-            google_user_creations[uid] = new_user
-
-    # For each group which exists in Google or the managed group set which is eligible, determine
-    # if it needs updating/creating. If so, record a patch/insert for the group.
-    google_group_updates = {}
-    google_group_creations = {}
-    for gid, managed_group_entry in managed_group_entries_by_gid.items():
-        # Form expected group resource fields. The 2 Google APIs we use here to update groups in
-        # Google each have different maximum lengths for group names and descriptions, and
-        # empirically the APIs don't function properly if either limit is exceeded, so we use the
-        # minimum of the 2 documented maximum field lengths (73 characters for names and 300
-        # characters for descriptions).
-        #
-        # Note that the source of each of these groups may be either a Lookup group or a Lookup
-        # institution, which are handled the same here. Technically Lookup institutions do not have
-        # descriptions, but the code in ldap.py sets the description from the name for Lookup
-        # institutions, which is useful since some institution names do not fit in the Google name
-        # field.
-        expected_google_group = {
-            'name': _trim_text(
-                managed_group_entry.groupName, maxlen=73, suffix=sync_config.group_name_suffix
-            ),
-            'description': _trim_text(
-                _clean_group_desc(managed_group_entry.description),
-                maxlen=300
-            )
-        }
-
-        # Find existing Google group (if any).
-        existing_google_group = all_google_groups_by_gid.get(gid)
-
-        if existing_google_group is not None:
-            # See if we need to change the existing group
-            # Unless anything needs changing, the patch is empty.
-            patch = {}
-
-            if existing_google_group.get('name') != expected_google_group['name']:
-                patch['name'] = expected_google_group['name']
-            if existing_google_group.get('description') != expected_google_group['description']:
-                patch['description'] = expected_google_group['description']
-
-            # Only record non-empty patches.
-            if len(patch) > 0:
-                google_group_updates[gid] = patch
-        else:
-            # No existing Google group, so create one.
-            google_group_creations[gid] = {
-                'email': gid_to_email(gid),
-                **expected_google_group
-            }
-
-    # Form a set of all the uids which need patching.
-    uids_to_update = set(google_user_updates.keys())
-    LOG.info('Number of existing users to update: %s', len(uids_to_update))
-
-    # Form a set of all the gids which need patching.
-    gids_to_update = set(google_group_updates.keys())
-    LOG.info('Number of existing groups to update: %s', len(gids_to_update))
-
-    # Form a set of all the uids which need adding.
-    uids_to_add = set(google_user_creations.keys())
-    LOG.info('Number of users to add: %s', len(uids_to_add))
-
-    # Form a set of all the gids which need adding.
-    gids_to_add = set(google_group_creations.keys())
-    LOG.info('Number of groups to add: %s', len(gids_to_add))
-
-    # Form a set of all uids which need reactivating. We reactive users who are in the managed user
-    # list *and* the suspended user list.
-    uids_to_reactivate = suspended_google_uids & managed_user_uids
-    LOG.info('Number of users to reactivate: %s', len(uids_to_reactivate))
-
-    # Form a set of all uids which should be suspended. This is all the unsuspended Google uids
-    # which do not appear in our eligible user list.
-    uids_to_suspend = (all_google_uids - suspended_google_uids) - eligible_uids
-    LOG.info('Number of users to suspend: %s', len(uids_to_suspend))
-
-    # Form a set of all gids which need deleting.
-    gids_to_delete = all_google_gids - eligible_gids
-    LOG.info('Number of groups to delete: %s', len(gids_to_delete))
-
-    # For each managed group, determine which members to insert or delete. These are lists of
-    # (gid, uid) tuples.
-    members_to_insert = []
-    members_to_delete = []
-    for gid, managed_group_entry in managed_group_entries_by_gid.items():
-        # Find the existing Google group members.
-        existing_google_group = all_google_groups_by_gid.get(gid)
-        if existing_google_group:
-            existing_members = all_google_members[existing_google_group['id']]
-            existing_member_uids = set([email_to_uid(m['email']) for m in existing_members])
-        else:
-            existing_member_uids = set()
-
-        # Members to insert. This is restricted to the managed user set, so that we don't attempt
-        # to insert a member resource for a non-existent user.
-        insert_uids = (
-            (managed_group_entry.uids - existing_member_uids).intersection(managed_user_uids)
-        )
-        members_to_insert.extend([(gid, uid) for uid in insert_uids])
-
-        # Members to delete. This is restricted to the eligible user set, so that we don't bother
-        # to delete a member resource when the user is suspended (and so we won't need to re-add
-        # it if the user is reactivated).
-        delete_uids = (
-            (existing_member_uids - managed_group_entry.uids).intersection(eligible_uids)
-        )
-        members_to_delete.extend([(gid, uid) for uid in delete_uids])
-
-    LOG.info('Number of group members to insert: %s', len(members_to_insert))
-    LOG.info('Number of group members to delete: %s', len(members_to_delete))
-
-    # --------------------------------------------------------------------------------------------
-    # Enforce limits on how much data to change in Google.
-    # --------------------------------------------------------------------------------------------
-
-    # Calculate percentage change to users, groups and group members.
-    user_change_percentage = 100. * (
-        len(uids_to_add | uids_to_update | uids_to_reactivate | uids_to_suspend)
-        /
-        max(1, len(all_google_uids))
-    )
-    LOG.info('Configuration will modify %.2f%% of users', user_change_percentage)
-
-    group_change_percentage = 100. * (
-        len(gids_to_add | gids_to_update | gids_to_delete)
-        /
-        max(1, len(all_google_gids))
-    )
-    LOG.info('Configuration will modify %.2f%% of groups', group_change_percentage)
-
-    member_change_percentage = 100. * (
-        (len(members_to_insert) + len(members_to_delete))
-        /
-        max(1, sum([len(m) for g, m in all_google_members.items()]))
-    )
-    LOG.info('Configuration will modify %.2f%% of group members', member_change_percentage)
-
-    # Enforce percentage change sanity checks.
-    if (limits_config.abort_user_change_percentage is not None and
-            user_change_percentage > limits_config.abort_user_change_percentage):
-        LOG.error(
-            'Modification of %.2f%% of users is greater than limit of %.2f%%. Aborting.',
-            user_change_percentage, limits_config.abort_user_change_percentage
-        )
-        raise RuntimeError('Aborting due to large user change percentage')
-    if (limits_config.abort_group_change_percentage is not None and
-            group_change_percentage > limits_config.abort_group_change_percentage):
-        LOG.error(
-            'Modification of %.2f%% of groups is greater than limit of %.2f%%. Aborting.',
-            group_change_percentage, limits_config.abort_group_change_percentage
-        )
-        raise RuntimeError('Aborting due to large group change percentage')
-    if (limits_config.abort_member_change_percentage is not None and
-            member_change_percentage > limits_config.abort_member_change_percentage):
-        LOG.error(
-            'Modification of %.2f%% of group members is greater than limit of %.2f%%. Aborting.',
-            member_change_percentage, limits_config.abort_member_change_percentage
-        )
-        raise RuntimeError('Aborting due to large group member change percentage')
-
-    # Cap maximum size of various operations.
-    if limits_config.max_new_users is not None and len(uids_to_add) > limits_config.max_new_users:
-        # Ensure that we do not attempt to insert a group member for any of the users not added as
-        # a result of this cap, since these users won't exist in Google
-        capped_uids_to_add = _limit(uids_to_add, limits_config.max_new_users)
-        uids_not_added = uids_to_add - capped_uids_to_add
-        members_to_insert = [(g, u) for g, u in members_to_insert if u not in uids_not_added]
-        uids_to_add = capped_uids_to_add
-        LOG.info('Capped number of new users to %s', len(uids_to_add))
-    if (limits_config.max_new_groups is not None and
-            len(gids_to_add) > limits_config.max_new_groups):
-        # Ensure that we do not attempt to insert a group member for any of the groups not added
-        # as a result of this cap, since these groups won't exist in Google
-        capped_gids_to_add = _limit(gids_to_add, limits_config.max_new_groups)
-        gids_not_added = gids_to_add - capped_gids_to_add
-        members_to_insert = [(g, u) for g, u in members_to_insert if g not in gids_not_added]
-        gids_to_add = capped_gids_to_add
-        LOG.info('Capped number of new groups to %s', len(gids_to_add))
-    if (limits_config.max_suspended_users is not None and
-            len(uids_to_suspend) > limits_config.max_suspended_users):
-        uids_to_suspend = _limit(uids_to_suspend, limits_config.max_suspended_users)
-        LOG.info('Capped number of users to suspend to %s', len(uids_to_suspend))
-    if (limits_config.max_deleted_groups is not None and
-            len(gids_to_delete) > limits_config.max_deleted_groups):
-        gids_to_delete = _limit(gids_to_delete, limits_config.max_deleted_groups)
-        LOG.info('Capped number of groups to delete to %s', len(gids_to_delete))
-    if (limits_config.max_reactivated_users is not None and
-            len(uids_to_reactivate) > limits_config.max_reactivated_users):
-        uids_to_reactivate = _limit(uids_to_reactivate, limits_config.max_reactivated_users)
-        LOG.info('Capped number of users to reactivate to %s', len(uids_to_reactivate))
-    if (limits_config.max_updated_users is not None and
-            len(uids_to_update) > limits_config.max_updated_users):
-        uids_to_update = _limit(uids_to_update, limits_config.max_updated_users)
-        LOG.info('Capped number of users to update to %s', len(uids_to_update))
-    if (limits_config.max_updated_groups is not None and
-            len(gids_to_update) > limits_config.max_updated_groups):
-        gids_to_update = _limit(gids_to_update, limits_config.max_updated_groups)
-        LOG.info('Capped number of groups to update to %s', len(gids_to_update))
-    if (limits_config.max_inserted_members is not None and
-            len(members_to_insert) > limits_config.max_inserted_members):
-        members_to_insert = members_to_insert[0:limits_config.max_inserted_members]
-        LOG.info('Capped number of group members to insert to %s', len(members_to_insert))
-    if (limits_config.max_deleted_members is not None and
-            len(members_to_delete) > limits_config.max_deleted_members):
-        members_to_delete = members_to_delete[0:limits_config.max_deleted_members]
-        LOG.info('Capped number of group members to delete to %s', len(members_to_delete))
-
-    # --------------------------------------------------------------------------------------------
-    # Finally, perform the actual updates in Google.
-    # --------------------------------------------------------------------------------------------
-
-    # A generator which will generate patch() and insert() calls to the directory service to
-    # perform the actions required to update users
-    def user_api_requests():
-        # Update existing users.
-        user_updates = {uid: google_user_updates[uid] for uid in uids_to_update}
-        for uid, update in user_updates.items():
-            google_id = all_google_users_by_uid[uid]['id']
-            # Only show the previous parts of name that have been changed
-            updated_google_user_name = update.get('name', {})
-            previous_google_user_name = all_google_users_by_uid[uid].get('name', {})
-            previous = {
-                k: previous_google_user_name.get(k, '')
-                for k in ['givenName', 'familyName']
-                if k in updated_google_user_name
-            }
-            LOG.info('Update user "%s": "%r" from "%r"', uid, update, previous)
-            yield directory_service.users().patch(userKey=google_id, body=update)
-
-        # Suspend old users
-        for uid in uids_to_suspend:
-            google_id = all_google_users_by_uid[uid]['id']
-            LOG.info('Suspending user: "%s"', uid)
-            yield directory_service.users().patch(userKey=google_id, body={'suspended': True})
-
-        # Reactivate returning users
-        for uid in uids_to_reactivate:
-            google_id = all_google_users_by_uid[uid]['id']
-            LOG.info('Reactivating user: "%s"', uid)
-            yield directory_service.users().patch(userKey=google_id, body={'suspended': False})
-
-        # Create new users
-        for uid in uids_to_add:
-            # Generate a random password which is thrown away.
-            new_user = {**{
-                'hashFunction': 'crypt',
-                'password': crypt.crypt(secrets.token_urlsafe(), crypt.METHOD_SHA512),
-                'orgUnitPath': sync_config.new_user_org_unit_path,
-            }, **google_user_creations[uid]}
-            redacted_user = {**new_user, **{'password': 'REDACTED'}}
-            LOG.info('Adding user "%s": %s', uid, redacted_user)
-            yield directory_service.users().insert(body=new_user)
-
-    # A generator which will generate patch(), insert() and delete() calls to the directory
-    # service to perform the actions required to update groups
-    def group_api_requests():
-        # Update existing groups
-        group_updates = {gid: google_group_updates[gid] for gid in gids_to_update}
-        for gid, update in group_updates.items():
-            google_id = all_google_groups_by_gid[gid]['id']
-            LOG.info('Update group "%s": "%r"', gid, update)
-            yield directory_service.groups().patch(groupKey=google_id, body=update)
-
-        # Delete cancelled groups
-        for gid in gids_to_delete:
-            google_id = all_google_groups_by_gid[gid]['id']
-            LOG.info('Deleting group: "%s"', gid)
-            yield directory_service.groups().delete(groupKey=google_id)
-
-        # Create new groups
-        for gid in gids_to_add:
-            new_group = google_group_creations[gid]
-            LOG.info('Adding group "%s": %s', gid, new_group)
-            yield directory_service.groups().insert(body=new_group)
-
-    # A generator which will generate patch() calls to the groupssettings service to set or
-    # update the required group settings.
-    def group_settings_api_requests():
-        # Apply all settings to new groups.
-        for gid in gids_to_add:
-            email = gid_to_email(gid)
-            settings = sync_config.group_settings
-            LOG.info('Updating settings for new group "%s": %s', gid, settings)
-            yield groupssettings_service.groups().patch(groupUniqueId=email, body=settings)
-
-        if group_settings:
-            # Apply any settings that differ to pre-existing groups.
-            for gid, settings in all_google_group_settings_by_gid.items():
-                patch = {k: v for k, v in sync_config.group_settings.items()
-                         if settings.get(k) != v}
-                if patch:
-                    email = gid_to_email(gid)
-                    LOG.info('Updating settings for existing group "%s": %s', gid, patch)
-                    yield groupssettings_service.groups().patch(groupUniqueId=email, body=patch)
-        else:
-            LOG.info('Skipping updating settings for existing groups')
-
-    # A generator which will generate insert() and delete() calls to the directory service to
-    # perform the actions required to update group members
-    def member_api_requests():
-        # Insert new members
-        for gid, uid in members_to_insert:
-            group_key = gid_to_email(gid)
-            user_key = uid_to_email(uid)
-            LOG.info('Adding user "%s" to group "%s"', user_key, group_key)
-            yield directory_service.members().insert(groupKey=group_key, body={'email': user_key})
-
-        # Delete removed members
-        for gid, uid in members_to_delete:
-            group_key = gid_to_email(gid)
-            user_key = uid_to_email(uid)
-            LOG.info('Removing user "%s" from group "%s"', user_key, group_key)
-            yield directory_service.members().delete(groupKey=group_key, memberKey=user_key)
-
-    # Process an iterable list of requests to the specified Google service in batches. These APIs
-    # support a maximum batch size of 1000. See:
-    # https://developers.google.com/admin-sdk/directory/v1/guides/batch
-    def process_requests(service, requests):
-        for request_batch in _grouper(requests, n=sync_config.batch_size):
-            # Form batch request.
-            batch = service.new_batch_http_request()
-            for request in request_batch:
-                batch.add(request, callback=_handle_batch_response)
-
-            # Execute the batch request if not in read only mode. Otherwise log that we would
-            # have.
-            if not read_only:
-                LOG.info('Issuing batch request to Google.')
-                time.sleep(sync_config.inter_batch_delay)
-                retries = sync_config.http_retries
-                while True:
-                    try:
-                        batch.execute()
-                    except errors.HttpError as err:
-                        if (err.resp.status == 503 and retries > 0):
-                            retries -= 1
-                            LOG.warn('503: Service unavailable - retrying')
-                            time.sleep(sync_config.http_retry_delay)
-                            continue
-                        if retries == 0:
-                            LOG.error('503: Service unavailable - retry count exceeded')
-                        raise
-                    break
-            else:
-                LOG.info('Not issuing batch request in read-only mode.')
-
-    # Process all the user, group and group member updates
-    process_requests(directory_service, user_api_requests())
-    if not just_users:
-        process_requests(directory_service, group_api_requests())
-        process_requests(groupssettings_service, group_settings_api_requests())
-        process_requests(directory_service, member_api_requests())
-
-
-def _handle_batch_response(request_id, response, exception):
-    if exception is not None:
-        LOG.error('Error performing request: %s', exception)
-        LOG.error('Response: %r', response)
-
-
-def _limit(s, limit):
-    """
-    Given a set, s, and a numeric limit, return a set which has no more than *limit* elements. The
-    exact set of elements retained is not specified.
-
-    >>> s = set('ABCDEFGHIJKLMNOPQ')
-    >>> len(s) > 5
-    True
-    >>> len(_limit(s, 5)) == 5
-    True
-    >>> len(_limit(s, 500)) == len(s)
-    True
-
-    All elements of the returned set are taken from input set.
-
-    >>> s_prime = _limit(s, 5)
-    >>> s_prime - s
-    set()
-
-    """
-    return {e for _, e in itertools.takewhile(lambda p: p[0] < limit, enumerate(s))}
-
-
-def _grouper(iterable, *, n):
-    """
-    Group an iterable into chunks of at most *n* elements. A generator which yields iterables
-    representing slices of *iterable*.
-
-    >>> [list(i) for i in _grouper('ABCDEFGH', n=3)]
-    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G', 'H']]
-    >>> def generator(stop):
-    ...     for x in range(stop):
-    ...         yield x
-    >>> [list(i) for i in _grouper(generator(10), n=3)]
-    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
-    >>> [list(i) for i in _grouper(generator(12), n=3)]
-    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
-
-    The implementation of this function attempts to be efficient; the chunks are iterables which
-    are generated on demand rather than being constructed first. Hence this function can deal with
-    iterables which would fill memory if intermediate chunks were stored.
-
-    >>> i = _grouper(generator(100000000000000000000), n=1000000000000000)
-    >>> next(next(i))
-    0
-
-    """
-    it = iter(iterable)
-    while True:
-        next_chunk_it = itertools.islice(it, n)
-        try:
-            first = next(next_chunk_it)
-        except StopIteration:
-            return
-        yield itertools.chain((first,), next_chunk_it)
-
-
-def _trim_text(text, *, maxlen, cont='...', suffix=''):
-    """
-    Trim text to be no more than "maxlen" characters long, terminating it with "cont" if it had
-    to be truncated. If supplied, "suffix" is appended to the string after truncating, and the
-    truncation point adjusted so that the total length remains less than "maxlen".
-
-    """
-    return (
-        text[0:maxlen-len(cont)-len(suffix)]+cont+suffix
-        if len(text)+len(suffix) > maxlen else text+suffix
-    )
-
-
-def _clean_group_desc(s):
-    """
-    Clean any "bad characters" in group descriptions.
-
-    Google support (https://support.google.com/a/answer/9193374) says:
-    "descriptions can’t contain equal signs (=), or brackets (<,>)"
-
-    >>> _clean_group_desc('a<b>c=d')
-    'abcd'
-
-    """
-    return ''.join(c for c in s if c not in _CLEAN_GROUP_DESC_BAD_CHARS)
-
-
-# Characters stripped by _clean_group_desc. Present as a constant to avoid re-creating it.
-_CLEAN_GROUP_DESC_BAD_CHARS = '=<>'
diff --git a/gsuitesync/sync/__init__.py b/gsuitesync/sync/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e702854c2df0229d0353d13e7314fb60e9329a15
--- /dev/null
+++ b/gsuitesync/sync/__init__.py
@@ -0,0 +1 @@
+from .main import sync  # noqa: F401
diff --git a/gsuitesync/sync/base.py b/gsuitesync/sync/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba6cd664157f56aabf0a2df924339a590611315f
--- /dev/null
+++ b/gsuitesync/sync/base.py
@@ -0,0 +1,14 @@
+"""
+Base classes for retrievers, comparator and updater classes that consume configuration and state.
+
+"""
+
+
+class ConfigurationStateConsumer:
+    required_config = None
+
+    def __init__(self, configuration, state):
+        # For convenience, create properties for required configuration
+        for c in (self.required_config if self.required_config is not None else []):
+            setattr(self, f'{c}_config', configuration.get(c, {}))
+        self.state = state
diff --git a/gsuitesync/sync/compare.py b/gsuitesync/sync/compare.py
new file mode 100644
index 0000000000000000000000000000000000000000..02f62a533bc93c71b2bc7c4239fc687455bf8c1d
--- /dev/null
+++ b/gsuitesync/sync/compare.py
@@ -0,0 +1,417 @@
+"""
+Compute differences between the Lookup and Google data.
+
+"""
+import logging
+import itertools
+
+from .. import naming
+from .base import ConfigurationStateConsumer
+from .utils import uid_to_email, gid_to_email, email_to_uid
+
+LOG = logging.getLogger(__name__)
+
+
+class Comparator(ConfigurationStateConsumer):
+    required_config = ('gapi_domain', 'sync', 'limits')
+
+    def compare_users(self):
+        # For each user which exists in Google or the managed user set which is eligible,
+        # determine if they need updating/creating. If so, record a patch/insert for the user.
+        LOG.info('Calculating updates to users...')
+        google_user_updates = {}
+        google_user_creations = {}
+        for uid, managed_user_entry in self.state.managed_user_entries_by_uid.items():
+            # Heuristically determine the given and family names.
+            names = naming.get_names(
+                uid=uid, display_name=managed_user_entry.displayName, cn=managed_user_entry.cn,
+                sn=managed_user_entry.sn, given_name=managed_user_entry.givenName)
+
+            # Form expected user resource fields.
+            expected_google_user = {
+                'name': {
+                    'givenName': names.given_name,
+                    'familyName': names.family_name,
+                },
+            }
+
+            # Find existing Google user (if any).
+            existing_google_user = self.state.all_google_users_by_uid.get(uid)
+
+            if existing_google_user is not None:
+                # See if we need to change the existing user
+                # Unless anything needs changing, the patch is empty.
+                patch = {}
+
+                # Determine how to patch user's name.
+                google_name = existing_google_user.get('name', {})
+                patch_name = {}
+                if google_name.get('givenName') != expected_google_user['name']['givenName']:
+                    patch_name['givenName'] = names.given_name
+                if google_name.get('familyName') != expected_google_user['name']['familyName']:
+                    patch_name['familyName'] = names.family_name
+                if len(patch_name) > 0:
+                    patch['name'] = patch_name
+
+                # Only record non-empty patches.
+                if len(patch) > 0:
+                    google_user_updates[uid] = patch
+            else:
+                # No existing Google user. Record the new resource to create. A throw-away
+                # random password is generated later, at creation time, and only its hash is
+                # sent to Google. It doesn't matter what that password is since we never have
+                # the user log in with it; password-only apps can use an app-specific password.
+                new_user = {
+                    'primaryEmail': uid_to_email(uid, self.gapi_domain_config.name),
+                    **expected_google_user,
+                }
+                google_user_creations[uid] = new_user
+
+        # Form a set of all the uids which need patching.
+        uids_to_update = set(google_user_updates.keys())
+        LOG.info('Number of existing users to update: %s', len(uids_to_update))
+
+        # Form a set of all the uids which need adding.
+        uids_to_add = set(google_user_creations.keys())
+        LOG.info('Number of users to add: %s', len(uids_to_add))
+
+        # Form a set of all uids which need reactivating. We reactivate users who are in the
+        # managed user list *and* the suspended user list.
+        uids_to_reactivate = self.state.suspended_google_uids & self.state.managed_user_uids
+        LOG.info('Number of users to reactivate: %s', len(uids_to_reactivate))
+
+        # Form a set of all uids which should be suspended. This is all the unsuspended Google uids
+        # which do not appear in our eligible user list.
+        uids_to_suspend = (
+            (self.state.all_google_uids - self.state.suspended_google_uids)
+            - self.state.eligible_uids
+        )
+        LOG.info('Number of users to suspend: %s', len(uids_to_suspend))
+
+        self.state.update({
+            'google_user_updates': google_user_updates,
+            'google_user_creations': google_user_creations,
+            'uids_to_update': uids_to_update,
+            'uids_to_add': uids_to_add,
+            'uids_to_reactivate': uids_to_reactivate,
+            'uids_to_suspend': uids_to_suspend,
+        })
+
+    def compare_groups(self):
+        # For each group which exists in Google or the managed group set which is eligible,
+        # determine if it needs updating/creating. If so, record a patch/insert for the group.
+        LOG.info('Calculating updates to groups...')
+        google_group_updates = {}
+        google_group_creations = {}
+        for gid, managed_group_entry in self.state.managed_group_entries_by_gid.items():
+            # Form expected group resource fields. The 2 Google APIs we use here to update groups
+            # in Google each have different maximum lengths for group names and descriptions, and
+            # empirically the APIs don't function properly if either limit is exceeded, so we use
+            # the minimum of the 2 documented maximum field lengths (73 characters for names and
+            # 300 characters for descriptions).
+            #
+            # Note that the source of each of these groups may be either a Lookup group or a Lookup
+            # institution, which are handled the same here. Technically Lookup institutions do not
+            # have descriptions, but the code in ldap.py sets the description from the name for
+            # Lookup institutions, which is useful since some institution names do not fit in the
+            # Google name field.
+            expected_google_group = {
+                'name': _trim_text(
+                    managed_group_entry.groupName, maxlen=73,
+                    suffix=self.sync_config.group_name_suffix
+                ),
+                'description': _trim_text(
+                    _clean_group_desc(managed_group_entry.description),
+                    maxlen=300
+                )
+            }
+
+            # Find existing Google group (if any).
+            existing_google_group = self.state.all_google_groups_by_gid.get(gid)
+
+            if existing_google_group is not None:
+                # See if we need to change the existing group
+                # Unless anything needs changing, the patch is empty.
+                patch = {}
+
+                if existing_google_group.get('name') != expected_google_group['name']:
+                    patch['name'] = expected_google_group['name']
+                if (existing_google_group.get('description') !=
+                        expected_google_group['description']):
+                    patch['description'] = expected_google_group['description']
+
+                # Only record non-empty patches.
+                if len(patch) > 0:
+                    google_group_updates[gid] = patch
+            else:
+                # No existing Google group, so create one.
+                google_group_creations[gid] = {
+                    'email': gid_to_email(gid, self.state.groups_domain, self.state.insts_domain),
+                    **expected_google_group
+                }
+
+        # Form a set of all the gids which need patching.
+        gids_to_update = set(google_group_updates.keys())
+        LOG.info('Number of existing groups to update: %s', len(gids_to_update))
+
+        # Form a set of all the gids which need adding.
+        gids_to_add = set(google_group_creations.keys())
+        LOG.info('Number of groups to add: %s', len(gids_to_add))
+
+        # Form a set of all gids which need deleting.
+        gids_to_delete = self.state.all_google_gids - self.state.eligible_gids
+        LOG.info('Number of groups to delete: %s', len(gids_to_delete))
+
+        # For each managed group, determine which members to insert or delete. These are lists of
+        # (gid, uid) tuples.
+        members_to_insert = []
+        members_to_delete = []
+        for gid, managed_group_entry in self.state.managed_group_entries_by_gid.items():
+            # Find the existing Google group members.
+            existing_google_group = self.state.all_google_groups_by_gid.get(gid)
+            if existing_google_group:
+                existing_members = self.state.all_google_members[existing_google_group['id']]
+                existing_member_uids = set([email_to_uid(m['email']) for m in existing_members])
+            else:
+                existing_member_uids = set()
+
+            # Members to insert. This is restricted to the managed user set, so that we don't
+            # attempt to insert a member resource for a non-existent user.
+            insert_uids = (
+                (managed_group_entry.uids - existing_member_uids)
+                .intersection(self.state.managed_user_uids)
+            )
+            members_to_insert.extend([(gid, uid) for uid in insert_uids])
+
+            # Members to delete. This is restricted to the eligible user set, so that we don't
+            # bother to delete a member resource when the user is suspended (and so we won't need
+            # to re-add it if the user is reactivated).
+            delete_uids = (
+                (existing_member_uids - managed_group_entry.uids)
+                .intersection(self.state.eligible_uids)
+            )
+            members_to_delete.extend([(gid, uid) for uid in delete_uids])
+
+        LOG.info('Number of group members to insert: %s', len(members_to_insert))
+        LOG.info('Number of group members to delete: %s', len(members_to_delete))
+
+        self.state.update({
+            'google_group_updates': google_group_updates,
+            'google_group_creations': google_group_creations,
+            'gids_to_update': gids_to_update,
+            'gids_to_add': gids_to_add,
+            'gids_to_delete': gids_to_delete,
+            'members_to_insert': members_to_insert,
+            'members_to_delete': members_to_delete,
+        })
+
+    def compare_groups_settings(self):
+        # Determine changes to existing group settings
+        group_settings_to_update = {}
+        for gid, settings in self.state.all_google_group_settings_by_gid.items():
+            patch = {
+                k: v for k, v in self.sync_config.group_settings.items()
+                if settings.get(k) != v
+            }
+            if len(patch) > 0:
+                group_settings_to_update[gid] = patch
+
+        gids_to_update_group_settings = set(group_settings_to_update.keys())
+        LOG.info('Number of existing groups to update settings: %s',
+                 len(gids_to_update_group_settings))
+
+        self.state.update({
+            'group_settings_to_update': group_settings_to_update,
+            'gids_to_update_group_settings': gids_to_update_group_settings,
+        })
+
+    def enforce_limits(self, just_users):
+        # --------------------------------------------------------------------------------------------
+        # Enforce limits on how much data to change in Google.
+        # --------------------------------------------------------------------------------------------
+
+        # Calculate percentage change to users, groups and group members.
+        user_change_percentage = 100. * (
+            len(self.state.uids_to_add | self.state.uids_to_update |
+                self.state.uids_to_reactivate | self.state.uids_to_suspend)
+            /
+            max(1, len(self.state.all_google_uids))
+        )
+        LOG.info('Configuration will modify %.2f%% of users', user_change_percentage)
+
+        if not just_users:
+            group_change_percentage = 100. * (
+                len(self.state.gids_to_add | self.state.gids_to_update | self.state.gids_to_delete)
+                /
+                max(1, len(self.state.all_google_gids))
+            )
+            LOG.info('Configuration will modify %.2f%% of groups', group_change_percentage)
+
+            member_change_percentage = 100. * (
+                (len(self.state.members_to_insert) + len(self.state.members_to_delete))
+                /
+                max(1, sum([len(m) for g, m in self.state.all_google_members.items()]))
+            )
+            LOG.info('Configuration will modify %.2f%% of group members', member_change_percentage)
+
+        # Enforce percentage change sanity checks.
+        if (self.limits_config.abort_user_change_percentage is not None and
+                user_change_percentage > self.limits_config.abort_user_change_percentage):
+            LOG.error(
+                'Modification of %.2f%% of users is greater than limit of %.2f%%. Aborting.',
+                user_change_percentage, self.limits_config.abort_user_change_percentage
+            )
+            raise RuntimeError('Aborting due to large user change percentage')
+
+        if not just_users:
+            if (self.limits_config.abort_group_change_percentage is not None and
+                    group_change_percentage > self.limits_config.abort_group_change_percentage):
+                LOG.error(
+                    'Modification of %.2f%% of groups is greater than limit of %.2f%%. Aborting.',
+                    group_change_percentage, self.limits_config.abort_group_change_percentage
+                )
+                raise RuntimeError('Aborting due to large group change percentage')
+            if (self.limits_config.abort_member_change_percentage is not None and
+                    member_change_percentage > self.limits_config.abort_member_change_percentage):
+                LOG.error(
+                    'Modification of %.2f%% of group members is greater than limit of %.2f%%. '
+                    'Aborting.',
+                    member_change_percentage, self.limits_config.abort_member_change_percentage
+                )
+                raise RuntimeError('Aborting due to large group member change percentage')
+
+        # Cap maximum size of various operations.
+        if (self.limits_config.max_new_users is not None
+                and len(self.state.uids_to_add) > self.limits_config.max_new_users):
+            # Ensure that we do not attempt to insert a group member for any of the users not
+            # added as a result of this cap, since these users won't exist in Google
+            capped_uids_to_add = _limit(self.state.uids_to_add, self.limits_config.max_new_users)
+            uids_not_added = self.state.uids_to_add - capped_uids_to_add
+            if not just_users:
+                self.state.members_to_insert = [
+                    (g, u) for g, u in self.state.members_to_insert if u not in uids_not_added
+                ]
+            self.state.uids_to_add = capped_uids_to_add
+            LOG.info('Capped number of new users to %s', len(self.state.uids_to_add))
+
+        if (self.limits_config.max_suspended_users is not None and
+                len(self.state.uids_to_suspend) > self.limits_config.max_suspended_users):
+            self.state.uids_to_suspend = _limit(
+                self.state.uids_to_suspend, self.limits_config.max_suspended_users
+            )
+            LOG.info('Capped number of users to suspend to %s', len(self.state.uids_to_suspend))
+        if (self.limits_config.max_reactivated_users is not None and
+                len(self.state.uids_to_reactivate) > self.limits_config.max_reactivated_users):
+            self.state.uids_to_reactivate = _limit(
+                self.state.uids_to_reactivate, self.limits_config.max_reactivated_users
+            )
+            LOG.info(
+                'Capped number of users to reactivate to %s',
+                len(self.state.uids_to_reactivate)
+            )
+        if (self.limits_config.max_updated_users is not None and
+                len(self.state.uids_to_update) > self.limits_config.max_updated_users):
+            self.state.uids_to_update = _limit(
+                self.state.uids_to_update, self.limits_config.max_updated_users
+            )
+            LOG.info('Capped number of users to update to %s', len(self.state.uids_to_update))
+
+        if not just_users:
+            if (self.limits_config.max_new_groups is not None and
+                    len(self.state.gids_to_add) > self.limits_config.max_new_groups):
+                # Ensure that we do not attempt to insert a group member for any of the groups not
+                # added as a result of this cap, since these groups won't exist in Google
+                capped_gids_to_add = _limit(
+                    self.state.gids_to_add, self.limits_config.max_new_groups
+                )
+                gids_not_added = self.state.gids_to_add - capped_gids_to_add
+                self.state.members_to_insert = [
+                    (g, u) for g, u in self.state.members_to_insert if g not in gids_not_added
+                ]
+                self.state.gids_to_add = capped_gids_to_add
+                LOG.info('Capped number of new groups to %s', len(self.state.gids_to_add))
+
+            if (self.limits_config.max_deleted_groups is not None and
+                    len(self.state.gids_to_delete) > self.limits_config.max_deleted_groups):
+                self.state.gids_to_delete = _limit(
+                    self.state.gids_to_delete, self.limits_config.max_deleted_groups
+                )
+                LOG.info('Capped number of groups to delete to %s', len(self.state.gids_to_delete))
+            if (self.limits_config.max_updated_groups is not None and
+                    len(self.state.gids_to_update) > self.limits_config.max_updated_groups):
+                self.state.gids_to_update = _limit(
+                    self.state.gids_to_update, self.limits_config.max_updated_groups
+                )
+                LOG.info('Capped number of groups to update to %s', len(self.state.gids_to_update))
+            if (self.limits_config.max_inserted_members is not None and
+                    len(self.state.members_to_insert) > self.limits_config.max_inserted_members):
+                self.state.members_to_insert = (
+                    self.state.members_to_insert[0:self.limits_config.max_inserted_members]
+                )
+                LOG.info(
+                    'Capped number of group members to insert to %s',
+                    len(self.state.members_to_insert)
+                )
+            if (self.limits_config.max_deleted_members is not None and
+                    len(self.state.members_to_delete) > self.limits_config.max_deleted_members):
+                self.state.members_to_delete = (
+                    self.state.members_to_delete[0:self.limits_config.max_deleted_members]
+                )
+                LOG.info(
+                    'Capped number of group members to delete to %s',
+                    len(self.state.members_to_delete)
+                )
+
+
+def _limit(s, limit):
+    """
+    Given a set, s, and a numeric limit, return a set which has no more than *limit* elements. The
+    exact set of elements retained is not specified.
+
+    >>> s = set('ABCDEFGHIJKLMNOPQ')
+    >>> len(s) > 5
+    True
+    >>> len(_limit(s, 5)) == 5
+    True
+    >>> len(_limit(s, 500)) == len(s)
+    True
+
+    All elements of the returned set are taken from input set.
+
+    >>> s_prime = _limit(s, 5)
+    >>> s_prime - s
+    set()
+
+    """
+    return {e for _, e in itertools.takewhile(lambda p: p[0] < limit, enumerate(s))}
+
+
+def _trim_text(text, *, maxlen, cont='...', suffix=''):
+    """
+    Trim text to be no more than "maxlen" characters long, terminating it with "cont" if it had
+    to be truncated. If supplied, "suffix" is appended to the string after truncating, and the
+    truncation point adjusted so that the total length remains less than "maxlen".
+
+    """
+    return (
+        text[0:maxlen-len(cont)-len(suffix)]+cont+suffix
+        if len(text)+len(suffix) > maxlen else text+suffix
+    )
+
+
+def _clean_group_desc(s):
+    """
+    Clean any "bad characters" in group descriptions.
+
+    Google support (https://support.google.com/a/answer/9193374) says:
+    "descriptions can’t contain equal signs (=), or brackets (<,>)"
+
+    >>> _clean_group_desc('a<b>c=d')
+    'abcd'
+
+    """
+    return ''.join(c for c in s if c not in _CLEAN_GROUP_DESC_BAD_CHARS)
+
+
+# Characters stripped by _clean_group_desc. Present as a constant to avoid re-creating it.
+_CLEAN_GROUP_DESC_BAD_CHARS = '=<>'
diff --git a/gsuitesync/sync/gapi.py b/gsuitesync/sync/gapi.py
new file mode 100644
index 0000000000000000000000000000000000000000..65c9f137ee243d623db17cbb1e7e26bd4a27ac02
--- /dev/null
+++ b/gsuitesync/sync/gapi.py
@@ -0,0 +1,267 @@
+"""
+Load current user, group and institution data from Google.
+
+"""
+import logging
+import re
+
+from google.oauth2 import service_account
+from googleapiclient import discovery
+
+from .base import ConfigurationStateConsumer
+from .. import gapiutil
+from .utils import email_to_uid, email_to_gid, groupID_regex, instID_regex
+
+
+LOG = logging.getLogger(__name__)
+
+# Scopes required to perform read-only actions.
+READ_ONLY_SCOPES = [
+    'https://www.googleapis.com/auth/admin.directory.user.readonly',
+    'https://www.googleapis.com/auth/admin.directory.group.readonly',
+    'https://www.googleapis.com/auth/admin.directory.group.member.readonly',
+    'https://www.googleapis.com/auth/apps.groups.settings'
+]
+
+# Scopes *in addition to READ_ONLY_SCOPES* required to perform a full update.
+WRITE_SCOPES = [
+    'https://www.googleapis.com/auth/admin.directory.user',
+    'https://www.googleapis.com/auth/admin.directory.group',
+    'https://www.googleapis.com/auth/admin.directory.group.member'
+]
+
+
class GAPIRetriever(ConfigurationStateConsumer):
    """
    Retrieve the current user, group, membership and group-settings data from
    the Google Directory / Groupssettings APIs and store it on the shared
    sync state.

    """
    required_config = ('gapi_auth', 'gapi_domain', 'sync')

    def connect(self, read_only=True):
        """
        Build the Google Directory API service and record it, together with
        the domains used for Lookup-derived groups and institutions, on the
        shared state. Use *read_only* to request read-only credentials.

        """
        # load credentials
        self.creds = self._get_credentials(read_only)
        # Build the directory service using Google API discovery.
        directory_service = discovery.build('admin', 'directory_v1', credentials=self.creds)

        # Secondary domain for Google groups that come from Lookup groups
        groups_domain = (
            self.gapi_domain_config.groups_domain
            if self.gapi_domain_config.groups_domain is not None
            else self.gapi_domain_config.name
        )
        # Secondary domain for Google groups that come from Lookup institutions
        insts_domain = (
            self.gapi_domain_config.insts_domain
            if self.gapi_domain_config.insts_domain is not None
            else self.gapi_domain_config.name
        )

        # Return components needed for connection with Google API
        self.state.update({
            'directory_service': directory_service,
            'groups_domain': groups_domain,
            'insts_domain': insts_domain,
        })

    def _get_credentials(self, read_only):
        """
        Create a Google credentials object from the configuration. Use *read_only* to indicate if
        read-only credentials are preferred.

        """
        # Load appropriate Google credentials, preferring the dedicated read-only credentials
        # file when one is configured and read-only access was requested.
        # BUG FIX: previously the read-only credentials were loaded into "creds" and then
        # immediately overwritten by credentials loaded from the read-write file, so the
        # read-only credentials were never actually used.
        creds_file = self.gapi_auth_config.credentials
        if read_only and self.gapi_auth_config.read_only_credentials is not None:
            creds_file = self.gapi_auth_config.read_only_credentials
            LOG.info('Using read-only credentials.')

        LOG.info('Loading Google account credentials from "%s"', creds_file)
        creds = service_account.Credentials.from_service_account_file(creds_file)

        # With scopes based on read_only
        creds = creds.with_scopes(READ_ONLY_SCOPES + ([] if read_only else WRITE_SCOPES))

        # Use admin_user if using service account with Domain-Wide Delegation
        if self.gapi_domain_config.admin_user:
            creds = creds.with_subject(self.gapi_domain_config.admin_user)

        return creds

    def retrieve_users(self):
        """
        Fetch all (non-admin) users in the Google domain, filter out entries
        this tool should not manage, and store the results on the state.

        """
        # Retrieve information on all users excluding domain admins.
        LOG.info('Getting information on Google domain users')
        fields = [
            'id', 'isAdmin', 'orgUnitPath', 'primaryEmail', 'suspended', 'suspensionReason',
            'name(givenName, familyName)',
        ]
        all_google_users = gapiutil.list_all(
            self.state.directory_service.users().list, items_key='users',
            domain=self.gapi_domain_config.name,
            query='isAdmin=false', fields='nextPageToken,users(' + ','.join(fields) + ')',
            retries=self.sync_config.http_retries, retry_delay=self.sync_config.http_retry_delay,
        )
        # Strip any "to be ignored" users out of the results.
        if self.sync_config.ignore_google_org_unit_path_regex is not None:
            LOG.info(
                'Ignoring users whose organization unit path matches %r',
                self.sync_config.ignore_google_org_unit_path_regex)
            # Check that all users have an orgUnitPath
            missing_org = [
                u for u in all_google_users if 'orgUnitPath' not in u
            ]
            if missing_org:
                LOG.error(
                    'User entries missing orgUnitPath: %s (starting with %s)', len(missing_org),
                    missing_org[0]['primaryEmail'] if 'primaryEmail' in missing_org[0]
                    else 'user with blank email'
                )
                raise RuntimeError('Sanity check failed: at least one user is missing orgUnitPath')
            # Remove users matching regex
            regex = re.compile(self.sync_config.ignore_google_org_unit_path_regex)
            all_google_users = [
                u for u in all_google_users if not regex.match(u['orgUnitPath'])
            ]

        # Strip out any users with uids (extracted from the local-part of the email address) that
        # aren't valid CRSids. These users can't have come from Lookup, and so should not be
        # managed (suspended) by this script.
        all_google_users = [
            u for u in all_google_users if email_to_uid(u['primaryEmail'])
        ]

        # Sanity check. There should be no admins in the returned results.
        if any(u.get('isAdmin', False) for u in all_google_users):
            raise RuntimeError('Sanity check failed: admin users in user list')

        # Form mappings from uid to Google user.
        all_google_users_by_uid = {
            email_to_uid(u['primaryEmail']): u for u in all_google_users
        }

        # Form sets of all Google-side uids. The all_google_uids set is all users including
        # the suspended ones and the suspended_google_uids set is only the suspended users. Non
        # suspended users are therefore all_google_uids - suspended_google_uids.
        all_google_uids = set(all_google_users_by_uid.keys())
        suspended_google_uids = {
            uid for uid, u in all_google_users_by_uid.items() if u['suspended']
        }

        # Sanity check. We should not have lost anything. (I.e. the uids should be unique.)
        if len(all_google_uids) != len(all_google_users):
            raise RuntimeError('Sanity check failed: user list changed length')

        # Log some stats.
        LOG.info('Total Google users: %s', len(all_google_uids))
        LOG.info(
            'Suspended Google users: %s',
            sum(1 for u in all_google_users if u['suspended'])
        )

        self.state.update({
            'all_google_users': all_google_users,
            'all_google_users_by_uid': all_google_users_by_uid,
            'all_google_uids': all_google_uids,
            'suspended_google_uids': suspended_google_uids,
        })

    def retrieve_groups(self):
        """
        Fetch all Lookup-derived Google groups (from both the groups and
        institutions domains) plus their memberships, and store the results
        on the state.

        """
        # Retrieve information on all Google groups that come from Lookup groups
        LOG.info('Getting information on Google domain groups')
        all_google_groups = [
            g for g in self._fetch_groups(self.state.groups_domain)
            if groupID_regex.match(g['email'].split('@')[0])
        ]

        # Append information on all Google groups that come from Lookup institutions
        LOG.info('Getting information on Google domain institutions')
        all_google_groups.extend([
            g for g in self._fetch_groups(self.state.insts_domain)
            if instID_regex.match(g['email'].split('@')[0].upper())
        ])

        # Strip out any groups whose email addresses don't match the pattern for groups created
        # from Lookup groupIDs or instIDs, and which therefore should not be managed (deleted) by
        # this script.
        all_google_groups = [g for g in all_google_groups if email_to_gid(g['email'])]

        # Form mappings from gid to Google group.
        all_google_groups_by_gid = {
            email_to_gid(g['email']): g for g in all_google_groups
        }

        # Form sets of all Google-side gids. The all_google_gids set includes both groupIDs and
        # instIDs. Groups in Google do not have any concept of being suspended.
        all_google_gids = set(all_google_groups_by_gid.keys())

        # Sanity check. We should not have lost anything. (I.e. the gids should be unique.)
        if len(all_google_gids) != len(all_google_groups):
            raise RuntimeError('Sanity check failed: group list changed length')

        # Retrieve all Google group memberships. This is a mapping from internal Google group ids
        # to lists of member resources, corresponding to both Lookup groups and institutions.
        fields = ['id', 'email']
        all_google_members = gapiutil.list_all_in_list(
            self.state.directory_service, self.state.directory_service.members().list,
            item_ids=[g['id'] for g in all_google_groups], id_key='groupKey',
            batch_size=self.sync_config.batch_size, items_key='members',
            fields='nextPageToken,members(' + ','.join(fields) + ')',
            retries=self.sync_config.http_retries, retry_delay=self.sync_config.http_retry_delay,
        )

        # Sanity check. We should have a group members list for each managed group.
        if len(all_google_members) != len(all_google_groups):
            raise RuntimeError(
                'Sanity check failed: groups in members map do not match group list')

        # Log some stats.
        LOG.info('Total Google groups: %s', len(all_google_gids))
        LOG.info(
            'Total Google group members: %s',
            sum(len(m) for m in all_google_members.values())
        )

        self.state.update({
            'all_google_groups': all_google_groups,
            'all_google_groups_by_gid': all_google_groups_by_gid,
            'all_google_gids': all_google_gids,
            'all_google_members': all_google_members,
        })

    def retrieve_group_settings(self):
        """
        Fetch the settings of every managed Google group via the parallel
        Groupssettings API and store them (and the service) on the state.
        Requires retrieve_groups() to have populated state.all_google_groups.

        """
        # Build the groupssettings service, which is a parallel API to manage group settings
        groupssettings_service = discovery.build(
            'groupssettings', 'v1', credentials=self.creds
        )
        # Retrieve all Google group settings. We only ask for the settings this tool manages
        # (plus 'email', which is needed to map each result back to its gid).
        fields = ['email', *self.sync_config.group_settings]
        all_google_group_settings = gapiutil.get_all_in_list(
            groupssettings_service, groupssettings_service.groups().get,
            item_ids=[g['email'] for g in self.state.all_google_groups], id_key='groupUniqueId',
            batch_size=self.sync_config.batch_size, fields=','.join(fields),
            retries=self.sync_config.http_retries, retry_delay=self.sync_config.http_retry_delay,
        )

        # Form a mapping from gid to Google group settings.
        all_google_group_settings_by_gid = {
            email_to_gid(g['email']): g for g in all_google_group_settings
        }

        # Sanity check. We should have settings for each managed group.
        if len(all_google_group_settings_by_gid) != len(self.state.all_google_groups):
            raise RuntimeError(
                'Sanity check failed: group settings list does not match group list'
            )

        self.state.update({
            'groupssettings_service': groupssettings_service,
            'all_google_group_settings_by_gid': all_google_group_settings_by_gid,
        })

    def _fetch_groups(self, domain):
        """
        Fetch Google group information (id, email, name, description) from
        the specified *domain*.

        """
        fields = ['id', 'email', 'name', 'description']
        return gapiutil.list_all(
            self.state.directory_service.groups().list, items_key='groups', domain=domain,
            fields='nextPageToken,groups(' + ','.join(fields) + ')',
            retries=self.sync_config.http_retries, retry_delay=self.sync_config.http_retry_delay,
        )
diff --git a/gsuitesync/sync/ldap.py b/gsuitesync/sync/ldap.py
new file mode 100644
index 0000000000000000000000000000000000000000..53230613064f0647a1004590e64910a74cf7a6a3
--- /dev/null
+++ b/gsuitesync/sync/ldap.py
@@ -0,0 +1,288 @@
+"""
+Load current user, group and institution data from Lookup.
+
+"""
+import logging
+import collections
+import ldap3
+
+from .base import ConfigurationStateConsumer
+
+LOG = logging.getLogger(__name__)
+
# User and group information we need to populate the Google user directory.
UserEntry = collections.namedtuple('UserEntry', ['uid', 'cn', 'sn', 'displayName', 'givenName'])
GroupEntry = collections.namedtuple('GroupEntry', ['groupID', 'groupName', 'description', 'uids'])
+
+
class LDAPRetriever(ConfigurationStateConsumer):
    """
    Retrieve eligible and managed user, group and institution data from the
    LDAP (Lookup) directory and store it on the shared sync state.

    """
    required_config = ('ldap', )

    def retrieve_users(self):
        """
        Load eligible and managed user entries from LDAP into the state.

        """
        # Get a set containing all CRSids. These are all the people who are eligible to be in our
        # GSuite instance. If a user is in GSuite and is *not* present in this list then they are
        # suspended.
        LOG.info('Reading eligible user entries from LDAP')
        eligible_uids = self.get_eligible_uids()
        LOG.info('Total LDAP user entries: %s', len(eligible_uids))

        # Sanity check: there are some eligible users (else LDAP lookup failure?)
        if not eligible_uids:
            raise RuntimeError('Sanity check failed: no users in eligible set')

        # Get a list of managed users. These are all the people who match the "managed_user_filter"
        # in the LDAP settings.
        LOG.info('Reading managed user entries from LDAP')
        managed_user_entries = self.get_managed_user_entries()

        # Form a mapping from uid to managed user.
        managed_user_entries_by_uid = {u.uid: u for u in managed_user_entries}

        # Form a set of all *managed user* uids
        managed_user_uids = set(managed_user_entries_by_uid.keys())
        LOG.info('Total managed user entries: %s', len(managed_user_uids))

        # Sanity check: the managed users should be a subset of the eligible ones.
        if managed_user_uids - eligible_uids:
            raise RuntimeError(
                'Sanity check failed: some managed uids were not in the eligible set'
            )

        self.state.update({
            'eligible_uids': eligible_uids,
            'managed_user_entries_by_uid': managed_user_entries_by_uid,
            'managed_user_uids': managed_user_uids,
        })

    def retrieve_groups(self):
        """
        Load eligible and managed group and institution entries from LDAP into
        the state. Institutions are represented as GroupEntry instances so
        they can be handled exactly like groups downstream.

        """
        # Get a set containing all groupIDs. These are all the groups that are eligible to be in
        # our GSuite instance. If a group is in GSuite and is *not* present in this list then it
        # is deleted.
        LOG.info('Reading eligible group entries from LDAP')
        eligible_groupIDs = self.get_eligible_groupIDs()
        LOG.info('Total LDAP group entries: %s', len(eligible_groupIDs))

        # Get a set containing all instIDs. These are all the institutions that are eligible to be
        # in our GSuite instance. If an institution is in GSuite and is *not* present in this list
        # then the corresponding group is deleted.
        LOG.info('Reading eligible institution entries from LDAP')
        eligible_instIDs = self.get_eligible_instIDs()
        LOG.info('Total LDAP institution entries: %s', len(eligible_instIDs))

        # Add these sets together to form the set of all gids (the IDs of all eligible groups and
        # institutions).
        eligible_gids = eligible_groupIDs | eligible_instIDs
        LOG.info('Total combined LDAP group and institution entries: %s', len(eligible_gids))

        # Get a list of managed groups. These are all the groups that match the
        # "managed_group_filter" in the LDAP settings.
        LOG.info('Reading managed group entries from LDAP')
        managed_group_entries = self.get_managed_group_entries()

        # Form a mapping from groupID to managed group.
        managed_group_entries_by_groupID = {g.groupID: g for g in managed_group_entries}

        # Form a set of all *managed group* groupIDs
        managed_group_groupIDs = set(managed_group_entries_by_groupID.keys())
        LOG.info('Total managed group entries: %s', len(managed_group_groupIDs))
        LOG.info(
            'Total managed group members: %s',
            sum(len(g.uids) for g in managed_group_entries)
        )

        # Get a list of managed institutions. These are all the institutions that match the
        # "managed_inst_filter" in the LDAP settings.
        LOG.info('Reading managed institution entries from LDAP')
        managed_inst_entries = self.get_managed_inst_entries()

        # Form a mapping from instID to managed institution.
        managed_inst_entries_by_instID = {i.groupID: i for i in managed_inst_entries}

        # Form a set of all *managed institution* instIDs
        managed_inst_instIDs = set(managed_inst_entries_by_instID.keys())
        LOG.info('Total managed institution entries: %s', len(managed_inst_instIDs))
        LOG.info(
            'Total managed institution members: %s',
            sum(len(i.uids) for i in managed_inst_entries)
        )

        # Add the collections of managed institutions to the collections of managed groups.
        managed_group_entries += managed_inst_entries
        managed_group_entries_by_gid = {
            **managed_group_entries_by_groupID, **managed_inst_entries_by_instID
        }
        # BUG FIX: previously this combined the managed groupIDs with the *eligible* instIDs,
        # which inflated the count and made the subset sanity check below vacuous for
        # institutions. The managed set must combine managed groups with managed institutions.
        managed_group_gids = managed_group_groupIDs | managed_inst_instIDs
        LOG.info(
            'Total combined managed group and institution entries: %s', len(managed_group_gids)
        )
        LOG.info(
            'Total combined managed group and institution members: %s',
            sum(len(g.uids) for g in managed_group_entries)
        )

        # Sanity check: the managed groups should be a subset of the eligible ones.
        if managed_group_gids - eligible_gids:
            raise RuntimeError(
                'Sanity check failed: some managed gids were not in the eligible set'
            )

        self.state.update({
            'eligible_gids': eligible_gids,
            'managed_group_entries_by_gid': managed_group_entries_by_gid,
        })

    ###
    # Functions to perform LDAP calls
    ###
    def get_eligible_uids(self):
        """
        Return a set containing all uids who are eligible to have a Google account.

        """
        return {
            e['attributes']['uid'][0]
            for e in self._search(
                search_base=self.ldap_config.user_search_base,
                search_filter=self.ldap_config.eligible_user_filter,
                attributes=['uid']
            )
        }

    def get_eligible_groupIDs(self):
        """
        Return a set containing all groupIDs that are eligible for Google.

        """
        return {
            e['attributes']['groupID'][0]
            for e in self._search(
                search_base=self.ldap_config.group_search_base,
                search_filter=self.ldap_config.eligible_group_filter,
                attributes=['groupID']
            )
        }

    def get_eligible_instIDs(self):
        """
        Return a set containing all instIDs that are eligible for Google.

        """
        return {
            e['attributes']['instID'][0]
            for e in self._search(
                search_base=self.ldap_config.inst_search_base,
                search_filter=self.ldap_config.eligible_inst_filter,
                attributes=['instID']
            )
        }

    def get_managed_user_entries(self):
        """
        Return a list containing all managed user entries as UserEntry instances.

        """
        # Fall back to the eligible-user filter when no dedicated managed-user filter is set.
        search_filter = (
            self.ldap_config.managed_user_filter
            if self.ldap_config.managed_user_filter is not None
            else self.ldap_config.eligible_user_filter
        )
        return [
            UserEntry(
                uid=_extract(e, 'uid'), cn=_extract(e, 'cn'), sn=_extract(e, 'sn'),
                displayName=_extract(e, 'displayName'), givenName=_extract(e, 'givenName')
            )
            for e in self._search(
                search_base=self.ldap_config.user_search_base, search_filter=search_filter,
                attributes=['uid', 'cn', 'sn', 'displayName', 'givenName']
            )
        ]

    def get_managed_group_entries(self):
        """
        Return a list containing all managed group entries as GroupEntry instances.

        """
        # Fall back to the eligible-group filter when no dedicated managed-group filter is set.
        search_filter = (
            self.ldap_config.managed_group_filter
            if self.ldap_config.managed_group_filter is not None
            else self.ldap_config.eligible_group_filter
        )
        return [
            GroupEntry(
                groupID=_extract(e, 'groupID'), groupName=_extract(e, 'groupName'),
                description=_extract(e, 'description'), uids=set(e['attributes'].get('uid', []))
            )
            for e in self._search(
                search_base=self.ldap_config.group_search_base, search_filter=search_filter,
                attributes=['groupID', 'groupName', 'description', 'uid']
            )
        ]

    def get_managed_inst_entries(self):
        """
        Return a list containing all managed institution entries as GroupEntry instances.

        Note that we return GroupEntry instances here since Lookup institutions become groups in
        Google, and this simplifies the sync code by allowing us to handle institutions in the same
        way as groups. The GroupEntry's groupID and groupName fields will be the institution's
        instID and ou (name) respectively. Since Lookup institutions don't have descriptions, we
        set the description field to the institution's name as well (in Google, the description
        allows longer strings, and so will not truncate the name).

        """
        # This requires 2 LDAP queries. First find the managed institutions.
        search_filter = (
            self.ldap_config.managed_inst_filter
            if self.ldap_config.managed_inst_filter is not None
            else self.ldap_config.eligible_inst_filter
        )
        managed_insts = [
            GroupEntry(
                groupID=_extract(e, 'instID'), groupName=_extract(e, 'ou'),
                description=_extract(e, 'ou'), uids=set(),
            )
            for e in self._search(
                search_base=self.ldap_config.inst_search_base, search_filter=search_filter,
                attributes=['instID', 'ou']
            )
        ]
        managed_insts_by_instID = {g.groupID: g for g in managed_insts}

        # Then get each eligible user's list of institutions and use that data to populate each
        # institution's uid list.
        eligible_users = self._search(
            search_base=self.ldap_config.user_search_base,
            search_filter=self.ldap_config.eligible_user_filter,
            attributes=['uid', 'instID']
        )
        for e in eligible_users:
            uid = e['attributes']['uid'][0]
            # Users with no institutions may have no instID attribute at all, so default to an
            # empty list rather than raising KeyError.
            for instID in e['attributes'].get('instID', []):
                if instID in managed_insts_by_instID:
                    managed_insts_by_instID[instID].uids.add(uid)

        return managed_insts

    def _search(self, *, search_base, search_filter, attributes):
        """
        Perform a paged LDAP search and return the matching entries as a list.

        """
        # Use SSL to access the LDAP server when authentication credentials
        # have been configured
        use_ssl = bool(self.ldap_config.username and self.ldap_config.password)
        ldap_server = ldap3.Server(self.ldap_config.host, use_ssl=use_ssl)

        # Add authentication credentials if configured
        username = self.ldap_config.username if self.ldap_config.username else None
        password = self.ldap_config.password if self.ldap_config.password else None

        # Connect to the LDAP server and perform the query
        with ldap3.Connection(ldap_server, username, password, auto_bind=True) as conn:
            # Materialise the results (generator=False) while the connection is still bound:
            # the default lazy generator would page through results only after the "with"
            # block has already unbound the connection.
            return conn.extend.standard.paged_search(
                search_base, search_filter, paged_size=1000, attributes=attributes,
                generator=False)
+
+
+def _extract(entry, attr, *, default=''):
+    vs = entry['attributes'].get(attr, [])
+    if len(vs) == 0:
+        return default
+    if isinstance(vs, str):
+        return vs
+    return vs[0]
diff --git a/gsuitesync/sync/main.py b/gsuitesync/sync/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..657fde9f69b86da9704ef5133397ab81ff71c4ae
--- /dev/null
+++ b/gsuitesync/sync/main.py
@@ -0,0 +1,62 @@
+"""
+Synchronise Google Directory with a local LDAP directory.
+
+"""
+import logging
+
+from .. import config
+from .state import SyncState
+from .ldap import LDAPRetriever
+from .gapi import GAPIRetriever
+from .compare import Comparator
+from .update import GAPIUpdater
+
+LOG = logging.getLogger(__name__)
+
+
def sync(configuration, *, read_only=True, group_settings=False, just_users=False):
    """
    Perform a full synchronisation run given a configuration dictionary.

    The run proceeds in four phases sharing a single SyncState: retrieve from
    LDAP, retrieve from Google, compare the two sides, then apply the
    resulting updates to Google. With *read_only* set no changes are written.
    *just_users* restricts the run to user accounts only, and *group_settings*
    additionally compares existing Google group settings.

    """
    if read_only:
        LOG.info('Performing synchronisation in READ ONLY mode.')
    else:
        LOG.info('Performing synchronisation in WRITE mode.')

    # Parse the raw configuration dictionary into typed configuration dataclasses.
    cfg = config.parse_configuration(configuration)

    # Shared state: each phase below reads what earlier phases stored here.
    sync_state = SyncState()

    # Groups are processed unless the caller asked for users only.
    include_groups = not just_users

    # Phase 1: retrieve users (and optionally groups) from Lookup.
    lookup = LDAPRetriever(cfg, sync_state)
    lookup.retrieve_users()
    if include_groups:
        lookup.retrieve_groups()

    # Phase 2: retrieve users (and optionally groups/settings) from Google.
    google = GAPIRetriever(cfg, sync_state)
    google.connect(read_only)
    google.retrieve_users()
    if include_groups:
        google.retrieve_groups()
        if group_settings:
            google.retrieve_group_settings()

    # Phase 3: compare the two sides and enforce the configured change limits.
    differ = Comparator(cfg, sync_state)
    differ.compare_users()
    if include_groups:
        differ.compare_groups()
        if group_settings:
            differ.compare_groups_settings()
    differ.enforce_limits(just_users)

    # Phase 4: push the required changes to Google (no-op in read-only mode).
    writer = GAPIUpdater(cfg, sync_state, read_only)
    writer.update_users()
    if include_groups:
        writer.update_groups()
diff --git a/gsuitesync/sync/state.py b/gsuitesync/sync/state.py
new file mode 100644
index 0000000000000000000000000000000000000000..480adf862cfb62e293c311e19d9e54379573ec7b
--- /dev/null
+++ b/gsuitesync/sync/state.py
@@ -0,0 +1,82 @@
+"""
+A dataclass to hold the built up state of Lookup and Google data and needed updates
+
+"""
+from typing import Optional
+from dataclasses import dataclass, field
+from googleapiclient import discovery
+
+
@dataclass
class SyncState:
    """
    Mutable container shared by every phase of a sync run: the data read from
    Lookup (LDAP), the data read from Google, and the set of changes that the
    comparison phase decides must be applied.

    """
    # --- Data retrieved from Lookup (LDAP) ---

    # user data
    eligible_uids: set = field(default_factory=set)
    managed_user_entries_by_uid: dict = field(default_factory=dict)
    managed_user_uids: set = field(default_factory=set)
    # group data
    eligible_gids: set = field(default_factory=set)
    managed_group_entries_by_gid: dict = field(default_factory=dict)

    # --- Components needed when communicating with Google API ---
    directory_service: Optional[discovery.Resource] = None
    groupssettings_service: Optional[discovery.Resource] = None
    groups_domain: str = ''
    insts_domain: str = ''

    # --- Data retrieved from Google ---

    # user data
    all_google_users: list = field(default_factory=list)
    all_google_users_by_uid: dict = field(default_factory=dict)
    all_google_uids: set = field(default_factory=set)
    suspended_google_uids: set = field(default_factory=set)
    # group data
    all_google_groups: list = field(default_factory=list)
    all_google_groups_by_gid: dict = field(default_factory=dict)
    all_google_gids: set = field(default_factory=set)
    # group membership data
    all_google_members: dict = field(default_factory=dict)
    # group settings data
    all_google_group_settings_by_gid: dict = field(default_factory=dict)

    # --- Results of comparison ---

    # updates to users
    google_user_updates: dict = field(default_factory=dict)
    google_user_creations: dict = field(default_factory=dict)
    uids_to_update: set = field(default_factory=set)
    uids_to_add: set = field(default_factory=set)
    uids_to_reactivate: set = field(default_factory=set)
    uids_to_suspend: set = field(default_factory=set)
    # updates to groups
    google_group_updates: dict = field(default_factory=dict)
    google_group_creations: dict = field(default_factory=dict)
    gids_to_update: set = field(default_factory=set)
    gids_to_add: set = field(default_factory=set)
    gids_to_delete: set = field(default_factory=set)
    # updates to group memberships
    members_to_insert: list = field(default_factory=list)
    members_to_delete: list = field(default_factory=list)
    # updates to group settings
    group_settings_to_update: dict = field(default_factory=dict)
    gids_to_update_group_settings: set = field(default_factory=set)

    def update(self, data: dict):
        """Set each key/value pair in *data* as an attribute, rejecting unknown keys."""
        for key, value in data.items():
            # Guard against typos: only attributes declared on this dataclass may be set.
            if not hasattr(self, key):
                raise RuntimeError(f"Attempt to add invalid key '{key}' to state")
            setattr(self, key, value)
diff --git a/gsuitesync/sync/update.py b/gsuitesync/sync/update.py
new file mode 100644
index 0000000000000000000000000000000000000000..d68152f4c5d2ff3ed2ce5a3a64fb9bd8ac6167a6
--- /dev/null
+++ b/gsuitesync/sync/update.py
@@ -0,0 +1,162 @@
+"""
+Perform the actual updates in Google (unless in read_only mode)
+
+"""
+import logging
+import crypt
+import secrets
+
+from .base import ConfigurationStateConsumer
+from .utils import gid_to_email, uid_to_email
+from ..gapiutil import process_requests
+
+LOG = logging.getLogger(__name__)
+
+
class GAPIUpdater(ConfigurationStateConsumer):
    """
    Apply the user, group, membership and group-settings changes recorded in
    the shared state (by the comparison step) to Google.

    All API calls are routed through ``process_requests``, which honours
    ``read_only`` so no changes are made in read-only mode.

    """
    required_config = ('sync', 'gapi_domain')

    def __init__(self, configuration, state, read_only=True):
        super().__init__(configuration, state)
        # When True, requests are generated (and logged) but never executed.
        self.read_only = read_only

    def update_users(self):
        """Process all pending user create/patch requests."""
        process_requests(
            self.state.directory_service,
            self.user_api_requests(),
            self.sync_config, self.read_only)

    def update_groups(self):
        """Process all pending group, group-settings and membership requests."""
        process_requests(
            self.state.directory_service,
            self.group_api_requests(),
            self.sync_config, self.read_only)
        # Still need to do this even if `not group_settings` as new groups need their settings
        process_requests(
            self.state.groupssettings_service,
            self.group_settings_api_requests(),
            self.sync_config, self.read_only)
        process_requests(
            self.state.directory_service,
            self.member_api_requests(),
            self.sync_config, self.read_only)

    def user_api_requests(self):
        """
        A generator which will generate patch() and insert() calls to the directory service to
        perform the actions required to update users

        """
        # Update existing users.
        for uid in self.state.uids_to_update:
            update = self.state.google_user_updates[uid]
            google_id = self.state.all_google_users_by_uid[uid]['id']
            # Only show the previous parts of name that have been changed
            updated_google_user_name = update.get('name', {})
            previous_google_user_name = self.state.all_google_users_by_uid[uid].get('name', {})
            previous = {
                k: previous_google_user_name.get(k, '')
                for k in ('givenName', 'familyName')
                if k in updated_google_user_name
            }
            LOG.info('Update user "%s": "%r" from "%r"', uid, update, previous)
            yield self.state.directory_service.users().patch(userKey=google_id, body=update)

        # Suspend old users
        for uid in self.state.uids_to_suspend:
            google_id = self.state.all_google_users_by_uid[uid]['id']
            LOG.info('Suspending user: "%s"', uid)
            yield self.state.directory_service.users().patch(
                userKey=google_id, body={'suspended': True})

        # Reactivate returning users
        for uid in self.state.uids_to_reactivate:
            google_id = self.state.all_google_users_by_uid[uid]['id']
            LOG.info('Reactivating user: "%s"', uid)
            yield self.state.directory_service.users().patch(
                userKey=google_id, body={'suspended': False})

        # Create new users
        for uid in self.state.uids_to_add:
            # Generate a random password which is thrown away. Values from the
            # comparison step take precedence over these defaults.
            new_user = {
                'hashFunction': 'crypt',
                'password': crypt.crypt(secrets.token_urlsafe(), crypt.METHOD_SHA512),
                'orgUnitPath': self.sync_config.new_user_org_unit_path,
                **self.state.google_user_creations[uid],
            }
            # Never log the password hash.
            redacted_user = {**new_user, 'password': 'REDACTED'}
            LOG.info('Adding user "%s": %s', uid, redacted_user)
            yield self.state.directory_service.users().insert(body=new_user)

    def group_api_requests(self):
        """
        A generator which will generate patch(), insert() and delete() calls to the directory
        service to perform the actions required to update groups

        """
        # Update existing groups
        for gid in self.state.gids_to_update:
            update = self.state.google_group_updates[gid]
            google_id = self.state.all_google_groups_by_gid[gid]['id']
            LOG.info('Update group "%s": "%r"', gid, update)
            yield self.state.directory_service.groups().patch(groupKey=google_id, body=update)

        # Delete cancelled groups
        for gid in self.state.gids_to_delete:
            google_id = self.state.all_google_groups_by_gid[gid]['id']
            LOG.info('Deleting group: "%s"', gid)
            yield self.state.directory_service.groups().delete(groupKey=google_id)

        # Create new groups
        for gid in self.state.gids_to_add:
            new_group = self.state.google_group_creations[gid]
            LOG.info('Adding group "%s": %s', gid, new_group)
            yield self.state.directory_service.groups().insert(body=new_group)

    def member_api_requests(self):
        """
        A generator which will generate insert() and delete() calls to the directory service to
        perform the actions required to update group members

        """
        # Insert new members
        for gid, uid in self.state.members_to_insert:
            group_key = gid_to_email(gid, self.state.groups_domain, self.state.insts_domain)
            user_key = uid_to_email(uid, self.gapi_domain_config.name)
            LOG.info('Adding user "%s" to group "%s"', user_key, group_key)
            yield self.state.directory_service.members().insert(
                groupKey=group_key, body={'email': user_key})

        # Delete removed members
        for gid, uid in self.state.members_to_delete:
            group_key = gid_to_email(gid, self.state.groups_domain, self.state.insts_domain)
            user_key = uid_to_email(uid, self.gapi_domain_config.name)
            LOG.info('Removing user "%s" from group "%s"', user_key, group_key)
            yield self.state.directory_service.members().delete(
                groupKey=group_key, memberKey=user_key)

    def group_settings_api_requests(self):
        """
        A generator which will generate patch() calls to the groupssettings service to set or
        update the required group settings.

        """
        # Apply all settings to new groups.
        for gid in self.state.gids_to_add:
            email = gid_to_email(gid, self.state.groups_domain, self.state.insts_domain)
            settings = self.sync_config.group_settings
            LOG.info('Updating settings for new group "%s": %s', gid, settings)
            yield self.state.groupssettings_service.groups().patch(
                groupUniqueId=email, body=settings)

        # Update existing group settings (will be empty if `not group_settings`)
        for gid in self.state.gids_to_update_group_settings:
            email = gid_to_email(gid, self.state.groups_domain, self.state.insts_domain)
            settings = self.state.group_settings_to_update[gid]
            LOG.info('Updating settings for existing group "%s": %s', gid, settings)
            yield self.state.groupssettings_service.groups().patch(
                groupUniqueId=email, body=settings)
diff --git a/gsuitesync/sync/utils.py b/gsuitesync/sync/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..779e8e90a323f5a3a5b64e00888bfcc48aa5c063
--- /dev/null
+++ b/gsuitesync/sync/utils.py
@@ -0,0 +1,48 @@
+import logging
+import re
+
+LOG = logging.getLogger(__name__)
+
+
+# Functions to translate the unique identifiers of users, groups and institutions in Lookup
+# (uids, groupIDs and instIDs) to and from the unique identifiers used in Google (email
+# addresses).
+#
+# For users:   {uid}     <-> {uid}@{domain}
+# For groups:  {groupID} <-> {groupID}@{groups_domain}
+# For insts:   {instID}  <-> {instID.lower()}@{insts_domain}  (local part must be lowercase)
+#
+# Additionally, valid uids (CRSids) match the regex [a-z][a-z0-9]{3,7}, valid groupIDs match
+# the regex [0-9]{6,8} and valid instIDs match the regex [A-Z][A-Z0-9]+.
+#
+# Since Lookup institutions become groups in Google, we use common code to sync all Google
+# groups, regardless of whether they were groups or institutions in Lookup. In all the code
+# that follows, we use "gid" to refer to the unique identifier of the group or institution in
+# Lookup (i.e., gid may be either a Lookup groupID or instID).
+
# Compiled once at import time; used by the email<->identifier translators below.
user_email_regex = re.compile('^[a-z][a-z0-9]{3,7}@.*$')  # CRSid local part at any domain
groupID_regex = re.compile('^[0-9]{6,8}$')  # Lookup groupID: 6-8 digits
instID_regex = re.compile('^[A-Z][A-Z0-9]+$')  # Lookup instID: uppercase alphanumeric
+
+
def email_to_uid(email):
    """Return the CRSid local part of *email*, or None if it is not a valid user address."""
    if user_email_regex.match(email):
        return email.split('@')[0]
    return None
+
+
def email_to_gid(email):
    """
    Return the Lookup gid encoded in *email*'s local part: the local part
    itself when it is a valid groupID, its uppercased form when that is a
    valid instID, and None otherwise.
    """
    local_part = email.split('@')[0]
    if groupID_regex.match(local_part):
        return local_part
    upper = local_part.upper()
    if instID_regex.match(upper):
        return upper
    return None
+
+
def uid_to_email(uid, domain):
    """Map a Lookup uid to the corresponding Google email address ({uid}@{domain})."""
    return '{}@{}'.format(uid, domain)
+
+
def gid_to_email(gid, groups_domain, insts_domain):
    """
    Map a Lookup gid to its Google group email address: groupIDs go to the
    groups domain unchanged, instIDs are lowercased and go to the insts
    domain, and anything else yields None.
    """
    if groupID_regex.match(gid):
        return '{}@{}'.format(gid, groups_domain)
    if instID_regex.match(gid):
        return '{}@{}'.format(gid.lower(), insts_domain)
    return None