diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000000000000000000000000000000000..737e33f684a0edb74cc6e037f34f164b416ae542 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,4 @@ +[run] +omit = + .tox/* + setup.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..0384309049d7979e29df8789264f71d7e58b4657 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +.git + +# Various build directories +.tox +build +*.egg-info diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..ff5d1b3626574517bba81357e741ff1556b82ae7 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,13 @@ +# Editorconfig file for cross-platform configuration of editors. +root=true + +[*.py] +max_line_length=99 + +[*.{yml,yaml}] +indent_style=space +indent_size=2 + +[*.md] +indent_style=space +indent_size=2 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..a984856b2260dc1e6fdd4f36333001ef8b3ebefc --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length=99 +exclude = venv,.tox diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ae493c97c759b6742de79c0c6c44c58b64df12bc --- /dev/null +++ b/.gitignore @@ -0,0 +1,114 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +.static_storage/ +.media/ +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +*.sqlite3 + +# PyCharm +.idea + +# Service account credentials (if instructions in README are followed) +credentials.json + +# Local configuration +gsuitesync.yaml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..8825656abcc390566716dd7d1aa901099332650e --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,17 @@ +# This file pulls in the GitLab AutoDevOps configuration via an include +# directive and then overrides bits. The rationale for this is we'd like this +# file to eventually have zero local overrides so that we can use the AutoDevOps +# pipeline as-is. + +include: + # Bring in the AutoDevOps template from GitLab. 
+ # It can be viewed at: + # https://gitlab.com/gitlab-org/gitlab-ee/blob/master/lib/gitlab/ci/templates/Auto-DevOps.gitlab-ci.yml + - template: Auto-DevOps.gitlab-ci.yml + + # Overrides to AutoDevOps for testing + - project: 'uis/devops/continuous-delivery/ci-templates' + file: '/auto-devops/tox-tests.yml' + +variables: + DOCUMENTATION_DISABLED: "1" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..01ad27fe9db7ff16354fa48166aaf1cdfd8e672d --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +# This Dockerfile is intended only to support the Auto-DevOps pipeline on GitLab. +# It's not intended to package the application. + +FROM uisautomation/python:3.7-alpine + +WORKDIR /usr/src/app + +# Install specific requirements for the package along with tox to be able to run +# the tests. +ADD requirements.txt ./ +RUN pip install tox && pip install -r requirements.txt + +# Copy application source and install it. +ADD ./ ./ +RUN pip install -e ./ + +ENTRYPOINT ["gsuitesync"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..2b96072572a7e51f6222943204d5b0d9853c8643 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 University of Cambridge Information Services + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 00ee4c977962e54791913f7bf4dcfa00ae60a3ac..9c26e59f9ce3a39e275df925f83138bc6f6a8e2f 100644 --- a/README.md +++ b/README.md @@ -1 +1,95 @@ # Google GSuite Synchronisation Tool + +This repository contains a custom synchronisation tool for synchronising +information from the [Lookup service](https://www.lookup.cam.ac.uk/)'s LDAP +personality to a Google hosted domain (aka "GSuite"). + +Configuration is performed via a configuration file. Take a look at the [example +configuration file](configuration-example.yaml) for more information. + +## Usage + +The tool can be invoked from the command line: + +```console +$ gsuitesync +``` + +By default this will log what will be done. To actually perform the +synchronisation: + +```console +$ gsuitesync --really-do-this +``` + +See the output of ``gsuitesync --help`` for more information on valid +command-line flags. + +Unless overridden on the command line, the tool searches for its configuration +file in the following places in the following order: + +* A ``gsuitesync.yaml`` file in the current directory. +* ``~/.gsuitesync/configuration.yaml``. +* ``/etc/gsuitesync/configuration.yaml``. + +The first located file is used. 
+ +## Installation + +The command-line tool can be installed directly from the git repository: + +```console +$ pip3 install git+https://gitlab.developers.cam.ac.uk/uis/gsuite/synctool.git +``` + +For developers, the script can be installed from a cloned repo using ``pip``: + +```console +$ cd /path/to/this/repo +$ pip3 install -e . +``` + +## New users + +When new users are created they are created with a random password which is +immediately thrown away. They are created with a primary email of the form +``[uid]@[domain]`` where ``[uid]`` is the unique id from lookup (i.e. the CRSid) +and ``[domain]`` is the name of the Google domain from the configuration. + +## Required API scopes + +This tool requires the following OAuth2 scopes to audition the changes to be +made: + +* ``https://www.googleapis.com/auth/admin.directory.user.readonly`` + +This tool requires the following OAuth2 scopes to actually perform changes: + +* ``https://www.googleapis.com/auth/admin.directory.user`` + +See the section on preparing a service account for information on how to grant a +service account those scopes on your domain. + +## Preparing a service account + +This tool assumes it will be acting as a service account user. It will use this +service account user to then act on behalf of an admin user in GSuite. To +prepare such a service account user: + +1. Create a service account in the Google Console for this script. +2. Generate and download JSON credentials for the service account. +3. Under "IAM" > "Service Accounts", select the service account, click "Edit", + click "Show domain-wide delegation" and "Enable G Suite Domain-wide + Delegation". Click "Save" to apply the changes. +4. Hover over the "?" symbol next to the generated client id and click "view + client". Copy the Client ID from the popup panel. +5. In the GSuite admin panel, go to "Security Settings" > "Advanced Settings" > + "Manage API client access". +6. 
Paste in the service account Client ID as "Client Name" and add a + comma-separated list of scopes. See the section on required API scopes. + +The scary-sounding "Enable G Suite Domain-wide Delegation" means that this +service account is marked as being willing to "su" to another Google user. By +adding the generated Client ID to the GSuite security settings you are, as +domain administrator, giving that service account the ability to act as any user +in the domain **subject to the listed scopes**. diff --git a/configuration-example.yaml b/configuration-example.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d0e106c38cc4f83e8b89ab39e5411dd09d3f0f8 --- /dev/null +++ b/configuration-example.yaml @@ -0,0 +1,90 @@ +# Example of Google directory sync configuration. + +# Synchronisation configuration +sync: + # A regular expression which is used to match the organization unit path for + # Google users who should be excluded from the list returned by Google. Those + # users do not exist for the purposes of the rest of the sync and so if they + # appear in the list of managed users this script will attempt to re-add them + # and fail in the process. Use this setting for users who are managed + # completely outside of this script. + ignore_google_org_unit_path_regex: '^/Service Accounts$' + +# Configure limits defining maximum scope of changes. +limits: + # The abort_... settings below are safety limits and will abort the run if the + # limits are violated. They are there to define the "sane limits" for an + # update. + + # Refuse to perform sync if we are to "touch" more than this percentage of + # users. The percentage of users "touched" is calculated as + # + # (new google users + modified google users) / max(1, total google users) + # + # where "modified" includes metadata changes and suspension/restoration. As + # such this calculated percentage can be greater than 100. Set to null to + # have no limit. Default: null. 
+ abort_user_change_percentage: 2 # percent + + # The max_... settings below will not abort the run if the number of users + # affected is greater than the specified number. Instead the number of users + # affected is capped to that number. The selection of which users are included + # in the capped number is arbitrary. + + # Limit the number of new user creations per run. This is an absolute number. + # Set to null to have no limit. Default: null. + max_new_users: 100 + + # Limit the number of user suspensions per run. This is an absolute number. + # Set to null to have no limit. Default: null. + max_suspended_users: 100 + + # Limit the number of user un-suspensions (reactivations) per run. This is an + # absolute number. Set to null to have no limit. Default: null. + max_reactivated_users: 100 + + # Limit the number of user metadata changes per run. This is an absolute + # number. Set to null to have no limit. Default: null + max_updated_users: 100 + +# Google API configuration +google_api: + # Authentication + auth: + # Path to on-disk JSON credentials used when accessing the API. + credentials: "./credentials.json" + + # Path to on-disk JSON credentials used when accessing the API in + # "read-only" mode. Use this if you want to have a separate "safe" service + # account which can only read data. If null, use the same credentials for + # reading and writing. Default: null. + read_only_credentials: null + +# Details about the LDAP server +ldap: + # Scheme and hostname of the LDAP server. + host: 'ldaps://ldap.example.com' + + # LDAP search base. Filters are always relative to this. + search_base: 'ou=people,o=example-corps,dc=example,dc=com' + + # Filter to use to determine the "eligible" list of users. If a non-admin user + # is found on Google who isn't in this list, their account will be suspended. + eligible_user_filter: '(uid=*)' + + # Filter to use to determine the "managed" list of users. 
If a user appears in + # this list who isn't in Google their account is created. If the user metadata + # for a user in this list changes, the change is propagated to Google. If + # null, the value of "eligible_user_filter" is used. Default: null. + managed_user_filter: null + +# Details about the Google Domain we're managing. +google_domain: + # Name of the domain. + name: 'example.com' + + # Username within the GSuite for the user which has administration rights. + # Should be an e-mail style name. E.g. "super-admin@example.com". The service + # account credentials specified in the google_api.auth section are used to + # perform admin actions as this user. + admin_user: 'super-admin@example.com' diff --git a/gsuitesync/__init__.py b/gsuitesync/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3677d732b116622e1b99a3f41b305cce5be649a1 --- /dev/null +++ b/gsuitesync/__init__.py @@ -0,0 +1,45 @@ +""" +Synchronise users to GSuite + +Usage: + gsuitesync (-h | --help) + gsuitesync [--configuration=FILE] [--quiet] [--really-do-this] + +Options: + -h, --help Show a brief usage summary. + + --quiet Reduce logging verbosity. + + --configuration=FILE Specify configuration file to load. + + --really-do-this Actually try to make the changes. + +""" +import logging +import os +import sys + +import docopt + +from . import config +from . 
import sync + + +LOG = logging.getLogger(os.path.basename(sys.argv[0])) + + +def main(): + opts = docopt.docopt(__doc__) + + # Configure logging + logging.basicConfig(level=logging.WARN if opts['--quiet'] else logging.INFO) + + # HACK: make the googleapiclient.discovery module less spammy in the logs + logging.getLogger('googleapiclient.discovery').setLevel(logging.WARN) + logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) + + LOG.info('Loading configuration') + configuration = config.load_configuration(opts['--configuration']) + + # Perform sync + sync.sync(configuration, read_only=not opts['--really-do-this']) diff --git a/gsuitesync/__main__.py b/gsuitesync/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..c7c70d0be2f94f22b62f5b9fea21536367fe62e8 --- /dev/null +++ b/gsuitesync/__main__.py @@ -0,0 +1,4 @@ +from . import main + +if __name__ == '__main__': + main() diff --git a/gsuitesync/config.py b/gsuitesync/config.py new file mode 100644 index 0000000000000000000000000000000000000000..4f51965719b94eccfcc40a527e21cdf087f4885f --- /dev/null +++ b/gsuitesync/config.py @@ -0,0 +1,90 @@ +""" +Utilities for parsing configuration files. + +""" +import dataclasses +import logging +import os + +import yaml + +LOG = logging.getLogger(__name__) + + +class ConfigurationError(RuntimeError): + """ + Base class for all configuration errors. + + """ + + +class ConfigurationNotFound(ConfigurationError): + """ + A suitable configuration could not be located. + + """ + def __init__(self): + return super().__init__('Could not find any configuration file') + + +def load_configuration(location=None): + """ + Load configuration and return a :py:class:`Configuration` instance. Pass a non-None location to + override the default search path. + + :raises: ConfigurationError if the configuration could not be loaded. 
+ + """ + if location is not None: + paths = [location] + else: + if 'GSUITESYNC_CONFIGURATION' in os.environ: + paths = [os.environ['GSUITESYNC_CONFIGURATION']] + else: + paths = [] + paths.extend([ + os.path.join(os.getcwd(), 'gsuitesync.yaml'), + os.path.expanduser('~/.gsuitesync/configuration.yaml'), + '/etc/gsuitesync/configuration.yaml' + ]) + + valid_paths = [path for path in paths if os.path.isfile(path)] + + if len(valid_paths) == 0: + LOG.error('Could not find configuration file. Tried:') + for path in paths: + LOG.error('"%s"', path) + raise ConfigurationNotFound() + + with open(valid_paths[0]) as f: + return yaml.safe_load(f) + + +class ConfigurationDataclassMixin: + """ + Mixin class for dataclass which adds a "from_dict" member which will construct an instance from + a dictionary. Fields which have no default value become required fields. + + """ + + @classmethod + def from_dict(cls, dict_): + """ + Construct an instance from a dict. + + """ + field_names = {field.name for field in dataclasses.fields(cls)} + required_field_names = { + field.name for field in dataclasses.fields(cls) + if field.default is dataclasses.MISSING + } + + for key in dict_.keys(): + if key not in field_names: + raise ValueError(f'Unknown configuration key: {key}') + + for key in required_field_names: + if key not in dict_: + raise ValueError(f'{key}: required field not set') + + return cls(**dict_) diff --git a/gsuitesync/gapiauth.py b/gsuitesync/gapiauth.py new file mode 100644 index 0000000000000000000000000000000000000000..436e7696a9562658888d450f37b2ba1dd3bd74ba --- /dev/null +++ b/gsuitesync/gapiauth.py @@ -0,0 +1,43 @@ +""" +Google API authentication. 
+ +""" +import dataclasses +import logging +import typing + +from google.oauth2 import service_account + +from .config import ConfigurationDataclassMixin + + +LOG = logging.getLogger(__name__) + + +@dataclasses.dataclass +class Configuration(ConfigurationDataclassMixin): + """ + Configuration of Google API access credentials. + + """ + # Path to on-disk JSON credentials used when accessing the API. + credentials: str + + # Path to on-disk JSON credentials used when accessing the API in "read-only" mode. Use this if + # you want to have a separate "safe" service account which can only read data. If null, use the + # same credentials for reading and writing. + read_only_credentials: typing.Union[str, None] = None + + def load_credentials(self, *, read_only=True): + """ + Create a Google credentials object from the configuration. Use *read_only* to indicate if + read-only credentials are preferred. + + """ + credentials = self.credentials + if read_only and self.read_only_credentials is not None: + credentials = self.read_only_credentials + LOG.info('Using read-only credentials.') + + LOG.info('Loading Google account credentials from "%s"', credentials) + return service_account.Credentials.from_service_account_file(credentials) diff --git a/gsuitesync/gapidomain.py b/gsuitesync/gapidomain.py new file mode 100644 index 0000000000000000000000000000000000000000..467f2684e572e4fc13202918700cff03583a41cc --- /dev/null +++ b/gsuitesync/gapidomain.py @@ -0,0 +1,22 @@ +""" +Google Domain management. + +""" +import dataclasses + +from .config import ConfigurationDataclassMixin + + +@dataclasses.dataclass +class Configuration(ConfigurationDataclassMixin): + """ + Configuration for accessing the Google Domain. + + """ + # Name of the domain. (E.g. "example.com".) + name: str + + # Username within the GSuite for the user which has administration rights. Should be an e-mail + # style name. E.g. "super-admin@example.com". 
The service account credentials specified in the + # google_api.auth section are used to perform admin actions as this user. + admin_user: str diff --git a/gsuitesync/gapiutil.py b/gsuitesync/gapiutil.py new file mode 100644 index 0000000000000000000000000000000000000000..05a2a46c5a7878c09361c1d9fcce85d39dd6f88e --- /dev/null +++ b/gsuitesync/gapiutil.py @@ -0,0 +1,27 @@ +""" +Utility functions which should have been part of the Google API client. + +""" + + +def list_all(list_cb, *, page_size=500, items_key='items', **kwargs): + """ + Simple wrapper for Google Client SDK list()-style callables. Repeatedly fetches pages of + results merging all the responses together. Returns the merged "items" arrays from the + responses. The key used to get the "items" array from the response may be overridden via the + items_key argument. + + """ + # Loop while we wait for nextPageToken to be "none" + page_token = None + resources = [] + while True: + list_response = list_cb(pageToken=page_token, maxResults=page_size, **kwargs).execute() + resources.extend(list_response.get(items_key, [])) + + # Get the token for the next page + page_token = list_response.get('nextPageToken') + if page_token is None: + break + + return resources diff --git a/gsuitesync/ldap.py b/gsuitesync/ldap.py new file mode 100644 index 0000000000000000000000000000000000000000..2c11b245d32412898c728e9730235ddf052b6b8b --- /dev/null +++ b/gsuitesync/ldap.py @@ -0,0 +1,75 @@ +""" +Retrieving user information from an LDAP directory. + +""" +import collections +import dataclasses +import typing + +import ldap3 + +from .config import ConfigurationDataclassMixin + + +# User information we need to populate the Google user directory. +UserEntry = collections.namedtuple('UserEntry', 'uid cn sn displayName') + + +@dataclasses.dataclass +class Configuration(ConfigurationDataclassMixin): + """ + Configuration for accessing the LDAP directory. 
+ + """ + host: str + + search_base: str + + eligible_user_filter: str + + managed_user_filter: typing.Union[str, None] = None + + def get_eligible_uids(self): + """ + Return a set containing all uids who are eligible to have a Google account. + + """ + return { + e['attributes']['uid'][0] + for e in self._search(search_filter=self.eligible_user_filter, attributes=['uid']) + } + + def get_managed_user_entries(self): + """ + Return a list containing all managed user entries as UserEntry instances. + + """ + search_filter = ( + self.managed_user_filter + if self.managed_user_filter is not None + else self.eligible_user_filter + ) + return [ + UserEntry( + uid=_extract(e, 'uid'), cn=_extract(e, 'cn'), sn=_extract(e, 'sn'), + displayName=_extract(e, 'displayName') + ) + for e in self._search( + search_filter=search_filter, attributes=['uid', 'cn', 'sn', 'displayName'] + ) + ] + + def _search(self, *, search_filter, attributes): + ldap_server = ldap3.Server(self.host) + with ldap3.Connection(ldap_server, auto_bind=True) as conn: + return conn.extend.standard.paged_search( + self.search_base, search_filter, paged_size=1000, attributes=attributes) + + +def _extract(entry, attr, *, default=''): + vs = entry['attributes'].get(attr, []) + if len(vs) == 0: + return default + if isinstance(vs, str): + return vs + return vs[0] diff --git a/gsuitesync/limits.py b/gsuitesync/limits.py new file mode 100644 index 0000000000000000000000000000000000000000..602451ebf17a6e216e9885cbaee1e9a9e8503f4e --- /dev/null +++ b/gsuitesync/limits.py @@ -0,0 +1,48 @@ +""" +Synchronisation limits. + +""" +import dataclasses +import numbers +import typing + +from . import config + + +@dataclasses.dataclass +class Configuration(config.ConfigurationDataclassMixin): + """ + Configuration for synchronisation limits. + + """ + # The abort_... settings below are safety limits and will abort the run if the limits are + # violated. They are there to define the "sane limits" for an update. 
+ + # Refuse to perform sync if we are to "touch" more than this percentage of users. The + # percentage of users "touched" is calculated as + # + # (new google users + modified google users) / max(1, total google users) + # + # where "modified" includes metadata changes and suspension/restoration. As such this + # calculated percentage can be greater than 100. Set to null to have no limit. Default: null. + abort_user_change_percentage: typing.Union[None, numbers.Real] = None + + # The max_... settings below will not abort the run if the number of users affected is greater + # than the specified number. Instead the number of users affected is capped to that number. The + # selection of which users are included in the capped number is arbitrary. + + # Limit the number of new user creations per run. This is an absolute number. Set to None to + # have no limit. + max_new_users: typing.Union[None, numbers.Real] = None + + # Limit the number of user suspensions per run. This is an absolute number. Set to None to + # have no limit. + max_suspended_users: typing.Union[None, numbers.Real] = None + + # Limit the number of user un-suspensions (reactivations) per run. This is an absolute number. + # Set to None to have no limit. + max_reactivated_users: typing.Union[None, numbers.Real] = None + + # Limit the number of user metadata changes per run. This is an absolute number. Set to None to + # have no limit. + max_updated_users: typing.Union[None, numbers.Real] = None diff --git a/gsuitesync/naming.py b/gsuitesync/naming.py new file mode 100644 index 0000000000000000000000000000000000000000..a658610de1373c9b44d86f740d3f45830500a12f --- /dev/null +++ b/gsuitesync/naming.py @@ -0,0 +1,173 @@ +""" +Utilities for constructing human-friendly names. + +""" +import collections + + +# The human-friendly names constructed by get_names(). 
+Names = collections.namedtuple('Names', 'given_name family_name') + + +def get_names(*, uid, display_name=None, cn=None, sn=None): + """ + If we only have a uid, this is used for both given name and family name. + + >>> get_names(uid='spqr1') + Names(given_name='spqr1', family_name='spqr1') + >>> get_names(uid='spqr1', display_name='spqr1') + Names(given_name='spqr1', family_name='spqr1') + >>> get_names(uid='spqr1', display_name='spqr1', cn='spqr1', sn='spqr1') + Names(given_name='spqr1', family_name='spqr1') + >>> get_names(uid='spqr1', display_name='') + Names(given_name='spqr1', family_name='spqr1') + >>> get_names(uid='spqr1', sn='') + Names(given_name='spqr1', family_name='spqr1') + >>> get_names(uid='spqr1', cn='') + Names(given_name='spqr1', family_name='spqr1') + + "Odd" ASCII characters unsupported by Google are stripped out of names. + + >>> get_names(uid='spqr1', display_name='Stephen @**Quill-Roman**@') + Names(given_name='Stephen', family_name='Quill-Roman') + + Long names are truncated. + + >>> get_names(uid='spqr1', display_name='Stephen Quill-Roman' + 'X' * 200) + ... #doctest: +NORMALIZE_WHITESPACE + Names(given_name='Stephen', + family_name='Quill-RomanXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX') + + For compatibility with the existing authenticator, if we have common name and surname and the + common name ends with the surname, this is used to form the given names assuming there is some + string left. (Once we're happy with the sync, we should remove this.) + + >>> get_names(uid='spqr1', sn='Quill Roman', cn='Prof. S.P. Quill Roman') + Names(given_name='Prof. S.P.', family_name='Quill Roman') + + For similar compatibility reasons, if we have a cn, split it at the final space character + irrespective of display name. + + >>> get_names(uid='spqr1', cn='Prof. S.P. Quill Roman', display_name='Foo Bar') + Names(given_name='Prof. S.P. Quill', family_name='Roman') + >>> get_names( + ... uid='spqr1', sn='spqr1', display_name='Stephen P. Q. 
 Roman', + ... cn='Prof. S.P.Q. Roman') + Names(given_name='Prof. S.P.Q.', family_name='Roman') + + (In the future we will probably remove the compatibility layer but not until we're happy with + the rest of the sync.) + + If we have display name and surname and the display name ends with the surname, this is used to + form the given names assuming there is some string left. + + >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', sn='Quill Roman') + Names(given_name='Stephen', family_name='Quill Roman') + + If this didn't work but we have display name, split it at the final space. + + >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', sn='Stephen Quill Roman') + Names(given_name='Stephen Quill', family_name='Roman') + >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', cn='Stephen') + Names(given_name='Stephen Quill', family_name='Roman') + >>> get_names(uid='spqr1', display_name='Stephen Quill Roman') + Names(given_name='Stephen Quill', family_name='Roman') + + Support Wookey. + + >>> get_names(uid='spqr1', display_name='Wookey') + Names(given_name='Wookey', family_name='spqr1') + >>> get_names(uid='spqr1', sn='Wookey') + Names(given_name='spqr1', family_name='Wookey') + >>> get_names(uid='spqr1', cn='Wookey') + Names(given_name='Wookey', family_name='spqr1') + + """ + # If any of display name, common name or surname is the same as the uid, proceed as if it were + # unset. Trim any leading/trailing whitespace at the same time. + cn = cn.strip() if cn is not None and cn != uid else None + sn = sn.strip() if sn is not None and sn != uid else None + display_name = ( + display_name.strip() + if display_name is not None and display_name != uid else None + ) + + # If any of cn, sn or display_name are blank, proceed as if they're not set. + cn = cn if cn != '' else None + sn = sn if sn != '' else None + display_name = display_name if display_name != '' else None + + # Function to construct return value from family name and given name. 
Google names can't be + # longer than 60 characters so truncate them after cleaning. + def _make_ret(*, family_name, given_name): + return Names(family_name=_clean(family_name)[:60], given_name=_clean(given_name)[:40]) + + # If we have a sn and cn and the cn ends with sn, split out the sn. + if cn is not None and sn is not None and cn.endswith(sn): + given_name = cn[:-len(sn)].strip() + if given_name != '': + return _make_ret(family_name=sn, given_name=given_name) + + # If we have cn, split at space and see if we have two parts. + if cn is not None: + components = cn.split() + if len(components) > 0: + family_name = components[-1] + given_name = ' '.join(components[:-1]) + if given_name != '' and family_name != '': + return _make_ret(family_name=family_name, given_name=given_name) + + # If we have a sn and display name and the display name ends with sn, split out the sn. + if display_name is not None and sn is not None and display_name.endswith(sn): + given_name = display_name[:-len(sn)].strip() + if given_name != '': + return _make_ret(family_name=sn, given_name=given_name) + + # If we have the display name, split at space and see if we have two parts. + if display_name is not None: + components = display_name.split() + if len(components) > 0: + family_name = components[-1] + given_name = ' '.join(components[:-1]) + if given_name != '' and family_name != '': + return _make_ret(family_name=family_name, given_name=given_name) + + # Support Wookey. + if display_name is not None and ' ' not in display_name: + return _make_ret(family_name=uid, given_name=display_name) + if sn is not None and ' ' not in sn: + return _make_ret(family_name=sn, given_name=uid) + if cn is not None and ' ' not in cn: + return _make_ret(family_name=uid, given_name=cn) + + # Give up and return uid for both fields + return _make_ret(family_name=uid, given_name=uid) + + +def _clean(s): + """ + Clean any "bad characters" in names. 
This pattern is based on the one used by the + legacy Google authenticator which has this comment: + + Google API doesn't like _some_ characters. The 'documentation' + (http://www.google.com/support/a/bin/answer.py?answer=33386) says "First and last names + support unicode/UTF-8 characters, and may contain spaces, letters (a-z), numbers (0-9), + dashes (-), forward slashes (/), and periods (.)", which makes no sence [sic]. + Experimentation suggests it chokes on '<', '>', and '=', but doesn't mind, e.g. cyrilic + characters. Compromise by filtering out "!"#$%&'()*+,:;<=>?@[\\]^_`{|}~" - i.e. all the + 'odd' ASCII characters other than the ones explicitly supported. + + We change this to allow "'" since plenty of names have this character. (E.g. "O'Reilly", + "D'Angelo", etc.) + + >>> _clean('ab@c') + 'abc' + >>> _clean('a "b" c') + 'a b c' + + """ + return ''.join(c for c in s if c not in _CLEAN_BAD_CHARS) + + +# Characters stripped by _clean. Present as a constant to avoid re-creating it. +_CLEAN_BAD_CHARS = '!"#$%&()*+,:;<=>?@[\\]^_`{|}~' diff --git a/gsuitesync/sync.py b/gsuitesync/sync.py new file mode 100644 index 0000000000000000000000000000000000000000..be993173e94e4964cf28598e07b74572a4c13dc1 --- /dev/null +++ b/gsuitesync/sync.py @@ -0,0 +1,358 @@ +""" +Synchronise Google Directory with a local LDAP directory. + +""" +import crypt +import dataclasses +import itertools +import logging +import re +import secrets +import typing + +from googleapiclient import discovery + +from . import config +from . import gapiauth +from . import gapidomain +from . import gapiutil +from . import ldap +from . import limits +from . import naming + +LOG = logging.getLogger(__name__) + +# Scopes required to perform read-only actions. +READ_ONLY_SCOPES = [ + 'https://www.googleapis.com/auth/admin.directory.user.readonly', +] + +# Scoped *in addition to READ_ONLY_SCOPES* required to perform a full update. 
# Scopes required *in addition to READ_ONLY_SCOPES* to perform a full update.
WRITE_SCOPES = [
    'https://www.googleapis.com/auth/admin.directory.user',
]


@dataclasses.dataclass
class Configuration(config.ConfigurationDataclassMixin):
    # A regular expression which is used to match the organization unit path for Google users who
    # should be excluded from the list returned by Google. Those users do not exist for the
    # purposes of the rest of the sync and so if they appear in the list of managed users this
    # script will attempt to re-add them and fail in the process. Use this setting for users who
    # are managed completely outside of this script.
    ignore_google_org_unit_path_regex: typing.Union[str, None] = None


def sync(configuration, *, read_only=True):
    """
    Perform sync given configuration dictionary.

    *configuration* is a mapping with optional "sync", "google_api", "google_domain", "ldap"
    and "limits" sections, each parsed by the corresponding Configuration dataclass.

    When *read_only* is True (the default) all changes are calculated and logged but no batch
    requests are issued to Google.

    Raises RuntimeError if an internal sanity check fails or if the proportion of users that
    would be modified exceeds the configured abort limit.
    """
    if read_only:
        LOG.info('Performing synchronisation in READ ONLY mode.')
    else:
        LOG.info('Performing synchronisation in WRITE mode.')

    # Parse configuration
    sync_config = Configuration.from_dict(configuration.get('sync', {}))
    gapi_auth_config = gapiauth.Configuration.from_dict(
        configuration.get('google_api', {}).get('auth', {}))
    gapi_domain_config = gapidomain.Configuration.from_dict(
        configuration.get('google_domain', {}))
    ldap_config = ldap.Configuration.from_dict(configuration.get('ldap', {}))
    limits_config = limits.Configuration.from_dict(configuration.get('limits', {}))

    # Load appropriate Google credentials. Write scopes are only requested when we will
    # actually modify users; the credentials act on behalf of the domain admin user.
    creds = (
        gapi_auth_config.load_credentials(read_only=read_only)
        .with_scopes(READ_ONLY_SCOPES + ([] if read_only else WRITE_SCOPES))
        .with_subject(gapi_domain_config.admin_user)
    )

    # Get a set containing all CRSids. These are all the people who are eligible to be in our
    # GSuite instance. If a user is in GSuite and is *not* present in this list then they are
    # suspended.
    LOG.info('Reading eligible user entries from LDAP')
    eligible_uids = ldap_config.get_eligible_uids()
    LOG.info('Total LDAP entries: %s', len(eligible_uids))

    # Get a list of managed users. These are all the people who match the "managed_user_filter"
    # in the LDAP settings.
    LOG.info('Reading managed user entries from LDAP')
    managed_user_entries = ldap_config.get_managed_user_entries()

    # Form a mapping from uid to managed user.
    managed_user_entries_by_uid = {u.uid: u for u in managed_user_entries}

    # Form a set of all *managed user* uids
    managed_user_uids = set(managed_user_entries_by_uid.keys())
    LOG.info('Total managed user entries: %s', len(managed_user_uids))

    # Sanity check: the managed users should be a subset of the eligible ones.
    if len(managed_user_uids - eligible_uids) != 0:
        raise RuntimeError('Sanity check failed: some managed uids were not in the eligible set')

    # Build the directory service using Google API discovery.
    directory_service = discovery.build('admin', 'directory_v1', credentials=creds)

    # Retrieve information on all users excluding domain admins. Only the fields listed below
    # are requested to keep the response size down.
    LOG.info('Getting information on Google domain users')
    fields = [
        'id', 'isAdmin', 'orgUnitPath', 'primaryEmail', 'suspended', 'suspensionReason',
        'name(givenName, familyName)',
    ]
    all_google_users = gapiutil.list_all(
        directory_service.users().list, items_key='users', domain=gapi_domain_config.name,
        query='isAdmin=false', fields='nextPageToken,users(' + ','.join(fields) + ')',
    )

    # Strip any "to be ignored" users out of the results.
    if sync_config.ignore_google_org_unit_path_regex is not None:
        LOG.info(
            'Ignoring users whose organization unit path matches %r',
            sync_config.ignore_google_org_unit_path_regex)
        regex = re.compile(sync_config.ignore_google_org_unit_path_regex)
        all_google_users = [
            u for u in all_google_users if not regex.match(u['orgUnitPath'])
        ]

    # Sanity check. There should be no admins in the returned results.
    if any(u.get('isAdmin', False) for u in all_google_users):
        raise RuntimeError('Sanity check failed: admin users in user list')

    # Form a mapping from uid to Google user. We form the uid by splitting out the local-part of
    # the email address.
    all_google_users_by_uid = {u['primaryEmail'].split('@')[0]: u for u in all_google_users}

    # Form a set of all Google-side uids. The all_google_uids set is all users including the
    # suspended ones and the suspended_google_uids set is only the suspended users. Non suspended
    # users are therefore all_google_uids - suspended_google_uids.
    all_google_uids = set(all_google_users_by_uid.keys())
    suspended_google_uids = {uid for uid, u in all_google_users_by_uid.items() if u['suspended']}

    # Sanity check. We should not have lost anyone. (I.e. the uid should be unique.)
    if len(all_google_uids) != len(all_google_users):
        raise RuntimeError('Sanity check failed: user list changed length')

    # Log some stats.
    LOG.info('Total Google users: %s', len(all_google_uids))
    LOG.info(
        'Suspended Google users: %s', sum(1 if u['suspended'] else 0 for u in all_google_users))

    # For each user which exists in Google or the managed user set which is eligible, determine if
    # they need updating/creating. If so, record a patch/insert for the user.
    LOG.info('Calculating updates...')
    google_user_updates = {}
    google_user_creations = {}
    for idx, (uid, managed_user_entry) in enumerate(managed_user_entries_by_uid.items()):
        # Show progress
        if (idx + 1) % 5000 == 0:
            LOG.info('Processed %s/%s...', idx+1, len(managed_user_entries_by_uid))

        # Heuristically determine the given and family names.
        names = naming.get_names(
            uid=uid, display_name=managed_user_entry.displayName, cn=managed_user_entry.cn,
            sn=managed_user_entry.sn)

        # Form expected user resource fields.
        expected_google_user = {
            'name': {
                'givenName': names.given_name,
                'familyName': names.family_name,
            },
        }

        # Find existing Google user (if any).
        existing_google_user = all_google_users_by_uid.get(uid)

        if existing_google_user is not None:
            # See if we need to change the existing user
            # Unless anything needs changing, the patch is empty.
            patch = {}

            # Determine how to patch user's name.
            google_user_name = existing_google_user.get('name', {})
            patch_name = {}
            if google_user_name.get('givenName') != expected_google_user['name']['givenName']:
                patch_name['givenName'] = names.given_name
            if google_user_name.get('familyName') != expected_google_user['name']['familyName']:
                patch_name['familyName'] = names.family_name
            if len(patch_name) > 0:
                patch['name'] = patch_name

            # Only record non-empty patches.
            if len(patch) > 0:
                google_user_updates[uid] = patch
        else:
            # No existing Google user. Record the new resource. (A random throw-away password
            # hash is attached to this resource later, in api_requests(), immediately before the
            # insert request is built. It doesn't matter what this password is since we never
            # have the user log in with it; for password-only applications the user can make use
            # of an application-specific password.)
            new_user = {
                **{
                    'primaryEmail': f'{uid}@{gapi_domain_config.name}',
                },
                **expected_google_user,
            }
            google_user_creations[uid] = new_user

    # Form a set of all the uids which need patching.
    uids_to_update = set(google_user_updates.keys())
    LOG.info('Number of existing users to update: %s', len(uids_to_update))

    # Form a set of all the uids which need adding.
    uids_to_add = set(google_user_creations.keys())
    LOG.info('Number of users to add: %s', len(uids_to_add))

    # Form a set of all uids which need reactivating. We reactivate users who are in the managed
    # user list *and* the suspended user list.
    uids_to_reactivate = suspended_google_uids & managed_user_uids
    LOG.info('Number of users to reactivate: %s', len(uids_to_reactivate))

    # Form a set of all uids which should be suspended. This is all the unsuspended Google uids
    # which do not appear in our eligible user list.
    uids_to_suspend = (all_google_uids - suspended_google_uids) - eligible_uids
    LOG.info('Number of users to suspend: %s', len(uids_to_suspend))

    # Calculate percentage change. (max(1, ...) guards against division by zero on an empty
    # domain.)
    user_change_percentage = 100. * (
        len(uids_to_add | uids_to_update | uids_to_reactivate | uids_to_suspend)
        /
        max(1, len(all_google_uids))
    )
    LOG.info('Configuration will modify %.2f%% of users', user_change_percentage)

    # Enforce percentage change sanity check. This protects against, e.g., a misconfigured LDAP
    # filter suddenly suspending the whole domain.
    if (limits_config.abort_user_change_percentage is not None and
            user_change_percentage > limits_config.abort_user_change_percentage):
        LOG.error(
            'Modification of %.2f%% of users is greater than limit of %.2f%%. Aborting.',
            user_change_percentage, limits_config.abort_user_change_percentage
        )
        raise RuntimeError('Aborting due to large user change percentage')

    # Cap maximum size of various operations. _limit() keeps an unspecified subset, so which
    # users are processed first is arbitrary; the remainder are picked up on a later run.
    if limits_config.max_new_users is not None and len(uids_to_add) > limits_config.max_new_users:
        uids_to_add = _limit(uids_to_add, limits_config.max_new_users)
        LOG.info('Capped number of new users to %s', len(uids_to_add))
    if (limits_config.max_suspended_users is not None and
            len(uids_to_suspend) > limits_config.max_suspended_users):
        uids_to_suspend = _limit(uids_to_suspend, limits_config.max_suspended_users)
        LOG.info('Capped number of users to suspend to %s', len(uids_to_suspend))
    if (limits_config.max_reactivated_users is not None and
            len(uids_to_reactivate) > limits_config.max_reactivated_users):
        uids_to_reactivate = _limit(uids_to_reactivate, limits_config.max_reactivated_users)
        LOG.info('Capped number of users to reactivate to %s', len(uids_to_reactivate))
    if (limits_config.max_updated_users is not None and
            len(uids_to_update) > limits_config.max_updated_users):
        uids_to_update = _limit(uids_to_update, limits_config.max_updated_users)
        LOG.info('Capped number of users to update to %s', len(uids_to_update))

    # A generator which will generate patch() and insert() calls to the directory service to
    # perform the actions required. Requests are built lazily so that they can be batched below
    # without materialising them all at once.
    def api_requests():
        # Update existing users.
        user_updates = {uid: google_user_updates[uid] for uid in uids_to_update}
        for uid, update in user_updates.items():
            google_id = all_google_users_by_uid[uid]['id']
            LOG.info('Update user "%s": "%r"', uid, update)
            yield directory_service.users().patch(userKey=google_id, body=update)

        # Suspend old users
        for uid in uids_to_suspend:
            google_id = all_google_users_by_uid[uid]['id']
            LOG.info('Suspending user: "%s"', uid)
            yield directory_service.users().patch(userKey=google_id, body={'suspended': True})

        # Reactivate returning users
        for uid in uids_to_reactivate:
            google_id = all_google_users_by_uid[uid]['id']
            LOG.info('Reactivating user: "%s"', uid)
            yield directory_service.users().patch(userKey=google_id, body={'suspended': False})

        # Create new users
        for uid in uids_to_add:
            # Generate a random password which is thrown away. Only the SHA-512 crypt hash is
            # sent to Google; the plaintext is never stored or logged.
            new_user = {**{
                'hashFunction': 'crypt',
                'password': crypt.crypt(secrets.token_urlsafe(), crypt.METHOD_SHA512),
            }, **google_user_creations[uid]}
            redacted_user = {**new_user, **{'password': 'REDACTED'}}
            LOG.info('Adding user "%s": %s', uid, redacted_user)
            yield directory_service.users().insert(body=new_user)

    # Make a chunked iterator of requests to the directory API. The Directory API supports a
    # maximum batch size of 1000. See:
    # https://developers.google.com/admin-sdk/directory/v1/guides/batch
    for request_batch in _grouper(api_requests(), n=1000):
        # Form batch request.
        batch = directory_service.new_batch_http_request()
        for request in request_batch:
            batch.add(request, callback=_handle_batch_response)

        # Execute the batch request if not in read only mode. Otherwise log that we would have.
        if not read_only:
            LOG.info('Issuing batch request to Google.')
            batch.execute()
        else:
            LOG.info('Not issuing batch request in read-only mode.')


def _handle_batch_response(request_id, response, exception):
    """
    Callback for each request in a directory API batch. Logs failures; successful responses
    are ignored since the requests are fire-and-forget.
    """
    if exception is not None:
        LOG.error('Error performing request: %s', exception)
        LOG.error('Response: %r', response)


def _limit(s, limit):
    """
    Given a set, s, and a numeric limit, return a set which has no more than *limit* elements. The
    exact set of elements retained is not specified.

    >>> s = set('ABCDEFGHIJKLMNOPQ')
    >>> len(s) > 5
    True
    >>> len(_limit(s, 5)) == 5
    True
    >>> len(_limit(s, 500)) == len(s)
    True

    All elements of the returned set are taken from input set.

    >>> s_prime = _limit(s, 5)
    >>> s_prime - s
    set()

    """
    # takewhile stops as soon as the enumerate index reaches *limit*, so at most *limit*
    # elements are consumed from the (arbitrary) set iteration order.
    return {e for _, e in itertools.takewhile(lambda p: p[0] < limit, enumerate(s))}


def _grouper(iterable, *, n):
    """
    Group an iterable into chunks of at most *n* elements. A generator which yields iterables
    representing slices of *iterable*.

    >>> [list(i) for i in _grouper('ABCDEFGH', n=3)]
    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G', 'H']]
    >>> def generator(stop):
    ...     for x in range(stop):
    ...         yield x
    >>> [list(i) for i in _grouper(generator(10), n=3)]
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    >>> [list(i) for i in _grouper(generator(12), n=3)]
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]

    The implementation of this function attempts to be efficient; the chunks are iterables which
    are generated on demand rather than being constructed first. Hence this function can deal with
    iterables which would fill memory if intermediate chunks were stored.

    >>> i = _grouper(generator(100000000000000000000), n=1000000000000000)
    >>> next(next(i))
    0

    """
    it = iter(iterable)
    while True:
        # islice yields at most n items; pulling the first item eagerly lets us detect
        # exhaustion of the underlying iterator and stop cleanly.
        next_chunk_it = itertools.islice(it, n)
        try:
            first = next(next_chunk_it)
        except StopIteration:
            return
        yield itertools.chain((first,), next_chunk_it)
def load_requirements():
    """
    Load requirements file and return non-empty, non-comment lines with leading and trailing
    whitespace stripped.
    """
    requirements_path = os.path.join(os.path.dirname(__file__), 'requirements.txt')
    # Open with an explicit encoding so parsing does not depend on the locale default.
    with open(requirements_path, encoding='utf-8') as f:
        requirements = []
        for raw_line in f:
            # Strip once per line (the original called line.strip() three times).
            line = raw_line.strip()
            if line and not line.startswith('#'):
                requirements.append(line)
        return requirements
+envlist=flake8,py3 +# Allow overriding toxworkdir via environment variable +toxworkdir={env:TOXINI_WORK_DIR:{toxinidir}/.tox} +# Avoid .egg-info directories +skipsdist=True + +# The "_vars" section is ignored by tox but we place some useful shared +# variables in it to avoid needless repetition. +[_vars] +# Where to write build artefacts. We default to the "build" directory in the +# tox.ini file's directory. Override with the TOXINI_ARTEFACT_DIR environment +# variable. +build_root={env:TOXINI_ARTEFACT_DIR:{toxinidir}/build} + +[testenv] +# Additional dependencies +deps= + . + coverage + pytest + pytest-cov +# Which environment variables should be passed into the environment. +passenv= +# Allow people to override the coverage report location should they so wish. + COVERAGE_FILE +# Location of the coverage.xml file + COVERAGE_XML_FILE +# How to run the test suite. Note that arguments passed to tox are passed on to +# the test command. +commands= + pytest --doctest-modules --cov={toxinidir} --junitxml={[_vars]build_root}/{envname}/junit.xml + coverage html --directory {[_vars]build_root}/{envname}/htmlcov/ + coverage xml -o {env:COVERAGE_XML_FILE:{[_vars]build_root}/{envname}/coverage.xml} +# Allow sitepackages setting to be overridden via TOX_SITEPACKAGES environment +# variable. The tox container uses this to avoid re-installing the same packages +# over and over again. +sitepackages={env:TOXINI_SITEPACKAGES:False} + +[testenv:py3] +basepython=python3 + +# Check for PEP8 violations +[testenv:flake8] +basepython=python3 +deps= +# We specify a specific version of flake8 to avoid introducing "false" +# regressions when new checks are introduced. The version of flake8 used may +# be overridden via the TOXINI_FLAKE8_VERSION environment variable. + flake8=={env:TOXINI_FLAKE8_VERSION:3.6.0} +commands= + flake8 --version + flake8 .