diff --git a/CHANGELOG.md b/CHANGELOG.md index 958c56c7793d3a8ffcac482c1fca658e0d82959a..a2708005fda28d5e5d801aaad920d973e2f2031f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.5] - 2024-09-11 + +### Changed + +- No longer use lastLoginTime from Google user API, especially for determining users to delete +- Calculate and log numbers of users to delete even if not going to perform those deletions + ## [1.0.4] - 2024-07-08 ### Fixed diff --git a/gsuitesync/sync/base.py b/gsuitesync/sync/base.py index 12347e9580b8371229f898c338e3e681eabffa35..5c0e616a971a72c91d726c6732351b49315d1f36 100644 --- a/gsuitesync/sync/base.py +++ b/gsuitesync/sync/base.py @@ -7,11 +7,10 @@ Base classes for retrievers, comparator and updater classes that consume configu class ConfigurationStateConsumer: required_config = None - def __init__(self, configuration, state, write_mode=False, delete_users=False, cache_dir=None): + def __init__(self, configuration, state, write_mode=False, cache_dir=None): # For convenience, create properties for required configuration for c in self.required_config if self.required_config is not None else []: setattr(self, f"{c}_config", configuration.get(c, {})) self.state = state self.write_mode = write_mode - self.delete_users = delete_users self.cache_dir = cache_dir diff --git a/gsuitesync/sync/compare.py b/gsuitesync/sync/compare.py index 93f096e339d9ff1b3a6bc9d222fb6c74d317cc72..e6129e193a7088150d2bbbd773f991a511f26079 100644 --- a/gsuitesync/sync/compare.py +++ b/gsuitesync/sync/compare.py @@ -2,6 +2,7 @@ Compute differences between the Lookup and Google data. """ + import logging import itertools @@ -107,56 +108,34 @@ class Comparator(ConfigurationStateConsumer): LOG.info("Number of users to suspend: %s", len(uids_to_suspend)) # Users that need their MyDrive scanned for shared files, are those suspended users who - # haven't logged in to Google for a month, have a Lookup cancelledDate over a month ago, - # haven't been scanned already or already marked to be scanned, and aren't about to be - # reactivated. - + # have a Lookup cancelledDate over a month ago, haven't been scanned already or already + # marked to be scanned, and aren't about to be reactivated. uids_to_scan_mydrive = ( - self.state.never_logged_in_a_month_uids - & self.state.cancelled_a_month_ago_uids - & self.state.no_mydrive_shared_settings_uids + self.state.cancelled_a_month_ago_uids & self.state.no_mydrive_shared_settings_uids ) - uids_to_reactivate LOG.info("Number of users that need MyDrive scanned: %s", len(uids_to_scan_mydrive)) - # Form a set of all uids who have never logged in and should be deleted. This is all the - # already suspended Google uids (except those to be reactivated) plus the uids to be - # suspended, who have never logged in. - # These users will have typically been suspended by the previous sync run, as suspending - # and deleting in the same run can cause issues. - - uids_to_delete_who_never_logged_in = set() - if self.delete_users: - uids_to_delete_who_never_logged_in = ( - self.state.suspended_google_uids - uids_to_reactivate - ) & self.state.never_logged_in_google_uids - LOG.info( - f"Number of users to delete (never logged in):" - f" {len(uids_to_delete_who_never_logged_in)}" - ) - # Form a set of all uids who have already been suspended, have a mydrive scan result - # `permissions-none` and have been suspended in lookup for more than 24 months: - uids_to_delete_with_mydrive_shared_result_permission_none = set() - if self.delete_users: - uids_to_delete_with_mydrive_shared_result_permission_none = ( - (self.state.suspended_google_uids - uids_to_reactivate) - & self.state.permission_none_mydrive_shared_result_uids - & self.state.cancelled_twenty_four_months_ago_uids - ) - uids_to_scan_mydrive + # `permissions-none` and have been suspended in lookup for more than 24 months, and aren't + # about to be reactivated or are marked to be scanned. + uids_to_delete_with_mydrive_shared_result_permission_none = ( + (self.state.suspended_google_uids - uids_to_reactivate) + & self.state.permission_none_mydrive_shared_result_uids + & self.state.cancelled_twenty_four_months_ago_uids + ) - uids_to_scan_mydrive LOG.info( f"Number of users to delete (my-shared-result {MYDRIVE_SHARED_RESULT_NONE}):" f" {len(uids_to_delete_with_mydrive_shared_result_permission_none)}" ) # Form a set of all uids who have already been suspended, have a mydrive scan result - # `permissions-removed` and have been suspended in lookup for more than 24 months: - uids_to_delete_with_mydrive_shared_result_permission_removed = set() - if self.delete_users: - uids_to_delete_with_mydrive_shared_result_permission_removed = ( - (self.state.suspended_google_uids - uids_to_reactivate) - & self.state.permission_removed_mydrive_shared_result_uids - & self.state.cancelled_twenty_four_months_ago_uids - ) - uids_to_scan_mydrive + # `permissions-removed` and have been suspended in lookup for more than 24 months, and + # aren't about to be reactivated or are marked to be scanned. + uids_to_delete_with_mydrive_shared_result_permission_removed = ( + (self.state.suspended_google_uids - uids_to_reactivate) + & self.state.permission_removed_mydrive_shared_result_uids + & self.state.cancelled_twenty_four_months_ago_uids + ) - uids_to_scan_mydrive LOG.info( f"Number of users to delete (my-shared-result {MYDRIVE_SHARED_RESULT_REMOVED}):" f" {len(uids_to_delete_with_mydrive_shared_result_permission_removed)}" @@ -164,8 +143,7 @@ class Comparator(ConfigurationStateConsumer): # Form a super set of uids to delete uids_to_delete = ( - uids_to_delete_who_never_logged_in - | uids_to_delete_with_mydrive_shared_result_permission_none + uids_to_delete_with_mydrive_shared_result_permission_none | uids_to_delete_with_mydrive_shared_result_permission_removed ) LOG.info("Total number of users to delete: %s", len(uids_to_delete)) @@ -364,7 +342,7 @@ class Comparator(ConfigurationStateConsumer): } ) - def enforce_limits(self, just_users, licensing): + def enforce_limits(self, just_users, licensing, delete_users): # -------------------------------------------------------------------------------------------- # Enforce limits on how much data to change in Google. # -------------------------------------------------------------------------------------------- @@ -376,7 +354,7 @@ class Comparator(ConfigurationStateConsumer): | self.state.uids_to_update | self.state.uids_to_reactivate | self.state.uids_to_suspend - | self.state.uids_to_delete + | (self.state.uids_to_delete if delete_users else set()) | self.state.uids_to_restore | self.state.uids_to_scan_mydrive ) diff --git a/gsuitesync/sync/gapi.py b/gsuitesync/sync/gapi.py index 9350d34cf06ef7dcb08826495c3c50da9eaa3f37..42e37930264273dc4d6d4a01827d722cf7b34662 100644 --- a/gsuitesync/sync/gapi.py +++ b/gsuitesync/sync/gapi.py @@ -3,28 +3,25 @@ Load current user, group and institution data from Google. """ +import functools import logging import re -import functools +import socket from google.oauth2 import service_account from googleapiclient import discovery -import socket -from .base import ConfigurationStateConsumer from .. import gapiutil +from .base import ConfigurationStateConsumer from .utils import ( - email_to_uid, + cache_to_disk, + custom_action, email_to_gid, + email_to_uid, groupID_regex, instID_regex, - date_months_ago, - isodate_parse, - custom_action, - cache_to_disk, ) - LOG = logging.getLogger(__name__) # Scopes required to perform read-only actions. @@ -129,47 +126,6 @@ class GAPIRetriever(ConfigurationStateConsumer): uid for uid, u in all_google_users_by_uid.items() if u["suspended"] } - # As of July 2024, a single user was returned without a lastLoginTime field. We cannot - # assume users like this have never logged in as we may then accidentally delete them. - # We will handle them as a separate case, not delete them immediately like those who have - # never logged in. This also means they will never be scanned and thus never be deleted. - # This is very much a workaround and any users in this state should be investigated and - # treated as a failure. - # We also catch those with a falsy lastLoginTime field and treat them the same as having - # no value. - no_last_login_time_uids = { - uid - for uid, u in all_google_users_by_uid.items() - if "lastLoginTime" not in u or not u["lastLoginTime"] - } - # All the users with a valid lastLoginTime field for the following checks that require it. - valid_google_users_by_id = { - uid: u - for uid, u in all_google_users_by_uid.items() - if uid not in no_last_login_time_uids - } - - # Also form a set of all uids who have never logged in - # (lastLoginTime will be 1970-01-01T00:00:00.000Z) - never_logged_in_google_uids = { - uid for uid, u in valid_google_users_by_id.items() if u["lastLoginTime"][:4] == "1970" - } - # And form a set of all suspended uids who have never logged in in last month - a_month_ago = date_months_ago(1) - never_logged_in_a_month_uids = { - uid - for uid, u in valid_google_users_by_id.items() - if u["suspended"] and isodate_parse(u["lastLoginTime"]) < a_month_ago - } - # And form a set of all suspended uids who have never logged in in last six months - # (now only used for reporting as scanning happens after 1 month) - six_months_ago = date_months_ago(6) - never_logged_in_six_months_uids = { - uid - for uid, u in valid_google_users_by_id.items() - if u["suspended"] and isodate_parse(u["lastLoginTime"]) < six_months_ago - } - # Form a set of suspended uids that have no mydrive-shared-action or -result no_mydrive_shared_settings_uids = { uid @@ -218,22 +174,6 @@ class GAPIRetriever(ConfigurationStateConsumer): # Log some stats. LOG.info("Total Google users: %s", len(all_google_uids)) LOG.info("Suspended Google users: %s", len(suspended_google_uids)) - LOG.info("No Last Logged-in Time Google users: %s", len(no_last_login_time_uids)) - if len(no_last_login_time_uids) > 0: - LOG.info( - "Users with no Last Logged-in Time%s: %s", - " (first 50)" if len(no_last_login_time_uids) > 50 else "", - list(sorted(no_last_login_time_uids))[:50], - ) - LOG.info("Never Logged-in Google users: %s", len(never_logged_in_google_uids)) - LOG.info( - "Not Logged-in for a month suspended Google users: %s", - len(never_logged_in_a_month_uids), - ) - LOG.info( - "Not Logged-in for 6 months suspended Google users: %s", - len(never_logged_in_six_months_uids), - ) LOG.info( "No MyDrive settings suspended Google users: %s", len(no_mydrive_shared_settings_uids) ) @@ -253,10 +193,6 @@ class GAPIRetriever(ConfigurationStateConsumer): "all_google_users_by_uid": all_google_users_by_uid, "all_google_uids": all_google_uids, "suspended_google_uids": suspended_google_uids, - "no_last_login_time_uids": no_last_login_time_uids, - "never_logged_in_google_uids": never_logged_in_google_uids, - "never_logged_in_a_month_uids": never_logged_in_a_month_uids, - "never_logged_in_six_months_uids": never_logged_in_six_months_uids, "no_mydrive_shared_settings_uids": no_mydrive_shared_settings_uids, "permission_none_mydrive_shared_result_uids": permission_none_mydrive_shared_result_uids, # NOQA: E501 "permission_removed_mydrive_shared_result_uids": permission_removed_mydrive_shared_result_uids, # NOQA: E501 diff --git a/gsuitesync/sync/lookup.py b/gsuitesync/sync/lookup.py index fbcee557d6a32312f02fc132d22a21c12dfcc1ba..883345db42aee3fc1ade63998ca374c189d945d2 100644 --- a/gsuitesync/sync/lookup.py +++ b/gsuitesync/sync/lookup.py @@ -2,6 +2,7 @@ Load current user, group and institution data from Lookup. """ + import collections import functools import logging @@ -106,7 +107,7 @@ class LookupRetriever(ConfigurationStateConsumer): def retrieve_cancelled_user_dates(self): """ Retrieve information about cancelled dates for suspended google users and form a set of - those cancelled more than 6 months ago and 24 months ago. + those cancelled more than a month ago and 24 months ago. """ @@ -122,18 +123,6 @@ class LookupRetriever(ConfigurationStateConsumer): ) self.state.update({"cancelled_a_month_ago_uids": cancelled_a_month_ago_uids}) - six_months_ago = date_months_ago(6) - cancelled_six_months_ago_uids = { - uid - for uid, cancelledDate in self.cancelled_dates_by_uid.items() - if cancelledDate and isodate_parse(cancelledDate) < six_months_ago - } - LOG.info( - "Total Lookup users cancelled more than 6 months ago: %s", - len(cancelled_six_months_ago_uids), - ) - self.state.update({"cancelled_six_months_ago_uids": cancelled_six_months_ago_uids}) - twenty_four_months_ago = date_months_ago(24) cancelled_twenty_four_months_ago_uids = { uid @@ -271,14 +260,13 @@ class LookupRetriever(ConfigurationStateConsumer): def cancelled_dates_by_uid(self): """ Return a dictionary mapping CRSid to cancelledDate for cancelled users. Limit to - suspended Google uids that haven't logged in for 6 months to reduce unnecessary calls - to Lookup API. + suspended Google uids. """ LOG.info("Reading cancelled date for suspended Google users from Lookup") - uids_to_retrieve = list(self.state.never_logged_in_a_month_uids) + uids_to_retrieve = list(self.state.suspended_google_uids) chunk = 100 crsid_chunks = [ uids_to_retrieve[x : x + chunk] for x in range(0, len(uids_to_retrieve), chunk) diff --git a/gsuitesync/sync/main.py b/gsuitesync/sync/main.py index 15b7a1721d278f286464f22dbfa6a002fcc83d46..c1f4d77e312502b61454c42c7eb51fcbd3c3b458 100644 --- a/gsuitesync/sync/main.py +++ b/gsuitesync/sync/main.py @@ -2,6 +2,7 @@ Synchronise Google Directory with Lookup using API Gateway. """ + import logging from .. import config @@ -27,7 +28,14 @@ def sync( ): """Perform sync given configuration dictionary.""" - LOG.info(f'Performing synchronisation in {"WRITE" if write_mode else "READ ONLY"} mode.') + LOG.info("Performing synchronisation:") + LOG.info("- mode = %s", "WRITE" if write_mode else "READ ONLY") + LOG.info( + "- update groups = %s", + "NO" if just_users else ("YES (with group settings)" if group_settings else "YES"), + ) + LOG.info("- update licensing = %s", "YES" if licensing else "NO") + LOG.info("- delete users = %s", "YES" if delete_users else "NO") # Parse configuration into Configuration dict of appropriate dataclasses configuration = config.parse_configuration(configuration) @@ -58,7 +66,7 @@ def sync( lookup.retrieve_cancelled_user_dates() # Compare users and optionally groups between Lookup and Google - comparator = Comparator(configuration, state, delete_users=delete_users) + comparator = Comparator(configuration, state) comparator.compare_users() if licensing: comparator.compare_licensing() @@ -68,11 +76,11 @@ def sync( if group_settings: comparator.compare_groups_settings() # Enforce creation/update limits - comparator.enforce_limits(just_users, licensing) + comparator.enforce_limits(just_users, licensing, delete_users) # Update Google with necessary updates found doing comparison - updater = GAPIUpdater(configuration, state, write_mode=write_mode, delete_users=delete_users) - updater.update_users() + updater = GAPIUpdater(configuration, state, write_mode=write_mode) + updater.update_users(delete_users) if licensing: updater.update_licensing() if not just_users: diff --git a/gsuitesync/sync/state.py b/gsuitesync/sync/state.py index 4ec934abc94eed566de95b04ac651715968440cf..adc7d1cbea3195e57678e2fdda06e5c71525d8be 100644 --- a/gsuitesync/sync/state.py +++ b/gsuitesync/sync/state.py @@ -18,7 +18,6 @@ class SyncState: managed_user_entries_by_uid: dict = field(default_factory=dict) managed_user_uids: set = field(default_factory=set) cancelled_a_month_ago_uids: set = field(default_factory=set) - cancelled_six_months_ago_uids: set = field(default_factory=set) cancelled_twenty_four_months_ago_uids: set = field(default_factory=set) # licensing @@ -45,10 +44,6 @@ class SyncState: all_google_users_by_uid: dict = field(default_factory=dict) all_google_uids: set = field(default_factory=set) suspended_google_uids: set = field(default_factory=set) - no_last_login_time_uids: set = field(default_factory=set) - never_logged_in_google_uids: set = field(default_factory=set) - never_logged_in_a_month_uids: set = field(default_factory=set) - never_logged_in_six_months_uids: set = field(default_factory=set) no_mydrive_shared_settings_uids: set = field(default_factory=set) permission_none_mydrive_shared_result_uids: set = field(default_factory=set) permission_removed_mydrive_shared_result_uids: set = field(default_factory=set) diff --git a/gsuitesync/sync/update.py b/gsuitesync/sync/update.py index 7e84b32ee7e8ba9b7cbd5f00c33d9a6864b58aac..dd02405b4a98286bcb3522bcda1f6b7c4a31b264 100644 --- a/gsuitesync/sync/update.py +++ b/gsuitesync/sync/update.py @@ -17,10 +17,10 @@ LOG = logging.getLogger(__name__) class GAPIUpdater(ConfigurationStateConsumer): required_config = ("sync", "gapi_domain", "licensing") - def update_users(self): + def update_users(self, delete_users=False): process_requests( self.state.directory_service, - self.user_api_requests(), + self.user_api_requests(delete_users), self.sync_config, write_mode=self.write_mode, ) @@ -57,7 +57,7 @@ class GAPIUpdater(ConfigurationStateConsumer): write_mode=self.write_mode, ) - def user_api_requests(self): + def user_api_requests(self, delete_users=False): """ A generator which will generate update(), insert(), delete() and undelete() calls to the directory service to perform the actions required to update users @@ -89,7 +89,7 @@ class GAPIUpdater(ConfigurationStateConsumer): ) # Delete suspended users - if self.delete_users: + if delete_users: for uid in self.state.uids_to_delete: google_id = self.state.all_google_users_by_uid[uid]["id"] LOG.info('Deleting user: "%s"', uid) diff --git a/setup.py b/setup.py index c5fd4c18a8e6d85a4a2b71df4044938f68adddd0..eb4cfba5cc1b70aaa133198721faf8f1dad4be0c 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def load_requirements(): setup( name="gsuitesync", - version="1.0.4", + version="1.0.5", packages=find_packages(), install_requires=load_requirements(), entry_points={