FAQ | This is a LIVE service | Changelog

Skip to content
Snippets Groups Projects
Commit c6893e15 authored by Robin Goodall's avatar Robin Goodall :speech_balloon:
Browse files

Rescan recovery

parent 3b3345cb
No related branches found
No related tags found
1 merge request!28Rescan recovery
......@@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.1.2] - 2023-10-17
### Added
- Additional options for "recover" operation:
- --user=CRSid to target a single user without needing to scan all users for
those marked for recovery
- --rescan to scan for existing permissions and only add those remaining
## [1.1.1] - 2023-07-10
### Updated
......
......@@ -4,7 +4,7 @@ Drive Management Tool
Usage:
gdrivemanager (-h | --help)
gdrivemanager [options] scan
gdrivemanager [options] recover
gdrivemanager [options] recover [--user=CRSID] [--rescan]
gdrivemanager [options] shared-drive-usage
gdrivemanager [options] report
(--instid=INSTID [--children] | --user=CRSID | --groupid=GROUPID | --request)
......@@ -18,10 +18,14 @@ Options:
-v, --verbose Increase logging verbosity.
-c, --configuration=FILE Specify configuration file to load.
-w, --write Actually try to make the changes.
--rescan Rescan for existing restored permissions so they
don't need to be restored again.
-i, --instid=INSTID Lookup Institution ID to create a report for.
--children Recursively include the child institutions of
the institution specified by --instid.
-u, --user=CRSID CRSid to create a report for.
-u, --user=CRSID CRSid to recover or create a report for.
-g, --groupid=GROUPID Group ID (or shortname) to create a report for.
-r, --request Search for request file in the report folder.
--cache-users Cache user quota in a local file (dev only)
......@@ -80,6 +84,8 @@ def main():
elif opts["recover"]:
manage.recover(
configuration,
user_id=opts["--user"],
rescan=opts["--rescan"],
write_mode=opts["--write"],
)
elif opts["shared-drive-usage"]:
......
......@@ -345,17 +345,40 @@ class GoogleDirectoryConnection(GoogleAPIConnection):
self.process_requests(requests)
def user_has_google_account(self, id):
def retrieve_user(self, user_key):
# We possibly don't need all these but it matches those returned by retrieve_users()
fields = [
"id",
"isAdmin",
"orgUnitPath",
"primaryEmail",
"suspended",
"lastLoginTime",
"name(givenName, familyName)",
"customSchemas",
]
try:
LOG.info(f"Checking if {id} has a Google account.")
_ = self.service.users().get(userKey=id).execute()
return True
return (
self.service.users()
.get(
userKey=user_key,
fields=",".join(fields),
projection="custom",
customFieldMask="UCam",
)
.execute()
)
except HttpError as e:
# users().get() can return 403 or 404 if the user doesn't exist
if e.resp.status in (403, 404):
return False
return None
raise
def user_has_google_account(self, id):
    """Report whether ``id`` resolves to a Google account.

    Logs the check, then looks the user up with ``retrieve_user``; any result
    other than ``None`` counts as the account existing.
    """
    LOG.info(f"Checking if {id} has a Google account.")
    return self.retrieve_user(id) is not None
class GoogleDriveConnection(GoogleAPIConnection):
required_config = ("google", "connection", "limits")
......
......@@ -354,28 +354,43 @@ def scan_remove_users_shared_permissions(
# RECOVER
# ------- #
def recover(configuration, write_mode=False):
def recover(configuration, user_id=None, rescan=False, write_mode=False):
LOG.info(f'Running RECOVER in {"WRITE" if write_mode else "READ ONLY"} mode.')
max_users = configuration.limits.max_recovered_users
directory_connection = GoogleDirectoryConnection(configuration, write_mode=write_mode)
all_google_users = directory_connection.retrieve_users()
LOG.info(f"TOTAL users fetched: {len(all_google_users)}")
if user_id:
user_email = f"{user_id.lower()}@{configuration.google.domain_name}"
LOG.info(f"Finding user {user_email}")
user = directory_connection.retrieve_user(user_email)
if not user:
LOG.error(f"User {user_email} not found.")
return
if not user_marked_for_recovery(user):
LOG.error(f"User {user_email} not marked for recovery.")
return
LOG.info(f"User found with id {user['id']}")
filtered_users = [user]
else:
all_google_users = directory_connection.retrieve_users()
LOG.info(f"TOTAL users fetched: {len(all_google_users)}")
filtered_users = [
user
for user in all_google_users
if all(
[
user_email_is_crsid(user),
user_marked_for_recovery(user),
]
)
]
filtered_users = [
user
for user in all_google_users
if all(
[
user_email_is_crsid(user),
user_marked_for_recovery(user),
]
)
]
LOG.info(f"TOTAL users marked for recovery: {len(filtered_users)}.")
LOG.info(f"Maximum users to recover this run: {max_users}.")
LOG.info(f"TOTAL users marked for recovery: {len(filtered_users)}.")
LOG.info(f"Maximum users to recover this run: {max_users}.")
shared_drive_connection = GoogleDriveConnection(configuration, write_mode=write_mode)
shared_drive_id = configuration.google.shared_storage_drive
users_patch_data = {}
......@@ -383,7 +398,12 @@ def recover(configuration, write_mode=False):
for user in filtered_users[:max_users]:
try:
users_patch_data[user["id"]] = recover_users_shared_permissions(
user, shared_drive_connection, shared_drive_id, configuration, write_mode
user,
shared_drive_connection,
shared_drive_id,
configuration,
rescan=rescan,
write_mode=write_mode,
)
except (HttpError, UserProcessingError):
# Logged already in called function, continue processing
......@@ -398,7 +418,7 @@ def recover(configuration, write_mode=False):
def recover_users_shared_permissions(
user, shared_drive_connection, shared_drive_id, configuration, write_mode=False
user, shared_drive_connection, shared_drive_id, configuration, rescan=False, write_mode=False
):
LOG.info(f"Recovering user {user.get('primaryEmail', 'Unknown')}, who has id {user['id']}.")
result_of_scan = user.get("customSchemas", {}).get("UCam", {}).get(UcamField.RESULT, "")
......@@ -427,6 +447,11 @@ def recover_users_shared_permissions(
permissions_recovery_doc = yaml.load(raw_doc.decode("utf-8"), Loader=yaml.CLoader)
LOG.debug("Permissions document contents:\n" + pformat(permissions_recovery_doc))
if rescan:
permissions_recovery_doc = rescan_permissions(
user, configuration, permissions_recovery_doc
)
user_drive_connection = GoogleDriveConnection(
configuration, user["id"], write_mode=write_mode
)
......@@ -463,6 +488,50 @@ def recover_users_shared_permissions(
return None
def rescan_permissions(user, configuration, permissions_recovery_doc):
    """Return the recovery document without items whose permissions already exist.

    Fetches the metadata of every file owned by ``user`` (through a drive
    connection constructed with ``write_mode=False``), keeps the shared ones,
    and drops from the recovery document each item that is currently shared
    with permissions matching those recorded in the document.

    Args:
        user: Directory user dict; only ``user["id"]`` is read here.
        configuration: Configuration passed through to ``GoogleDriveConnection``.
        permissions_recovery_doc: Mapping of item id to item details, each
            with a ``"permissions"`` list.

    Returns:
        A new dict holding only the items that still need their permissions
        restored. The document passed in is not modified.
    """
    LOG.info("Rescanning for existing permissions...")
    user_drive_connection = GoogleDriveConnection(configuration, user["id"], write_mode=False)
    all_user_items = user_drive_connection.get_all_owned_files_metadata()
    LOG.info(f"- has {len(all_user_items)} items.")
    user_shared_items = {item["id"]: item for item in all_user_items if file_is_shared(item)}
    LOG.info(f"- has {len(user_shared_items)} shared items.")

    # Remove items from recovery doc if in existing shared items (and permissions match).
    # A set keeps the final filtering step linear in the size of the document.
    items_to_remove = set()
    for item_id, item_details in permissions_recovery_doc.items():
        existing_item = user_shared_items.get(item_id)
        if existing_item is None:
            # Not currently shared (or not found at all): still needs recovering.
            continue
        if matching_permissions(item_details["permissions"], existing_item["permissions"]):
            LOG.info(f"- removing {item_id} from permissions recovery document.")
            items_to_remove.add(item_id)

    LOG.info(f"- removing {len(items_to_remove)} items from permissions recovery document.")
    return {
        item_id: item_details
        for item_id, item_details in permissions_recovery_doc.items()
        if item_id not in items_to_remove
    }
def matching_permissions(p1, p2):
    """Return True when two permission lists describe the same permissions.

    Each permission is a dict. The ``"id"`` key is ignored; every other key
    and value must be equal. The comparison does not depend on the order of
    either list, and duplicates are respected (the lists are compared as
    multisets).

    Args:
        p1: List of permission dicts.
        p2: List of permission dicts to compare against ``p1``.

    Returns:
        True if both lists contain the same permissions (ignoring ids),
        otherwise False.
    """
    LOG.debug(f"Comparing permissions:\n{pformat(p1)}\n{pformat(p2)}")
    if len(p1) != len(p2):
        return False

    def without_id(permission):
        return {k: v for k, v in permission.items() if k != "id"}

    # Pair every permission in p1 with one equal, not-yet-used permission in p2.
    # Sorting by (emailAddress, domain) is not enough: permissions sharing an
    # email address (or having neither field) tie on that key, which would make
    # the result depend on the input order.
    unmatched = [without_id(permission) for permission in p2]
    for permission in p1:
        try:
            unmatched.remove(without_id(permission))
        except ValueError:
            return False
    return True
# SHARED DRIVES #
# ------------- #
def scan_shared_drives(configuration, write_mode=False):
......
......@@ -40,10 +40,18 @@ class MainTestCases(TestCase):
with mock.patch("gdrivemanager.sys.argv", ["gdrivemanager", "recover", "--write"]):
main()
with mock.patch("gdrivemanager.sys.argv", ["gdrivemanager", "recover", "--rescan"]):
main()
with mock.patch("gdrivemanager.sys.argv", ["gdrivemanager", "recover", "--user=ABC123"]):
main()
mock_recover.assert_has_calls(
[
mock.call(CONFIGURATION, write_mode=False),
mock.call(CONFIGURATION, write_mode=True),
mock.call(CONFIGURATION, user_id=None, rescan=False, write_mode=False),
mock.call(CONFIGURATION, user_id=None, rescan=False, write_mode=True),
mock.call(CONFIGURATION, user_id=None, rescan=True, write_mode=False),
mock.call(CONFIGURATION, user_id="ABC123", rescan=False, write_mode=False),
]
)
......
......@@ -19,8 +19,10 @@ from gdrivemanager.manage import (
flush_shared_drive_cache,
get_user_usage_cache,
get_users_for_report,
matching_permissions,
recover,
report,
rescan_permissions,
reset_manual_actions,
scan,
scan_shared_drives,
......@@ -754,9 +756,6 @@ class ManageTestCases(TestCase):
}
)
shared_drive_list_within_limit = datetime(2023, 5, 17, 10, 30, tzinfo=timezone.utc)
shared_drive_list_needs_rebuild = datetime(2023, 5, 30, 10, 30, tzinfo=timezone.utc)
@freeze_time(frozen_time)
@mock.patch("gdrivemanager.manage.GoogleDirectoryConnection")
@mock.patch("gdrivemanager.manage.GoogleDriveConnection")
......@@ -802,6 +801,253 @@ class ManageTestCases(TestCase):
# Batch update called with empty dictionary, no fields to update
mock_directory.return_value.batch_update_user_ucam_fields.assert_called_once_with({})
@freeze_time(frozen_time)
@mock.patch("gdrivemanager.manage.GoogleDirectoryConnection")
@mock.patch("gdrivemanager.manage.GoogleDriveConnection")
def test_recover_with_user(self, mock_drive, mock_directory):
mock_directory.return_value.retrieve_user.return_value = json.loads(
datafile("directory-ordinary-response.json")
)["users"][0]
mock_drive.return_value.get_file_contents_by_name.side_effect = [
(
"shared-permissions-file.yaml",
bytes(datafile("permissions-doc.yaml"), "utf-8"),
)
]
recover(CONFIGURATION, user_id="af123", write_mode=True)
# Construct directory object once
mock_directory.assert_called_once_with(CONFIGURATION, write_mode=True)
# Retrieve user called once with the user's full email address
mock_directory.return_value.retrieve_user.assert_called_once_with(
"af123@gdev.apps.cam.ac.uk"
)
# Construct shared drive connection, and the single user to scan (id 1) drive connection
mock_drive.assert_has_calls(
[
mock.call(CONFIGURATION, write_mode=True),
mock.call(CONFIGURATION, "1", write_mode=True),
],
any_order=True,
)
# Get the permissions document
mock_drive.return_value.get_file_contents_by_name.assert_called_once_with(
"1", drive_id=CONFIGURATION.google.shared_storage_drive
)
# Loaded permissions doc used to create shared permissions
mock_drive.return_value.batch_create_shared_permissions.assert_called_once_with(
yaml.safe_load(datafile("permissions-doc.yaml"))
)
mock_drive.return_value.update_file_metadata.assert_called_once_with(
"shared-permissions-file.yaml",
{"name": "HISTORIC-shared-permissions-file.yaml"},
drive_id=CONFIGURATION.google.shared_storage_drive,
)
# Update fields for the single targeted user (id 1): mark as recovered and clear the
# action.
mock_directory.return_value.batch_update_user_ucam_fields.assert_called_once_with(
{
"1": {
"mydrive-shared-result": "permissions-recovered",
"mydrive-shared-action": "",
},
}
)
@freeze_time(frozen_time)
@mock.patch("gdrivemanager.manage.GoogleDirectoryConnection")
@mock.patch("gdrivemanager.manage.GoogleDriveConnection")
def test_recover_with_missing_user(self, mock_drive, mock_directory):
mock_directory.return_value.retrieve_user.return_value = None
recover(CONFIGURATION, user_id="af123", write_mode=True)
# Construct directory object once
mock_directory.assert_called_once_with(CONFIGURATION, write_mode=True)
# Retrieve user called once with the user's full email address
mock_directory.return_value.retrieve_user.assert_called_once_with(
"af123@gdev.apps.cam.ac.uk"
)
# Shared drive connection never constructed
mock_drive.assert_not_called()
@freeze_time(frozen_time)
@mock.patch("gdrivemanager.manage.GoogleDirectoryConnection")
@mock.patch("gdrivemanager.manage.GoogleDriveConnection")
def test_recover_with_user_not_marked(self, mock_drive, mock_directory):
user = json.loads(datafile("directory-ordinary-response.json"))["users"][0]
user["customSchemas"]["UCam"]["mydrive-shared-action"] = ""
mock_directory.return_value.retrieve_user.return_value = user
recover(CONFIGURATION, user_id="af123", write_mode=True)
# Construct directory object once
mock_directory.assert_called_once_with(CONFIGURATION, write_mode=True)
# Retrieve user called once with the user's full email address
mock_directory.return_value.retrieve_user.assert_called_once_with(
"af123@gdev.apps.cam.ac.uk"
)
# Shared drive connection never constructed
mock_drive.assert_not_called()
@freeze_time(frozen_time)
@mock.patch("gdrivemanager.manage.rescan_permissions")
@mock.patch("gdrivemanager.manage.GoogleDirectoryConnection")
@mock.patch("gdrivemanager.manage.GoogleDriveConnection")
def test_recover_with_rescan(self, mock_drive, mock_directory, mock_rescan):
user = json.loads(datafile("directory-ordinary-response.json"))["users"][0]
mock_directory.return_value.retrieve_user.return_value = user
mock_drive.return_value.get_file_contents_by_name.side_effect = [
(
"shared-permissions-file.yaml",
bytes(datafile("permissions-doc.yaml"), "utf-8"),
)
]
# One file removed from the permissions document
original_doc = yaml.safe_load(datafile("permissions-doc.yaml"))
filtered_doc = {k: v for k, v in original_doc.items() if k != "f1"}
mock_rescan.return_value = filtered_doc
recover(CONFIGURATION, user_id="af123", rescan=True, write_mode=True)
# Construct directory object once
mock_directory.assert_called_once_with(CONFIGURATION, write_mode=True)
# Retrieve user called once with the user's full email address
mock_directory.return_value.retrieve_user.assert_called_once_with(
"af123@gdev.apps.cam.ac.uk"
)
# Construct shared drive connection, and the single user to scan (id 1) drive connection
mock_drive.assert_has_calls(
[
mock.call(CONFIGURATION, write_mode=True),
mock.call(CONFIGURATION, "1", write_mode=True),
],
any_order=True,
)
# Get the permissions document
mock_drive.return_value.get_file_contents_by_name.assert_called_once_with(
"1", drive_id=CONFIGURATION.google.shared_storage_drive
)
# Rescan called with original document
mock_rescan.assert_called_once_with(user, CONFIGURATION, original_doc)
# Filtered by rescan permissions doc used to create shared permissions
mock_drive.return_value.batch_create_shared_permissions.assert_called_once_with(
filtered_doc
)
mock_drive.return_value.update_file_metadata.assert_called_once_with(
"shared-permissions-file.yaml",
{"name": "HISTORIC-shared-permissions-file.yaml"},
drive_id=CONFIGURATION.google.shared_storage_drive,
)
# Update fields for the single targeted user (id 1): mark as recovered and clear the
# action.
mock_directory.return_value.batch_update_user_ucam_fields.assert_called_once_with(
{
"1": {
"mydrive-shared-result": "permissions-recovered",
"mydrive-shared-action": "",
},
}
)
@mock.patch("gdrivemanager.manage.GoogleDriveConnection")
def test_rescan_permissions(self, mock_drive):
original_doc = yaml.safe_load(datafile("permissions-doc.yaml"))
# Mock that files f1 and f4 already exist, are shared and have permissions though f4's
# permissions are slightly different
existing_files = {
k: {**v, "shared": True} for k, v in original_doc.items() if k in ["f1", "f4"]
}
existing_files["f4"]["permissions"] = [
{
"emailAddress": "rjg21@gdev.apps.cam.ac.uk",
"id": "p9", # was p3 but ignored in comparison anyway
"role": "viewer", # was "editor"
"type": "user",
},
{
"emailAddress": "mk2155@gdev.apps.cam.ac.uk",
"id": "p2",
"role": "owner",
"type": "user",
},
]
mock_drive.return_value.get_all_owned_files_metadata.return_value = [
{"id": k, **v} for k, v in existing_files.items()
]
user = json.loads(datafile("directory-ordinary-response.json"))["users"][0]
resulting_doc = rescan_permissions(user, CONFIGURATION, original_doc)
# Construct a read-only drive connection for the single user being rescanned (id 1)
mock_drive.assert_called_once_with(CONFIGURATION, "1", write_mode=False)
mock_drive.return_value.get_all_owned_files_metadata.assert_called_once_with()
# f1 should be removed from the resulting document as it already exists with matching permissions
self.assertNotIn("f1", resulting_doc)
# f4 should be present and with the original permissions, as its existing permissions differ
self.assertIn("f4", resulting_doc)
self.assertEqual(resulting_doc["f4"]["permissions"], original_doc["f4"]["permissions"])
# f5 shouldn't have been removed as it didn't exist
self.assertIn("f5", resulting_doc)
def test_matching_permissions(self):
writer = {
"id": "p1",
"emailAddress": "rjg21@gdev.apps.cam.ac.uk",
"role": "writer",
"type": "user",
}
reader = {
"id": "p2",
"emailAddress": "rjg21@gdev.apps.cam.ac.uk",
"role": "reader",
"type": "user",
}
domain_reader = {
"id": "p3",
"domain": "gdev.apps.cam.ac.uk",
"role": "reader",
"type": "domain",
}
anyone_writer = {
"id": "anyoneWithLink",
"role": "writer",
"type": "anyone",
}
# different lengths don't match
self.assertFalse(matching_permissions([writer], [writer, reader]))
# different roles don't match
self.assertFalse(matching_permissions([writer], [reader]))
# different types don't match
self.assertFalse(matching_permissions([reader], [domain_reader]))
self.assertFalse(matching_permissions([writer], [anyone_writer]))
# same permissions match
self.assertTrue(
matching_permissions(
[reader, writer, domain_reader, anyone_writer],
[reader, writer, domain_reader, anyone_writer],
)
)
# ordering doesn't matter
self.assertTrue(
matching_permissions(
[reader, writer, domain_reader, anyone_writer],
[reader, domain_reader, anyone_writer, writer],
)
)
# different ids are ignored
similar_writer = {k: (v if k != "id" else "p9") for k, v in writer.items()}
self.assertTrue(matching_permissions([writer], [similar_writer]))
shared_drive_list_within_limit = datetime(2023, 5, 17, 10, 30, tzinfo=timezone.utc)
shared_drive_list_needs_rebuild = datetime(2023, 5, 30, 10, 30, tzinfo=timezone.utc)
......
......@@ -16,7 +16,7 @@ def load_requirements():
setup(
name="gdrivemanager",
version="1.1.1",
version="1.1.2",
packages=find_packages(),
install_requires=load_requirements(),
entry_points={
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment