FAQ | This is a LIVE service | Changelog

Commit 7a755b6c authored by Robin Goodall's avatar Robin Goodall 💬
Browse files

Merge branch 'ignore-missing-usns' into 'master'

Filter out USNs from group add changes if lookup doesn't recognise them

Closes #6

See merge request !4
parents 76f04e2a 84fb45aa
Pipeline #211565 passed with stages
in 3 minutes and 19 seconds
......@@ -20,7 +20,9 @@ variables:
cobertura: ./artefacts/**/coverage.xml
coverage_format: cobertura
path: ./artefacts/**/coverage.xml
stage: deploy
......@@ -49,8 +49,9 @@ from .api_gateway import create_api_gateway_session
from .inst_mapping import fetch_inst_mapping
from .student_api import get_students_by_group
from .lookup import (
create_lookup_connection, compare_with_lookup_groups,
update_lookup_groups, create_lookup_groups, strip_groups_missing_insts)
create_lookup_connection, compare_with_lookup_groups, check_usns_in_lookup,
filter_usns_from_changes, create_lookup_groups, strip_groups_missing_insts,
STUDENT_API_ROOT = 'https://api.apps.cam.ac.uk/university-student/v1alpha2/'
INST_MAPPING_API_ROOT = 'https://api.apps.cam.ac.uk/institutions/mapping/v1/'
......@@ -105,7 +106,7 @@ def _student_inst_members(opts: dict, dry_run: bool):
# Build a map of Lookup group name to sets of students within that institution with status
# matching career.
students_by_group = get_students_by_group(session, inst_map)
students_by_group, student_names_by_id = get_students_by_group(session, inst_map)
# Sanity check
if len(students_by_group) == 0:
......@@ -121,14 +122,30 @@ def _student_inst_members(opts: dict, dry_run: bool):
ibis_conn = create_lookup_connection(opts)
ibis_group_methods = ibisclient.GroupMethods(ibis_conn)
ibis_person_methods = ibisclient.PersonMethods(ibis_conn)
ibis_inst_methods = ibisclient.InstitutionMethods(ibis_conn)
# Calculate changes and find missing groups
(missing_groups, group_changes) = compare_with_lookup_groups(
ibis_group_methods, students_by_group)
ibis_group_methods, students_by_group, student_names_by_id)
# Find USNs (that are being added) that are not recognised by lookup
all_usns_to_add = {
for _, changes in group_changes.items()
for usn in changes['add']
missing_usns = check_usns_in_lookup(ibis_person_methods, all_usns_to_add)
# Filter missing USNs from group changes
filter_usns_from_changes(group_changes, missing_usns, student_names_by_id)
# No need to create groups if they no longer have members to add
missing_groups = {
group for group in missing_groups if group_changes.get(group)['add']
if missing_groups:
# Create groups that couldn't be found
ibis_inst_methods = ibisclient.InstitutionMethods(ibis_conn)
missing_insts = create_lookup_groups(ibis_inst_methods, missing_groups, dry_run)
if missing_insts:
LOG.info('%s institution(s) not found', len(missing_insts))
......@@ -140,4 +157,4 @@ def _student_inst_members(opts: dict, dry_run: bool):
previous_count - len(group_changes))
# Make changes to Lookup groups
update_lookup_groups(ibis_group_methods, group_changes, dry_run)
update_lookup_groups(ibis_group_methods, group_changes, student_names_by_id, dry_run)
from typing import Dict
import logging
import os
import sys
......@@ -6,12 +5,14 @@ import requests_oauthlib
from identitylib.identifiers import Identifier, IdentifierSchemes
from .types import InstMapping
INST_MAPPING_API_ROOT = 'https://api.apps.cam.ac.uk/institutions/mapping/v1/'
LOG = logging.getLogger(os.path.basename(sys.argv[0]))
def fetch_inst_mapping(session: requests_oauthlib.OAuth2Session) -> Dict[str, str]:
def fetch_inst_mapping(session: requests_oauthlib.OAuth2Session) -> InstMapping:
Fetch institutional mapping dict from API Gateway. Use it to compile a mapping of Student
Records Institution ids to Lookup instids
......@@ -19,7 +20,7 @@ def fetch_inst_mapping(session: requests_oauthlib.OAuth2Session) -> Dict[str, st
r = session.get(INST_MAPPING_API_ROOT)
inst_map = {}
inst_map: InstMapping = {}
for datum in r.json().get('institutions', []):
for i in datum.get('identifiers', []):
from typing import Dict, List, Set, Tuple
import logging
from collections.abc import Generator, Iterable
import os
import sys
import logging
import itertools
import ibisclient
LOG = logging.getLogger(os.path.basename(sys.argv[0]))
from .types import (GroupChange, GroupChanges, GroupName, GroupSet,
InstIdSet, LookupInstId,
StudentId, StudentIdSet, StudentMapping, StudentsByGroup)
# Convenient type definition for group changes dict
GroupChanges = Dict[str, Dict[str, Set[str]]]
LOG = logging.getLogger(os.path.basename(sys.argv[0]))
# Make group 'career' suffix to group title suffix
......@@ -22,6 +25,9 @@ GROUP_DESCRIPTION = (
# Transaction comments for group creation and update
# Request lists of people from lookup in chunks to avoid overflowing querystring
def create_lookup_connection(opts: dict) -> ibisclient.IbisClientConnection:
......@@ -50,7 +56,7 @@ def create_lookup_connection(opts: dict) -> ibisclient.IbisClientConnection:
return ibis_conn
def group_name(instid: str, career: str):
def group_name(instid: LookupInstId, career: str) -> GroupName:
Create Lookup group name from institution id and academic career mapping
......@@ -65,7 +71,8 @@ def group_name(instid: str, career: str):
def compare_with_lookup_groups(
ibis_group_methods: ibisclient.GroupMethods,
students_by_group: Dict[str, Set[str]]) -> Tuple[Set[str], GroupChanges]:
students_by_group: StudentsByGroup,
student_names_by_id: StudentMapping) -> tuple[GroupSet, GroupChanges]:
Check each lookup group exists and compare its membership to supplied set.
Provide set of missing Lookup groups, and necessary changes as a dict mapping
......@@ -73,23 +80,22 @@ def compare_with_lookup_groups(
# calculate changes
missing_groups = set()
group_changes = dict()
missing_groups: GroupSet = set()
group_changes: GroupChanges = dict()
for group, students in sorted(students_by_group.items()):
# Check that group exists
if ibis_group_methods.getGroup(group) is None:
# Will want to add everyone after creating the group
group_changes[group] = {'add': students, 'remove': set()}
group_changes[group] = GroupChange(add=students, remove=set())
LOG.info('Group "%s" needs creating with %s students', group, len(students))
LOG.info('Group "%s" should have %s student(s):', group, len(students))
# Get lookup direct membership
members: List[ibisclient.IbisPerson] = ibis_group_methods.getDirectMembers(
members: list[ibisclient.IbisPerson] = ibis_group_methods.getDirectMembers(
group, 'all_identifiers')
LOG.info('- Lookup has %s member(s)', len(members))
# Get set of USNs from membership
group_usns = {
......@@ -98,38 +104,114 @@ def compare_with_lookup_groups(
for id in person.identifiers if id.scheme == 'usn'
'- with %s USNs%s', len(group_usns),
'- Lookup has %s member(s) with %s USNs%s',
len(members), len(group_usns),
' - mismatch with membership' if len(group_usns) != len(members) else ''
# Determine who to add and/or remove
to_add = students - group_usns
LOG.info('- %s need adding', len(to_add))
if to_add:
LOG.info('- %s need%s adding', len(to_add), 's' if len(to_add) == 1 else '')
to_remove = group_usns - students
LOG.info('- %s need removing', len(to_remove))
if to_add | to_remove:
group_changes[group] = {'add': to_add, 'remove': to_remove}
if to_remove:
LOG.info('- %s need%s removing', len(to_remove), 's' if len(to_add) == 1 else '')
# Add their names to our id to name mapping
id.value: person.visibleName
for person in members if person.identifiers is not None
for id in person.identifiers if id.scheme == 'usn' and id.value in to_remove
if to_add or to_remove:
group_changes[group] = GroupChange(add=to_add, remove=to_remove)
LOG.info('%s group(s) need creating', len(missing_groups))
LOG.info('%s group(s) need changes', len(group_changes))
return (missing_groups, group_changes)
def chunker(n: int, ids: Iterable[StudentId]) -> Generator[tuple[int, StudentIdSet], None, None]:
Yield chunks of size `n` from `ids`. The last chunk may be smaller.
If `n` is not greater than the size of `ids`, just yields the one chunk
if n <= 0:
yield 0, set(ids)
iterators = [iter(ids)] * n
chunks = itertools.zip_longest(*iterators, fillvalue=None)
# unfortunately we need to drop the Nones on the last element
for i, chunk in enumerate(chunks):
yield i, set(filter(None, chunk))
def check_usns_in_lookup(
ibis_person_methods: ibisclient.PersonMethods,
all_usns_to_add: StudentIdSet) -> StudentIdSet:
Check that the set of students to add have USNs recognised by Lookup.
Return a list of those that don't.
matched_usns: StudentIdSet = set()
LOG.info('Checking USNs exist in Lookup:')
for idx, usns in chunker(CHUNK_SIZE, all_usns_to_add):
people: list[ibisclient.IbisPerson] = ibis_person_methods.listPeople(
[f'usn/{usn}' for usn in usns],
usns_of_people: StudentIdSet = {
for person in people if person.identifiers is not None
for id in person.identifiers if id.scheme == 'usn'
LOG.info(f'- batch {idx} : missing {len(usns) - len(usns_of_people)} of {len(usns)} USNs')
matched_usns |= usns_of_people
return all_usns_to_add - matched_usns
def filter_usns_from_changes(
group_changes: GroupChanges, missing_usns: StudentIdSet,
student_names_by_id: StudentMapping
) -> None:
Filter USNs to add from the changes if they are in the missing USNs list.
Log which USNs are ignored for each group.
for group, changes in group_changes.items():
missing_in_group_add = {
usn for usn in changes['add'] if usn in missing_usns
if not missing_in_group_add:
LOG.info('Ignoring in group %s:', group)
for usn in missing_in_group_add:
LOG.info('- usn/%s (%s)', usn, student_names_by_id.get(usn, 'Unknown'))
# remove ignored USNs from set to add
changes['add'] -= missing_in_group_add
def update_lookup_groups(
ibis_group_methods: ibisclient.GroupMethods,
group_changes: GroupChanges,
dry_run: bool = True):
student_names_by_id: StudentMapping,
dry_run: bool = True) -> None:
Log and update (if not a dry-run) group memberships
for group, changes in group_changes.items():
if not changes['add'] and not changes['remove']:
LOG.info('Updating %s:', group)
for usn in changes['add']:
LOG.info('- adding usn/%s', usn)
LOG.info('- adding usn/%s (%s)', usn, student_names_by_id.get(usn, 'Unknown'))
for usn in changes['remove']:
LOG.info('- removing usn/%s', usn)
LOG.info('- removing usn/%s (%s)', usn, student_names_by_id.get(usn, 'Unknown'))
if dry_run:
LOG.info('- skipping update in dry-run mode')
......@@ -143,8 +225,8 @@ def update_lookup_groups(
def create_lookup_groups(
ibis_inst_methods: ibisclient.InstitutionMethods,
groups_to_create: Set[str],
dry_run: bool = True) -> Set[str]:
groups_to_create: GroupSet,
dry_run: bool = True) -> InstIdSet:
Log and create (if not a dry-run) lookup groups with names, titles, descriptions
and the current lookup account as the only manager.
......@@ -154,9 +236,9 @@ def create_lookup_groups(
# Get current authenticated lookup account
managed_by = ibis_inst_methods.conn.username
# Cache institution details so we can name, title and describe groups appropriately
institutions: Dict[str, ibisclient.IbisInstitution] = {}
institutions: dict[LookupInstId, ibisclient.IbisInstitution] = {}
# Compile a set of institutions we couldn't find
missing_insts = set()
missing_insts: InstIdSet = set()
for group in sorted(groups_to_create):
# split group name into instid and career (ug or pg)
......@@ -188,7 +270,7 @@ def create_lookup_groups(
def strip_groups_missing_insts(
group_changes: GroupChanges, missing_insts: Set[str]) -> GroupChanges:
group_changes: GroupChanges, missing_insts: InstIdSet) -> GroupChanges:
Remove group changes that would belong to institutions that couldn't be found.
from typing import Dict, Generator, List, Optional, Set
from typing import Optional
from collections.abc import Generator
import logging
import os
import sys
......@@ -9,6 +10,7 @@ import urllib.parse
import requests_oauthlib
from identitylib.identifiers import IdentifierSchemes
from .types import InstMapping, StudentMapping, StudentsByGroup
from .lookup import group_name
STUDENT_API_ROOT = 'https://api.apps.cam.ac.uk/university-student/v1alpha2/'
......@@ -22,7 +24,8 @@ LOG = logging.getLogger(os.path.basename(sys.argv[0]))
def get_students_by_group(
session: requests_oauthlib.OAuth2Session, inst_map: Dict[str, str]) -> Dict[str, Set[str]]:
session: requests_oauthlib.OAuth2Session,
inst_map: InstMapping) -> tuple[StudentsByGroup, StudentMapping]:
Create a map from Lookup group name to sets of students within that institution with status
matching career. Group names are formed from the Lookup instids and student identifiers
......@@ -31,8 +34,11 @@ def get_students_by_group(
Note that since students can be members of more than one institution, the sum of the lengths
of each sets may not equal the length of the union of all of the sets.
Also create mapping for student ids to names for logging.
students_by_group = {}
students_by_group: StudentsByGroup = {}
student_names_by_id: StudentMapping = {}
# Capture ignored affiliations and careers
ignored_affiliations = set()
......@@ -85,8 +91,9 @@ def get_students_by_group(
if id_scheme != IdentifierSchemes.USN:
# Add USN to group
# Add USN and remember name for USN
student_names_by_id[i.value] = f'{s.forenames} {s.surname}'
# Report ignored values (possibly missing from inst mapping or career mapping above)
if ignored_affiliations:
......@@ -98,7 +105,7 @@ def get_students_by_group(
for a in sorted(ignored_careers):
LOG.info(f'- {a}')
return students_by_group
return students_by_group, student_names_by_id
# Student API schema
......@@ -129,15 +136,15 @@ class Student(pydantic.BaseModel):
Student resource from Student API.
affiliations: List[StudentAffiliation]
affiliations: list[StudentAffiliation]
forenames: str
identifiers: List[StudentIdentifier]
identifiers: list[StudentIdentifier]
namePrefixes: str
surname: str
def fetch_all_students(
session: requests_oauthlib.OAuth2Session) -> Generator[List[Student], None, None]:
session: requests_oauthlib.OAuth2Session) -> Generator[list[Student], None, None]:
Fetch all students from the students API. *Generates* a list of Student resources.
from typing import Dict
from contextlib import contextmanager
from tempfile import NamedTemporaryFile
import os
......@@ -35,7 +34,7 @@ class MockResponse():
class MockSession():
def __init__(self, responses: Dict[str, Dict]):
def __init__(self, responses: dict[str, dict]):
self.responses = responses
self.urls_got = set()
from typing import List, Dict, Optional, Tuple, Union
from typing import Optional, Union
import faker
from identitylib.identifiers import IdentifierSchemes
......@@ -13,7 +13,7 @@ class StudentProvider(faker.providers.BaseProvider):
def student(self, **kwargs) -> Dict[str, Union[str, Dict]]:
def student(self, **kwargs) -> dict[str, Union[str, list, dict]]:
A single fake student, overriding properties as necessary
......@@ -35,13 +35,13 @@ class StudentProvider(faker.providers.BaseProvider):
else (' ' + self.generator.first_name())
def affiliations(self, num_affiliations=1, **kwargs) -> List[Dict[str, str]]:
def affiliations(self, num_affiliations=1, **kwargs) -> list[dict[str, str]]:
return [self.affiliation(**kwargs) for _ in range(num_affiliations)]
def identifiers(self, num_identifiers=1, **kwargs) -> List[Dict[str, str]]:
def identifiers(self, num_identifiers=1, **kwargs) -> list[dict[str, str]]:
return [self.identifier(**kwargs) for _ in range(num_identifiers)]
def affiliation(self, **kwargs) -> Dict[str, str]:
def affiliation(self, **kwargs) -> dict[str, str]:
value, scheme = (kwargs.get('affiliation_id') or self.affiliation_id(**kwargs)).split('@')
start, end = kwargs.get('affiliation_period') or self.affiliation_period(**kwargs)
return {
......@@ -56,14 +56,14 @@ class StudentProvider(faker.providers.BaseProvider):
# Provides example affiliation (in test mapping) with expected scheme
return f'{SR_INST_VALUE}@{IdentifierSchemes.STUDENT_INSTITUTION}'
def affiliation_period(self, **kwargs) -> Tuple[Optional[str], Optional[str]]:
def affiliation_period(self, **kwargs) -> tuple[Optional[str], Optional[str]]:
# Unspecified start and end dates
return (None, None)
def affiliation_status(self, **kwargs) -> str:
return self.random_element(ACADEMIC_CAREER_MAPPING.keys())
def identifier(self, **kwargs) -> Dict[str, str]:
def identifier(self, **kwargs) -> dict[str, str]:
value, scheme = (kwargs.get('identifier_id') or self.identifier_id(**kwargs)).split('@')
return {
'value': value,
from typing import Dict, List
import logging
from unittest.mock import MagicMock, patch
import pytest
......@@ -6,8 +6,8 @@ import ibisclient
from . import temp_password_file
from lookupsync.lookup import (
create_lookup_connection, compare_with_lookup_groups,
create_lookup_groups, update_lookup_groups,
check_usns_in_lookup, create_lookup_connection, compare_with_lookup_groups,
create_lookup_groups, filter_usns_from_changes, update_lookup_groups,
......@@ -105,7 +105,21 @@ def mock_students_by_group():
def mock_lookup_group_members(mock_students_by_group):
def mock_student_names_by_id():
An example dict of student ids to their visible names
return {
'123456789': 'Emil Tatton',
'234567890': 'Pen Goddard',
'345678901': 'Lucia Bennington',
'456789012': 'Lynette Massey',
'567890123': 'Dorthy Adams',
def mock_lookup_group_members(mock_students_by_group, mock_student_names_by_id):
An example dict of Lookup groups to list of ibisclient.IbisPerson records matching
students_by_group mock
......@@ -116,6 +130,7 @@ def mock_lookup_group_members(mock_students_by_group):
setattr(identifier, 'scheme', 'usn')
setattr(identifier, 'value', usn)
setattr(person, 'identifiers', [identifier])
setattr(person, 'visibleName', mock_student_names_by_id.get(usn))
return person
return {
......@@ -126,7 +141,7 @@ def mock_lookup_group_members(mock_students_by_group):
# Mock ibisclient.GroupMethods
class MockGroupMethods:
def __init__(self, groups: List[Dict[str, ibisclient.IbisPerson]] = list()) -> None:
def __init__(self, groups: list[dict[str, ibisclient.IbisPerson]] = []) -> None:
self.groups = groups
self.updates = {}
......@@ -149,7 +164,7 @@ def test_compare_with_lookup_groups(mock_students_by_group, mock_lookup_group_me
# Perform comparison
mock_group_methods = MockGroupMethods(mock_lookup_group_members)
missing_groups, group_changes = compare_with_lookup_groups(
mock_group_methods, mock_students_by_group)
mock_group_methods, mock_students_by_group, {})
# Empty results
assert missing_groups == set()
......@@ -172,7 +187,7 @@ def test_compare_with_lookup_groups_missing_group(
# Perform comparison
mock_group_methods = MockGroupMethods(mock_lookup_group_members)
missing_groups, group_changes = compare_with_lookup_groups(
mock_group_methods, mock_students_by_group)
mock_group_methods, mock_students_by_group, {})
# Just new group reported missing and all members appear in changes to be added
assert missing_groups == {NEW_GROUP}
......@@ -180,7 +195,7 @@ def test_compare_with_lookup_groups_missing_group(
def test_compare_with_lookup_groups_changes(
mock_students_by_group, mock_lookup_group_members):
mock_students_by_group, mock_lookup_group_members, mock_student_names_by_id):
Group changes to add and/or remove members are reported
......@@ -189,31 +204,35 @@ def test_compare_with_lookup_groups_changes(
USN_TO_ADD = '999999999'
NAME_TO_ADD = 'Kacie Elwyn'
USN_TO_REMOVE = '456789012'
NAME_TO_REMOVE = 'Lynette Massey'
# Assert groups exist
assert GROUP_TO_ADD_ONLY_TO in mock_students_by_group
assert GROUP_TO_REMOVE_ONLY_FROM in mock_students_by_group
assert GROUP_TO_ADD_TO_AND_REMOVE_FROM in mock_students_by_group
# Assert USNs to add don't already exist in appropriate groups
# Assert USN student to add don't already exist in appropriate groups
assert USN_TO_ADD not in mock_students_by_group[GROUP_TO_ADD_ONLY_TO]
assert USN_TO_ADD not in mock_students_by_group[GROUP_TO_ADD_TO_AND_REMOVE_FROM]
# Assert USNs to remove do already exist in appropriate groups
# Assert USN of student to remove does already exist in appropriate groups
assert USN_TO_REMOVE in mock_students_by_group[GROUP_TO_REMOVE_ONLY_FROM]
assert USN_TO_REMOVE in mock_students_by_group[GROUP_TO_ADD_TO_AND_REMOVE_FROM]
# Make changes
mock_student_names_by_id[USN_TO_ADD] = NAME_TO_ADD
del mock_student_names_by_id[USN_TO_REMOVE]
# Perform comparison
mock_group_methods = MockGroupMethods(mock_lookup_group_members)
missing_groups, group_changes = compare_with_lookup_groups(
mock_group_methods, mock_students_by_group)
mock_group_methods, mock_students_by_group, mock_student_names_by_id)
# No missing groups but appropriate changes reported
assert missing_groups == set()
......@@ -222,9 +241,11 @@ def test_compare_with_lookup_groups_changes(
GROUP_TO_REMOVE_ONLY_FROM: {'add': set(), 'remove': {USN_TO_REMOVE}},
# Student to remove has their name mapped from their USN
assert mock_student_names_by_id.get(USN_TO_REMOVE) == NAME_TO_REMOVE
def test_update_lookup_groups():
def test_update_lookup_groups(caplog):
update_lookup_groups calls updateDirectMembers method of ibisclient.GroupMethods instance
for each group with matching changes
......@@ -236,18 +257,20 @@ def test_update_lookup_groups():
'foo-sis-pg': {'add': set(), 'remove': {'333', '444'}},
'bar-sis-ug': {'add': {'555', '666'}, 'remove': {'777', '888'}},
NAMES_BY_ID = {str(x)*3: f'Name {x}' for x in range(1, 9)}
mock_group_methods = MockGroupMethods()
# Make update calls (dry-run)