FAQ | This is a LIVE service | Changelog

Commit 051d1bff authored by Monty Dawson's avatar Monty Dawson
Browse files

Merge branch 'complete-student-inst-members' into 'master'

Complete student inst members

See merge request !1
parents 7b465b76 25a16e8c
Pipeline #191981 passed with stages
in 3 minutes and 26 seconds
FROM registry.gitlab.developers.cam.ac.uk/uis/devops/infra/dockerimages-private/python-oracle-instantclient
# oracle-instantclient image is based on ubuntu
FROM registry.gitlab.developers.cam.ac.uk/uis/devops/infra/dockerimages/python:3.9-slim
WORKDIR /usr/src/app
......
# Lookup Membership Synchronisation Tool
Tool to query Lookup for CHRIS and CAMSIS insitutional membership.
Tool to query Lookup for CHRIS and CamSIS institutional membership.
> This tool is incomplete.
> This tool currently has only the one operation, `student-inst-members`, which is able to compare
> CamSIS student affiliations to Lookup group membership and update as appropriate. It is not able
> to create Lookup groups.
>
> Additionally, an operation to do the same for CHRIS institution membership is yet to be
> implemented.
## Installation
......@@ -16,9 +21,9 @@ pip3 install --user https://gitlab.developers.cam.ac.uk/uis/devops/iam/ibis/memb
See the output from `--help` for usage.
## Programattic use
## Programmatic use
This tool can also be called programatically by importing the `main` function
This tool can also be called programmatically by importing the `main` function
and calling it with command line arguments:
```python
......
......@@ -6,7 +6,7 @@ Usage:
lookupsync student-inst-members --gateway-client-id=CLIENT_ID --lookup-username=USERNAME
( --gateway-client-secret=CLIENT_SECRET | --gateway-client-secret-from=PATH )
( --lookup-password=PASSWORD | --lookup-password-from=PATH )
[--quiet] [--debug]
[--quiet] [--debug] [--lookup-test] [--really-do-this]
Options:
-h, --help Show a brief usage summary.
......@@ -28,26 +28,35 @@ Options:
Leading and trailing whitespace is trimmed. This is preferable
to passing secrets on the command line.
--lookup-test Use Lookup test instance instead of production
--really-do-this Actually attempt to update lookup group memberships, otherwise
just output what would have been changed.
Operations:
The student-inst-members operation will synchronise membership of institutions from CamSIS'
affiliation records.
"""
import datetime
import logging
import os
import sys
import typing
import urllib.parse
import docopt
import oauthlib.oauth2
import pydantic
import requests_oauthlib
API_GATEWAY_TOKEN_URL = 'https://api.apps.cam.ac.uk/oauth2/v1/token'
import ibisclient
from .api_gateway import create_api_gateway_session
from .inst_mapping import fetch_inst_mapping
from .student_api import get_students_by_group
from .lookup import create_lookup_connection, compare_with_lookup_groups, update_lookup_groups
STUDENT_API_ROOT = 'https://api.apps.cam.ac.uk/university-student/v1alpha2/'
INST_MAPPING_API_ROOT = 'https://api.apps.cam.ac.uk/institutions/mapping/v1/'
ACADEMIC_CAREER_MAPPING = {
'UGRD': 'ug',
'PGRD': 'pg',
}
LOG = logging.getLogger(os.path.basename(sys.argv[0]))
......@@ -66,166 +75,59 @@ def main(argv=None):
logging.WARN if opts["--quiet"] else logging.INFO
)
dry_run = not opts['--really-do-this']
if dry_run:
LOG.warning('Operating in dry-run mode - no changes will be made')
if opts['student-inst-members']:
_student_inst_members(opts)
_student_inst_members(opts, dry_run)
# OPERATIONS
def _student_inst_members(opts):
def _student_inst_members(opts: dict, dry_run: bool):
"""
Synchronise institutional memberships for students.
"""
# TODO: This is incomplete.
session = _create_api_gateway_session(opts)
# Fetch insitutional mapping and pre-compute a dict mapping
# institution.v1.student-records.university.identifiers.cam.ac.uk ids to lookup instids.
r = session.get(INST_MAPPING_API_ROOT)
r.raise_for_status()
inst_map = {}
for datum in r.json().get('institutions', []):
for i in datum.get('identifiers', []):
try:
value, scheme = i.split('@')
except ValueError:
LOG.warn('Ignoring identifier with non-email formatting: %s', i)
continue
# TODO: looks like the scheme returned by the insitutional map API doesn't match the
# scheme returned by the student API?
if scheme == 'institution.v1.student.university.identifiers.cam.ac.uk':
inst_map[value] = datum['instid']
LOG.info('Fetched mapping for %s institutions.', len(inst_map))
# A map from institutions to sets of students within that institution. Institution identifiers
# are Lookup instids and student identifiers are those with scheme
# person.v1.student-records.university.identifiers.cam.ac.uk.
#
# Note that since students can be members of more than one institution, the sum of the lengths
# of the sets may not equal the length of the union of all of the sets.
students_by_inst = {}
# TODO: map from person.v1.student-records.university.identifiers.cam.ac.uk to CRSid via USN
# identifier scheme in Lookup.
# Fetch all students parsed as instances of Student.
today = datetime.date.today()
for s in _fetch_all_students(session):
# Add student to any institutions they are affiliated with.
for a in s.affiliations:
# Ignore non college/departmental affiliations.
if a.scheme != 'institution.v1.student-records.university.identifiers.cam.ac.uk':
continue
# Ignore expired or yet to be affiliations.
if a.end is not None and a.end < today:
continue
if a.start is not None and a.start > today:
continue
# Find the instid for this institution. Continue if there is no known mapping.
instid = inst_map.get(a.value)
if instid is None:
continue
# Ensure there is a set of student ids for this institution.
if instid not in students_by_inst:
students_by_inst[instid] = set()
student_ids = students_by_inst[instid]
for i in s.identifiers:
# Only identifiers from a known scheme are used.
if i.scheme != 'person.v1.student-records.university.identifiers.cam.ac.uk':
continue
# Add identifier to list.
student_ids.add(i.value)
session = create_api_gateway_session(opts)
# Log some stats
LOG.info('Fetched record(s) for %s institutions.', len(students_by_inst))
for inst_id, students in students_by_inst.items():
LOG.info('Institution "%s" has %s student(s).', inst_id, len(students))
LOG.info('Total affiliation count: %s', sum(len(s) for s in students_by_inst.values()))
all_students = set()
for s in students_by_inst.values():
all_students |= s
LOG.info('Total student count: %s', len(all_students))
# TODO: actually perform synchronisation here.
return '{ "status": "ok" }', {'Content-Type': 'application/json'}
# Fetch institutional mapping, a dict mapping Student Records Inst ids to Lookup instids
inst_map = fetch_inst_mapping(session)
# Sanity check
if len(inst_map) == 0:
raise RuntimeError('Failed to fetch any institutional mappings')
# UTILITY FUNCTIONS AND CLASSES
# Build a map of Lookup group name to sets of students within that institution with status
# matching career.
students_by_group = get_students_by_group(session, inst_map)
# Sanity check
if len(students_by_group) == 0:
raise RuntimeError('Failed to fetch any student records')
class StudentIdentifier(pydantic.BaseModel):
"""
Identifier resource from Student API.
"""
scheme: str
value: str
class StudentAffiliation(pydantic.BaseModel):
"""
Affiliation resource from Student API.
"""
end: typing.Optional[datetime.date]
scheme: str
start: typing.Optional[datetime.date]
status: str
value: str
class Student(pydantic.BaseModel):
"""
Student resource from Student API.
"""
affiliations: typing.List[StudentAffiliation]
forenames: str
identifiers: typing.List[StudentIdentifier]
namePrefixes: str
surname: str
def _create_api_gateway_session(opts):
"""
Create a requests session object authenticated to use the API Gateway via the credentials
passed in lookup_sync_client_credentials.
"""
client_id = opts['--gateway-client-id']
client_secret = opts.get('--gateway-client-secret')
client_secret_from = opts.get('--gateway-client-secret-from')
if client_secret_from is not None:
with open(client_secret_from) as fobj:
client_secret = fobj.read().strip()
# Log some stats
LOG.info('Fetched record(s) for %s groups.', len(students_by_group))
LOG.info('Total affiliation count: %s', sum(len(s) for s in students_by_group.values()))
all_students = set()
for s in students_by_group.values():
all_students |= s
LOG.info('Total student count: %s', len(all_students))
client = oauthlib.oauth2.BackendApplicationClient(client_id=client_id)
oauth = requests_oauthlib.OAuth2Session(client=client)
oauth.fetch_token(
token_url=API_GATEWAY_TOKEN_URL, client_id=client_id, client_secret=client_secret)
return oauth
ibis_conn = create_lookup_connection(opts)
ibis_group_methods = ibisclient.GroupMethods(ibis_conn)
# Calculate changes and find missing groups
(missing_groups, group_changes) = compare_with_lookup_groups(
ibis_group_methods, students_by_group)
def _fetch_all_students(session):
"""
Fetch all students from the students API. *Generates* a list of Student resources.
if missing_groups:
# Shame we cannot automatically do this through API
LOG.info('Groups that need creating:')
for group in sorted(missing_groups):
LOG.info(f'- {group}')
"""
next_url = urllib.parse.urljoin(STUDENT_API_ROOT, 'students')
while next_url is not None:
LOG.info('Fetching %s...', next_url)
r = session.get(next_url)
r.raise_for_status()
data = r.json()
for s in data.get('results', []):
yield Student.parse_obj(s)
next_url = data.get('next')
# Make changes to Lookup groups
update_lookup_groups(ibis_group_methods, group_changes, dry_run)
......@@ -2,4 +2,4 @@ import sys
from . import main
if __name__ == '__main__':
main(sys.argv)
main(sys.argv[1:])
import requests_oauthlib
import oauthlib.oauth2
API_GATEWAY_TOKEN_URL = 'https://api.apps.cam.ac.uk/oauth2/v1/token'
def create_api_gateway_session(opts: dict) -> requests_oauthlib.OAuth2Session:
    """
    Build a requests-compatible OAuth2 session for the API Gateway from command line options.

    The client id is taken from `--gateway-client-id`. The client secret is taken from
    `--gateway-client-secret`, unless `--gateway-client-secret-from` names a file, in which case
    the contents of that file (with leading and trailing whitespace removed) are used instead.

    A token is requested from API_GATEWAY_TOKEN_URL before the session is returned.
    """
    gateway_client_id = opts['--gateway-client-id']

    # A secret held in a file takes precedence over one given directly on the command line.
    secret_path = opts.get('--gateway-client-secret-from')
    if secret_path is None:
        gateway_client_secret = opts.get('--gateway-client-secret')
    else:
        with open(secret_path) as secret_file:
            gateway_client_secret = secret_file.read().strip()

    session = requests_oauthlib.OAuth2Session(
        client=oauthlib.oauth2.BackendApplicationClient(client_id=gateway_client_id))
    session.fetch_token(
        token_url=API_GATEWAY_TOKEN_URL,
        client_id=gateway_client_id,
        client_secret=gateway_client_secret,
    )
    return session
from typing import Dict
import logging
import os
import sys
import requests_oauthlib
from identitylib.identifiers import Identifier, IdentifierSchemes
INST_MAPPING_API_ROOT = 'https://api.apps.cam.ac.uk/institutions/mapping/v1/'
LOG = logging.getLogger(os.path.basename(sys.argv[0]))
def fetch_inst_mapping(session: requests_oauthlib.OAuth2Session) -> Dict[str, str]:
    """
    Retrieve the institution mapping from the API Gateway and reduce it to a dict keyed by
    Student Records institution id, with the matching Lookup instid as the value.

    Identifiers that cannot be parsed are logged as warnings and skipped. An unsuccessful HTTP
    response raises via `raise_for_status()`.
    """
    response = session.get(INST_MAPPING_API_ROOT)
    response.raise_for_status()

    lookup_instid_by_student_inst = {}
    for institution in response.json().get('institutions', []):
        for raw_identifier in institution.get('identifiers', []):
            try:
                parsed = Identifier.from_string(raw_identifier, find_by_alias=True)
            except ValueError as error:
                LOG.warning(error)
                continue

            # Identifiers belonging to any other scheme are of no interest here.
            if parsed.scheme != IdentifierSchemes.STUDENT_INSTITUTION:
                continue
            lookup_instid_by_student_inst[parsed.value] = institution['instid']

    LOG.info('Fetched mapping for %s institutions.', len(lookup_instid_by_student_inst))
    return lookup_instid_by_student_inst
from typing import Dict, List, Set, Tuple
import logging
import os
import sys
import ibisclient
LOG = logging.getLogger(os.path.basename(sys.argv[0]))
# Convenient type definition for group changes dict
GroupChanges = Dict[str, Dict[str, Set[str]]]
def create_lookup_connection(opts: dict) -> ibisclient.IbisClientConnection:
    """
    Open a connection to the Lookup API using credentials from the command line options.

    The username comes from `--lookup-username`. The password comes from `--lookup-password`,
    unless `--lookup-password-from` names a file, in which case the contents of that file (with
    leading and trailing whitespace removed) are used instead.

    When `--lookup-test` is set the test instance of Lookup is used, otherwise production.
    """
    lookup_username = opts['--lookup-username']

    # A password held in a file takes precedence over one given directly on the command line.
    password_path = opts.get('--lookup-password-from')
    if password_path is None:
        lookup_password = opts.get('--lookup-password')
    else:
        with open(password_path) as password_file:
            lookup_password = password_file.read().strip()

    if not opts['--lookup-test']:
        connection = ibisclient.createConnection()
    else:
        LOG.info('Using test instance of Lookup')
        connection = ibisclient.createTestConnection()

    connection.set_username(lookup_username)
    connection.set_password(lookup_password)
    return connection
def group_name(instid: str, career: str):
    """
    Build the Lookup group name for an institution id and a mapped academic career.

    The institution id is lower-cased; the career is used exactly as given.

    >>> group_name('foo', 'ug')
    'foo-sis-ug'
    >>> group_name('BAR', 'pg')
    'bar-sis-pg'

    """
    return '{}-sis-{}'.format(instid.lower(), career)
def compare_with_lookup_groups(
        ibis_group_methods: ibisclient.GroupMethods,
        students_by_group: Dict[str, Set[str]]) -> Tuple[Set[str], GroupChanges]:
    """
    Work out how each Lookup group's membership differs from the supplied sets of USNs.

    Groups are processed in name order. A group that Lookup does not know about is recorded as
    missing and otherwise left alone. For every other group, the USNs of its current direct
    members are compared with the expected set.

    Returns a tuple of:

    - the set of group names that are missing from Lookup, and
    - a dict mapping group name to {'add': <USNs to add>, 'remove': <USNs to remove>}, which
      only contains groups that need at least one change.
    """
    absent_groups = set()
    pending_changes = {}

    for group in sorted(students_by_group):
        expected_usns = students_by_group[group]

        # Nothing can be compared for a group that does not exist in Lookup.
        if ibis_group_methods.getGroup(group) is None:
            absent_groups.add(group)
            continue

        LOG.info('Group "%s" should have %s student(s):', group, len(expected_usns))

        # 'all_identifiers' presumably asks Lookup to include each person's identifiers in the
        # result - confirm against the ibisclient documentation.
        current_members: List[ibisclient.IbisPerson] = ibis_group_methods.getDirectMembers(
            group, 'all_identifiers')
        LOG.info('- Lookup has %s member(s)', len(current_members))

        # Gather the USN of every current member that has one.
        current_usns = set()
        for member in current_members:
            if member.identifiers is None:
                continue
            for identifier in member.identifiers:
                if identifier.scheme == 'usn':
                    current_usns.add(identifier.value)

        if len(current_usns) != len(current_members):
            mismatch_note = ' - mismatch with membership'
        else:
            mismatch_note = ''
        LOG.info('- with %s USNs%s', len(current_usns), mismatch_note)

        usns_to_add = expected_usns.difference(current_usns)
        LOG.info('- %s need adding', len(usns_to_add))
        usns_to_remove = current_usns.difference(expected_usns)
        LOG.info('- %s need removing', len(usns_to_remove))

        if usns_to_add or usns_to_remove:
            pending_changes[group] = {'add': usns_to_add, 'remove': usns_to_remove}

    LOG.info('%s group(s) need changes', len(pending_changes))

    return absent_groups, pending_changes
def update_lookup_groups(
        ibis_group_methods: ibisclient.GroupMethods,
        group_changes: GroupChanges, dry_run: bool = True):
    """
    Report the membership changes for each group and, unless this is a dry run, apply them.

    `group_changes` maps a group name to a dict holding the USNs to 'add' and to 'remove'.
    Every change is logged regardless of `dry_run`; Lookup is only modified when `dry_run` is
    false.
    """
    for group, changes in group_changes.items():
        LOG.info('Updating %s:', group)

        additions = changes['add']
        for usn in additions:
            LOG.info('- adding usn/%s', usn)

        removals = changes['remove']
        for usn in removals:
            LOG.info('- removing usn/%s', usn)

        if dry_run:
            LOG.info('- skipping update in dry-run mode')
            continue

        # Members are referred to by their USN using the "usn/<value>" form. The final argument
        # is presumably the comment recorded against the change - confirm against ibisclient.
        ibis_group_methods.updateDirectMembers(
            group,
            ['usn/{}'.format(usn) for usn in additions],
            ['usn/{}'.format(usn) for usn in removals],
            'SIS Synchronisation',
        )
from typing import Dict, Generator, List, Optional, Set
import logging
import os
import sys
import datetime
import pydantic
import urllib.parse
import requests_oauthlib
from identitylib.identifiers import IdentifierSchemes
from .lookup import group_name
STUDENT_API_ROOT = 'https://api.apps.cam.ac.uk/university-student/v1alpha2/'
# Maps the `status` value of a student affiliation to the career suffix used when forming the
# Lookup group name. Affiliations whose status is not a key here are ignored (and reported) by
# get_students_by_group().
ACADEMIC_CAREER_MAPPING = {
    'UGRD': 'ug',
    'PGRD': 'pg',
}
LOG = logging.getLogger(os.path.basename(sys.argv[0]))
def get_students_by_group(
        session: requests_oauthlib.OAuth2Session, inst_map: Dict[str, str]) -> Dict[str, Set[str]]:
    """
    Build a dict mapping Lookup group name to the set of USNs of the students who belong in it.

    Every student returned by the Student API is examined. Each of their current Student Records
    institution affiliations whose institution appears in `inst_map`, and whose status appears
    in ACADEMIC_CAREER_MAPPING, places the student's USN(s) in the group named after the Lookup
    instid and the mapped career.

    Institutions missing from `inst_map` and statuses missing from ACADEMIC_CAREER_MAPPING are
    collected and reported in the log once all students have been processed.

    Note that since students can be members of more than one institution, the sum of the sizes
    of the sets may not equal the size of the union of all of the sets.
    """
    usns_by_group = {}

    # Values that had to be skipped, remembered so that they can be reported at the end.
    unmapped_institutions = set()
    unmapped_careers = set()

    today = datetime.date.today()
    for student in fetch_all_students(session):
        for affiliation in student.affiliations:
            # Validate and normalise the affiliation's scheme.
            try:
                affiliation_scheme = IdentifierSchemes.from_string(
                    affiliation.scheme, find_by_alias=True)
            except ValueError as error:
                LOG.warning(error)
                continue

            # Only college/departmental affiliations are relevant.
            if affiliation_scheme != IdentifierSchemes.STUDENT_INSTITUTION:
                continue

            # Skip affiliations that have already ended or are not yet in effect.
            # NOTE(review): an affiliation starting today is skipped, whereas one ending today
            # is kept - confirm that this asymmetry is intended.
            if ((affiliation.end is not None and affiliation.end < today)
                    or (affiliation.start is not None and affiliation.start >= today)):
                continue

            # Without a Lookup instid for the institution there is no group to place it in.
            instid = inst_map.get(affiliation.value)
            if instid is None:
                unmapped_institutions.add(affiliation.value)
                continue

            # Likewise for an Academic Career (status) with no known mapping.
            career = ACADEMIC_CAREER_MAPPING.get(affiliation.status)
            if career is None:
                unmapped_careers.add(affiliation.status)
                continue

            # Find, or start, the set of USNs for this institution/career group.
            name = group_name(instid, career)
            if name not in usns_by_group:
                usns_by_group[name] = set()
            group_usns = usns_by_group[name]

            for identifier in student.identifiers:
                # Validate and normalise the identifier's scheme.
                try:
                    identifier_scheme = IdentifierSchemes.from_string(
                        identifier.scheme, find_by_alias=True)
                except ValueError as error:
                    LOG.warning(f'Identifier: {identifier.value} ({affiliation.value}) - "{error}"')
                    continue

                # Only USNs are recorded as group members.
                if identifier_scheme == IdentifierSchemes.USN:
                    group_usns.add(identifier.value)

    # Report ignored values (possibly missing from the inst mapping or the career mapping).
    ignored_reports = (
        ('Ignored Affiliations:', unmapped_institutions),
        ('Ignored Academic Careers:', unmapped_careers),
    )
    for heading, ignored_values in ignored_reports:
        if not ignored_values:
            continue
        LOG.info(heading)
        for ignored in sorted(ignored_values):
            LOG.info(f'- {ignored}')

    return usns_by_group
# Student API schema
class StudentIdentifier(pydantic.BaseModel):
    """
    Identifier resource from Student API.

    A scheme/value pair naming a student within one identifier scheme.
    """
    # Name of the identifier scheme; get_students_by_group() parses it with
    # IdentifierSchemes.from_string().
    scheme: str
    # The identifier itself; used as the USN when the scheme is the USN scheme.
    value: str
class StudentAffiliation(pydantic.BaseModel):
    """
    Affiliation resource from Student API.

    Describes a student's affiliation to something identified by `value` within `scheme`,
    optionally bounded by `start` and `end` dates.
    """
    # Date used to decide whether the affiliation has expired; may be absent.
    end: Optional[datetime.date]
    # Name of the identifier scheme that `value` belongs to.
    scheme: str
    # Date used to decide whether the affiliation is yet to begin; may be absent.
    start: Optional[datetime.date]
    # Looked up in ACADEMIC_CAREER_MAPPING by get_students_by_group() (e.g. 'UGRD', 'PGRD').
    status: str
    # Identifier of the affiliated entity; looked up in the institution mapping when the scheme
    # is the Student Records institution scheme.
    value: str
class Student(pydantic.BaseModel):
"""
Student resource from Student API.
"""