FAQ | This is a LIVE service | Changelog

Commit 89502635 authored by Robin Goodall 💬
Browse files

Use identity lib instead of hardcoded scheme string

parent 4925203a
......@@ -47,10 +47,17 @@ import oauthlib.oauth2
import pydantic
import requests_oauthlib
from identitylib.identifiers import Identifier, IdentifierSchemes
# OAuth2 token URL and API roots; all are hosted under https://api.apps.cam.ac.uk/.
API_GATEWAY_TOKEN_URL = 'https://api.apps.cam.ac.uk/oauth2/v1/token'
STUDENT_API_ROOT = 'https://api.apps.cam.ac.uk/university-student/v1alpha2/'
# Fetched by _student_inst_members to build the mapping from Student Records Institution ids
# to Lookup instids.
INST_MAPPING_API_ROOT = 'https://api.apps.cam.ac.uk/institutions/mapping/v1/'
# Maps an affiliation's Academic Career (status) value to the career label used as the second
# element of the (instid, career) grouping key. Affiliations whose status has no entry here
# are skipped and reported under "Ignored Academic Careers".
ACADEMIC_CAREER_MAPPING = {
'UGRD': 'ug',
'PGRD': 'pg',
}
# Logger named after the basename of the running script (sys.argv[0]).
LOG = logging.getLogger(os.path.basename(sys.argv[0]))
......@@ -89,78 +96,128 @@ def _student_inst_members(opts: dict, dry_run: bool):
session = _create_api_gateway_session(opts)
# Fetch institutional mapping and pre-compute a dict mapping
# institution.v1.student-records.university.identifiers.cam.ac.uk ids to lookup instids.
# Fetch institutional mapping and pre-compute a dict mapping Student Records Institution ids
# to Lookup instids.
r = session.get(INST_MAPPING_API_ROOT)
r.raise_for_status()
inst_map = {}
for datum in r.json().get('institutions', []):
for i in datum.get('identifiers', []):
try:
value, scheme = i.split('@')
except ValueError:
LOG.warn('Ignoring identifier with non-email formatting: %s', i)
id = Identifier.from_string(i, find_by_alias=True)
except ValueError as e:
LOG.warn(e)
continue
# TODO: looks like the scheme returned by the institutional map API doesn't match the
# scheme returned by the student API?
if scheme == 'institution.v1.student.university.identifiers.cam.ac.uk':
inst_map[value] = datum['instid']
# Only interested in Student Records Institution ids
if id.scheme == IdentifierSchemes.STUDENT_INSTITUTION:
inst_map[id.value] = datum['instid']
LOG.info('Fetched mapping for %s institutions.', len(inst_map))
# A map from institutions to sets of students within that institution. Institution identifiers
# are Lookup instids and student identifiers are those with scheme
# person.v1.student-records.university.identifiers.cam.ac.uk.
# Sanity check
if len(inst_map) == 0:
raise RuntimeError('Failed to fetch any institutional mappings')
# TODO: map from person.v1.student-records.university.identifiers.cam.ac.uk to CRSid via USN
# identifier scheme in Lookup.
crsids_by_usn = {}
# A map from (institution, career) tuples to sets of students within that institution with
# status matching career. Institution identifiers are Lookup instids and student identifiers
# are CRSids
#
# Note that since students can be members of more than one institution, the sum of the lengths
# of the sets may not equal the length of the union of all of the sets.
students_by_inst = {}
# TODO: map from person.v1.student-records.university.identifiers.cam.ac.uk to CRSid via USN
# identifier scheme in Lookup.
# Capture ignored affiliations and careers
ignored_affiliations = set()
ignored_careers = set()
# Fetch all students parsed as instances of Student.
today = datetime.date.today()
for s in _fetch_all_students(session):
# Add student to any institutions they are affiliated with.
for a in s.affiliations:
try:
# Validate and normalise scheme
a.scheme = IdentifierSchemes.from_string(a.scheme, find_by_alias=True)
except ValueError as e:
LOG.warn(e)
continue
# Ignore non college/departmental affiliations.
if a.scheme != 'institution.v1.student-records.university.identifiers.cam.ac.uk':
if a.scheme != IdentifierSchemes.STUDENT_INSTITUTION:
continue
# Ignore affiliations that have expired or have not yet started.
if a.end is not None and a.end < today:
continue
if a.start is not None and a.start > today:
if a.start is not None and a.start >= today:
continue
# Find the instid for this institution. Continue if there is no known mapping.
instid = inst_map.get(a.value)
if instid is None:
ignored_affiliations.add(a.value)
continue
# Ensure there is a set of student ids for this institution.
if instid not in students_by_inst:
students_by_inst[instid] = set()
student_ids = students_by_inst[instid]
# Only those with appropriate Academic Career (status) values
career = ACADEMIC_CAREER_MAPPING.get(a.status)
if career is None:
ignored_careers.add(a.status)
continue
# Ensure there is a set of student ids for this institution career group.
student_ids = students_by_inst.setdefault((instid, career), set())
for i in s.identifiers:
# Only identifiers from a known scheme are used.
if i.scheme != 'person.v1.student-records.university.identifiers.cam.ac.uk':
try:
# Validate and normalise scheme
i.scheme = IdentifierSchemes.from_string(i.scheme, find_by_alias=True)
except ValueError as e:
LOG.warn(e)
continue
# Only identifiers from Student Records Person scheme (USNs)
if i.scheme != IdentifierSchemes.USN:
continue
# Map USN to CRSid
crsid = crsids_by_usn.get(i.value)
if crsid is None:
LOG.warn(f'No CRSid for USN: {i.value} ({s.surname})')
continue
# Add identifier to list.
student_ids.add(i.value)
student_ids.add(crsid)
# Report ignored values (possibly missing from inst mapping or career mapping above)
if ignored_affiliations:
LOG.info('Ignored Affiliations:')
for a in sorted(ignored_affiliations):
LOG.info(f'- {a}')
if ignored_careers:
LOG.info('Ignored Academic Careers:')
for a in sorted(ignored_careers):
LOG.info(f'- {a}')
# Sanity check
if len(students_by_inst) == 0:
raise RuntimeError('Failed to fetch any student records')
# Log some stats
LOG.info('Fetched record(s) for %s institutions.', len(students_by_inst))
for inst_id, students in students_by_inst.items():
LOG.info('Institution "%s" has %s student(s).', inst_id, len(students))
LOG.info('Fetched record(s) for %s (institution, career) groups.', len(students_by_inst))
for (inst_id, career), students in students_by_inst.items():
LOG.info('Institution "%s" (%s) has %s student(s).', inst_id, career, len(students))
LOG.info('Total affiliation count: %s', sum(len(s) for s in students_by_inst.values()))
all_students = set()
for s in students_by_inst.values():
all_students |= s
LOG.info('Total student count: %s', len(all_students))
# TODO: calculate changes
# TODO: limits / caps on changes
# TODO: actually perform synchronisation here.
return '{ "status": "ok" }', {'Content-Type': 'application/json'}
......
......@@ -2,3 +2,4 @@ docopt~=0.6
requests-oauthlib>=1.3.1,<2.0
requests>=2.21.1,<3.0
pydantic>=1.9.0,<2.0
ucam-identitylib~=1.0.4
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment