-
Dr Rich Wareham authored
Now we have on-boarded users, move away from being compatible with the authenticator to using displayName in preference to cn/sn. Closes #5
Dr Rich Wareham authored: Now we have on-boarded users, move away from being compatible with the authenticator to using displayName in preference to cn/sn. Closes #5
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
naming.py 6.96 KiB
"""
Utilities for constructing human-friendly names.
"""
import collections
# Result type of get_names(): the given name and family name chosen for a user.
Names = collections.namedtuple('Names', ['given_name', 'family_name'])
def get_names(*, uid, display_name=None, cn=None, sn=None):
    """
    Construct a human-friendly given name and family name for a user.

    All arguments are keyword-only. *uid* is required; *display_name*, *cn* (common name) and *sn*
    (surname) are optional strings. The return value is a :py:class:`Names` tuple whose fields
    have been passed through ``_clean`` and truncated to at most 60 characters each.

    If we only have a uid, this is used for both given name and family name.

    >>> get_names(uid='spqr1')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', display_name='spqr1')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', display_name='spqr1', cn='spqr1', sn='spqr1')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', display_name='')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', sn='')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', cn='')
    Names(given_name='spqr1', family_name='spqr1')

    "Odd" ASCII characters unsupported by Google are stripped out of names.

    >>> get_names(uid='spqr1', display_name='Stephen @**Quill-Roman**@')
    Names(given_name='Stephen', family_name='Quill-Roman')

    Long names are truncated. Given names and family names are both limited to 60 characters.

    >>> names = get_names(uid='spqr1', display_name='Stephen Quill-Roman' + 'X' * 200)
    >>> names.given_name
    'Stephen'
    >>> names.family_name == 'Quill-Roman' + 'X' * 49
    True
    >>> len(get_names(uid='spqr1', display_name='X' * 200 + ' Roman').given_name)
    60

    If we have display name and surname and the display name ends with the surname, this is used to
    form the given names assuming there is some string left.

    >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', sn='Quill Roman')
    Names(given_name='Stephen', family_name='Quill Roman')

    If this didn't work but we have display name, split it at the final space.

    >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', sn='Stephen Quill Roman')
    Names(given_name='Stephen Quill', family_name='Roman')
    >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', cn='Stephen')
    Names(given_name='Stephen Quill', family_name='Roman')
    >>> get_names(uid='spqr1', display_name='Stephen Quill Roman')
    Names(given_name='Stephen Quill', family_name='Roman')

    If we have common name and surname and the common name ends with the surname, this is used to
    form the given names assuming there is some string left.

    >>> get_names(uid='spqr1', sn='Quill Roman', cn='Prof. S.P. Quill Roman')
    Names(given_name='Prof. S.P.', family_name='Quill Roman')

    If we *only* have a cn, split it at the final space character.

    >>> get_names(uid='spqr1', cn='Prof. S.P. Quill Roman')
    Names(given_name='Prof. S.P. Quill', family_name='Roman')
    >>> get_names(uid='spqr1', sn='spqr1', cn='Prof. S.P.Q. Roman')
    Names(given_name='Prof. S.P.Q.', family_name='Roman')

    Support Wookey.

    >>> get_names(uid='spqr1', display_name='Wookey')
    Names(given_name='Wookey', family_name='spqr1')
    >>> get_names(uid='spqr1', sn='Wookey')
    Names(given_name='spqr1', family_name='Wookey')
    >>> get_names(uid='spqr1', cn='Wookey')
    Names(given_name='Wookey', family_name='spqr1')

    """
    # Google names can't be longer than 60 characters. The limit applies equally to the given
    # name and the family name.
    max_length = 60

    def _normalise(value):
        # If a value is missing, is the same as the uid or is blank once leading/trailing
        # whitespace is trimmed, proceed as if it were unset. The comparison with the uid is made
        # on the untrimmed value.
        if value is None or value == uid:
            return None
        value = value.strip()
        return value if value != '' else None

    cn = _normalise(cn)
    sn = _normalise(sn)
    display_name = _normalise(display_name)

    def _make_ret(*, family_name, given_name):
        # Construct the return value from family name and given name: clean first, then truncate,
        # so that stripped characters do not count towards the length limit.
        return Names(
            family_name=_clean(family_name)[:max_length],
            given_name=_clean(given_name)[:max_length],
        )

    def _split_full_name(full_name):
        # Try to split a full name (display name or common name) into given and family names.
        # Returns None if full_name is unset or cannot be split into two non-empty parts.
        if full_name is None:
            return None

        # If we have a sn and the full name ends with it, split out the sn.
        # NOTE(review): this is a plain suffix match and is not word-boundary aware.
        if sn is not None and full_name.endswith(sn):
            given_name = full_name[:-len(sn)].strip()
            if given_name != '':
                return _make_ret(family_name=sn, given_name=given_name)

        # Otherwise split at the final run of whitespace and see if we have two parts.
        components = full_name.split()
        if len(components) >= 2:
            return _make_ret(family_name=components[-1], given_name=' '.join(components[:-1]))

        return None

    # The display name is used in preference to the common name.
    for full_name in (display_name, cn):
        names = _split_full_name(full_name)
        if names is not None:
            return names

    # Support Wookey: people with a single name. The uid fills in for the missing half.
    if display_name is not None and ' ' not in display_name:
        return _make_ret(family_name=uid, given_name=display_name)
    if sn is not None and ' ' not in sn:
        return _make_ret(family_name=sn, given_name=uid)
    if cn is not None and ' ' not in cn:
        return _make_ret(family_name=uid, given_name=cn)

    # Give up and return uid for both fields
    return _make_ret(family_name=uid, given_name=uid)
def _clean(s):
"""
Clean any "bad characters" in names. This pattern is based on the one used by the
legacy Google authenticator which has this comment:
Google API doesn't like _some_ characters. The 'documentation'
(http://www.google.com/support/a/bin/answer.py?answer=33386) says "First and last names
support unicode/UTF-8 characters, and may contain spaces, letters (a-z), numbers (0-9),
dashes (-), forward slashes (/), and periods (.)", which makes no sence [sic].
Experimentation suggests it chokes on '<', '>', and '=', but doesn't mind, e.g. cyrilic
characters. Compromise by filtering out "!"#$%&'()*+,:;<=>?@[\\]^_`{|}~" - i.e. all the
'odd' ASCII characters other than the ones explicitly supported.
We change this to allow "'" since plenty of names have this character. (E.g. "O'Reilly",
"D'Angelo", etc.)
>>> _clean('ab@c')
'abc'
>>> _clean('a "b" c')
'a b c'
"""
return ''.join(c for c in s if c not in _CLEAN_BAD_CHARS)
# Characters stripped by _clean. Present as a constant to avoid re-creating it.
_CLEAN_BAD_CHARS = '!"#$%&()*+,:;<=>?@[\\]^_`{|}~'