""" Utilities for constructing human-friendly names. """ import collections # The human-friendly names constructed by get_names(). Names = collections.namedtuple('Names', 'given_name family_name') def get_names(*, uid, display_name=None, cn=None, sn=None): """ If we only have a uid, this is used for both given name and family name. >>> get_names(uid='spqr1') Names(given_name='spqr1', family_name='spqr1') >>> get_names(uid='spqr1', display_name='spqr1') Names(given_name='spqr1', family_name='spqr1') >>> get_names(uid='spqr1', display_name='spqr1', cn='spqr1', sn='spqr1') Names(given_name='spqr1', family_name='spqr1') >>> get_names(uid='spqr1', display_name='') Names(given_name='spqr1', family_name='spqr1') >>> get_names(uid='spqr1', sn='') Names(given_name='spqr1', family_name='spqr1') >>> get_names(uid='spqr1', cn='') Names(given_name='spqr1', family_name='spqr1') "Odd" ASCII characters unsupported by Google are stripped out of names. >>> get_names(uid='spqr1', display_name='Stephen @**Quill-Roman**@') Names(given_name='Stephen', family_name='Quill-Roman') Long names are truncated. >>> get_names(uid='spqr1', display_name='Stephen Quill-Roman' + 'X' * 200) ... #doctest: +NORMALIZE_WHITESPACE Names(given_name='Stephen', family_name='Quill-RomanXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX') If we have display name and surname and the display name ends with the surname, this is used to form the given names assuming there is some string left. >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', sn='Quill Roman') Names(given_name='Stephen', family_name='Quill Roman') If this didn't work but we have display name, split it at the final space. >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', sn='Stephen Quill Roman') Names(given_name='Stephen Quill', family_name='Roman') >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', cn='Stephen') Names(given_name='Stephen Quill', family_name='Roman') >>> get_names(uid='spqr1', display_name='Stephen Quill Roman') Names(given_name='Stephen Quill', family_name='Roman') If we have common name and surname and the common name ends with the surname, this is used to form the given names assuming there is some string left. >>> get_names(uid='spqr1', sn='Quill Roman', cn='Prof. S.P. Quill Roman') Names(given_name='Prof. S.P.', family_name='Quill Roman') If we *only* have a cn, split it at the final space character. >>> get_names(uid='spqr1', cn='Prof. S.P. Quill Roman') Names(given_name='Prof. S.P. Quill', family_name='Roman') >>> get_names(uid='spqr1', sn='spqr1', cn='Prof. S.P.Q. Roman') Names(given_name='Prof. S.P.Q.', family_name='Roman') Support Wookey. >>> get_names(uid='spqr1', display_name='Wookey') Names(given_name='Wookey', family_name='spqr1') >>> get_names(uid='spqr1', sn='Wookey') Names(given_name='spqr1', family_name='Wookey') >>> get_names(uid='spqr1', cn='Wookey') Names(given_name='Wookey', family_name='spqr1') """ # If any of display name, common name or surname is the same as the uid, proceed as if it were # unset. Trim any leading/trailing whitespace at the same time. cn = cn.strip() if cn is not None and cn != uid else None sn = sn.strip() if sn is not None and sn != uid else None display_name = ( display_name.strip() if display_name is not None and display_name != uid else None ) # If any of cn, sn or display_name are blank, proceed as it they're not set. cn = cn if cn != '' else None sn = sn if sn != '' else None display_name = display_name if display_name != '' else None # Function to construct return value from family name and given name. Google names can't be # longer than 60 characters so truncate them after cleaning. def _make_ret(*, family_name, given_name): return Names(family_name=_clean(family_name)[:60], given_name=_clean(given_name)[:40]) # If we have a sn and display name and the display name ends with sn, split out the sn. if display_name is not None and sn is not None and display_name.endswith(sn): given_name = display_name[:-len(sn)].strip() if given_name != '': return _make_ret(family_name=sn, given_name=given_name) # If we have the display name, split at space and see if we have two parts. if display_name is not None: components = display_name.split() if len(components) > 0: family_name = components[-1] given_name = ' '.join(components[:-1]) if given_name != '' and family_name != '': return _make_ret(family_name=family_name, given_name=given_name) # If we have a sn and cn and the cn ends with sn, split out the sn. if cn is not None and sn is not None and cn.endswith(sn): given_name = cn[:-len(sn)].strip() if given_name != '': return _make_ret(family_name=sn, given_name=given_name) # If we have cn, split at space and see if we have two parts. if cn is not None: components = cn.split() if len(components) > 0: family_name = components[-1] given_name = ' '.join(components[:-1]) if given_name != '' and family_name != '': return _make_ret(family_name=family_name, given_name=given_name) # Support Wookey. if display_name is not None and ' ' not in display_name: return _make_ret(family_name=uid, given_name=display_name) if sn is not None and ' ' not in sn: return _make_ret(family_name=sn, given_name=uid) if cn is not None and ' ' not in cn: return _make_ret(family_name=uid, given_name=cn) # Give up and return uid for both fields return _make_ret(family_name=uid, given_name=uid) def _clean(s): """ Clean any "bad characters" in names. This pattern is based on the one used by the legacy Google authenticator which has this comment: Google API doesn't like _some_ characters. The 'documentation' (http://www.google.com/support/a/bin/answer.py?answer=33386) says "First and last names support unicode/UTF-8 characters, and may contain spaces, letters (a-z), numbers (0-9), dashes (-), forward slashes (/), and periods (.)", which makes no sence [sic]. Experimentation suggests it chokes on '<', '>', and '=', but doesn't mind, e.g. cyrilic characters. Compromise by filtering out "!"#$%&'()*+,:;<=>?@[\\]^_`{|}~" - i.e. all the 'odd' ASCII characters other than the ones explicitly supported. We change this to allow "'" since plenty of names have this character. (E.g. "O'Reilly", "D'Angelo", etc.) >>> _clean('ab@c') 'abc' >>> _clean('a "b" c') 'a b c' """ return ''.join(c for c in s if c not in _CLEAN_BAD_CHARS) # Characters stripped by _clean. Present as a constant to avoid re-creating it. _CLEAN_BAD_CHARS = '!"#$%&()*+,:;<=>?@[\\]^_`{|}~'