# naming.py

"""
Utilities for constructing human-friendly names.

"""
import collections


# Result type of get_names(): a (given_name, family_name) pair whose fields are accessible by
# name as well as by position.
Names = collections.namedtuple('Names', ['given_name', 'family_name'])


def get_names(*, uid, display_name=None, cn=None, sn=None):
    """
    Construct a human-friendly given name and family name for a user.

    All arguments are keyword-only strings. *uid* is required and is the fallback for any part of
    the name which cannot be determined. *display_name*, *cn* (common name) and *sn* (surname) are
    optional; each is treated as unset if it is None, blank or equal to the uid once surrounding
    whitespace has been removed. Returns a Names tuple. Both fields are passed through _clean()
    and then truncated to 60 characters.

    If we only have a uid, this is used for both given name and family name.

    >>> get_names(uid='spqr1')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', display_name='spqr1')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', display_name='spqr1', cn='spqr1', sn='spqr1')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', display_name='')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', sn='')
    Names(given_name='spqr1', family_name='spqr1')
    >>> get_names(uid='spqr1', cn='')
    Names(given_name='spqr1', family_name='spqr1')

    A value which only differs from the uid by surrounding whitespace is also ignored.

    >>> get_names(uid='spqr1', display_name=' spqr1 ', sn='Wookey')
    Names(given_name='spqr1', family_name='Wookey')

    "Odd" ASCII characters unsupported by Google are stripped out of names.

    >>> get_names(uid='spqr1', display_name='Stephen @**Quill-Roman**@')
    Names(given_name='Stephen', family_name='Quill-Roman')

    Long names are truncated. The same 60 character limit applies to both fields.

    >>> get_names(uid='spqr1', display_name='Stephen Quill-Roman' + 'X' * 200)
    ... #doctest: +NORMALIZE_WHITESPACE
    Names(given_name='Stephen',
          family_name='Quill-RomanXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    >>> names = get_names(uid='spqr1', display_name='S' * 200 + ' Roman')
    >>> len(names.given_name), names.family_name
    (60, 'Roman')

    If we have display name and surname and the display name ends with the surname, this is used to
    form the given names assuming there is some string left.

    >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', sn='Quill Roman')
    Names(given_name='Stephen', family_name='Quill Roman')

    If this didn't work but we have display name, split it at the final space.

    >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', sn='Stephen Quill Roman')
    Names(given_name='Stephen Quill', family_name='Roman')
    >>> get_names(uid='spqr1', display_name='Stephen Quill Roman', cn='Stephen')
    Names(given_name='Stephen Quill', family_name='Roman')
    >>> get_names(uid='spqr1', display_name='Stephen Quill Roman')
    Names(given_name='Stephen Quill', family_name='Roman')

    If we have common name and surname and the common name ends with the surname, this is used to
    form the given names assuming there is some string left.

    >>> get_names(uid='spqr1', sn='Quill Roman', cn='Prof. S.P. Quill Roman')
    Names(given_name='Prof. S.P.', family_name='Quill Roman')

    If we *only* have a cn, split it at the final space character.

    >>> get_names(uid='spqr1', cn='Prof. S.P. Quill Roman')
    Names(given_name='Prof. S.P. Quill', family_name='Roman')
    >>> get_names(uid='spqr1', sn='spqr1', cn='Prof. S.P.Q. Roman')
    Names(given_name='Prof. S.P.Q.', family_name='Roman')

    Support Wookey.

    >>> get_names(uid='spqr1', display_name='Wookey')
    Names(given_name='Wookey', family_name='spqr1')
    >>> get_names(uid='spqr1', sn='Wookey')
    Names(given_name='spqr1', family_name='Wookey')
    >>> get_names(uid='spqr1', cn='Wookey')
    Names(given_name='Wookey', family_name='spqr1')

    """
    # Google names can't be longer than 60 characters. The limit is the same for the given name
    # and the family name.
    max_length = 60

    def _normalise(value):
        # Trim leading/trailing whitespace and map "nothing useful" to None. A value is not useful
        # if it is unset, blank or merely repeats the uid. The uid comparison is made both before
        # and after trimming so that a whitespace-padded copy of the uid is also ignored.
        if value is None or value == uid:
            return None
        value = value.strip()
        if value == '' or value == uid:
            return None
        return value

    cn = _normalise(cn)
    sn = _normalise(sn)
    display_name = _normalise(display_name)

    def _make_ret(*, family_name, given_name):
        # Construct the return value. Names are truncated *after* cleaning so that stripped
        # characters do not count towards the length limit.
        return Names(
            family_name=_clean(family_name)[:max_length],
            given_name=_clean(given_name)[:max_length],
        )

    # Try the display name first and then the common name. For each one, attempt the more
    # specific "ends with surname" split before the generic "split at final space".
    for full_name in (display_name, cn):
        if full_name is None:
            continue

        # If the name ends with sn, split out the sn provided that leaves some given name.
        if sn is not None and full_name.endswith(sn):
            given_name = full_name[:-len(sn)].strip()
            if given_name != '':
                return _make_ret(family_name=sn, given_name=given_name)

        # Otherwise, split at whitespace and see if we have at least two parts. The final part is
        # the family name and everything before it forms the given name.
        components = full_name.split()
        if len(components) >= 2:
            return _make_ret(
                family_name=components[-1], given_name=' '.join(components[:-1]))

    # Support Wookey: a single-word name fills one field and the uid fills the other.
    if display_name is not None and ' ' not in display_name:
        return _make_ret(family_name=uid, given_name=display_name)
    if sn is not None and ' ' not in sn:
        return _make_ret(family_name=sn, given_name=uid)
    if cn is not None and ' ' not in cn:
        return _make_ret(family_name=uid, given_name=cn)

    # Give up and return uid for both fields
    return _make_ret(family_name=uid, given_name=uid)


def _clean(s):
    """
    Return a copy of *s* with every character listed in _CLEAN_BAD_CHARS removed. All other
    characters are kept in their original order.

    The set of removed characters is based on the one used by the legacy Google authenticator
    which has this comment:

        Google API doesn't like _some_ characters. The 'documentation'
        (http://www.google.com/support/a/bin/answer.py?answer=33386) says "First and last names
        support unicode/UTF-8 characters, and may contain spaces, letters (a-z), numbers (0-9),
        dashes (-), forward slashes (/), and periods (.)", which makes no sence [sic].
        Experimentation suggests it chokes on '<', '>', and '=', but doesn't mind, e.g. cyrilic
        characters. Compromise by filtering out "!"#$%&'()*+,:;<=>?@[\\]^_`{|}~" - i.e. all the
        'odd' ASCII characters other than the ones explicitly supported.

    Unlike the legacy pattern, "'" is allowed here since plenty of names contain it. (E.g.
    "O'Reilly", "D'Angelo", etc.)

    >>> _clean('ab@c')
    'abc'
    >>> _clean('a "b" c')
    'a b c'

    """
    kept = []
    for char in s:
        if char in _CLEAN_BAD_CHARS:
            # Unsupported character: drop it.
            continue
        kept.append(char)
    return ''.join(kept)


# Characters stripped by _clean(). Defined once at module level so that it is not re-created on
# every call. Note that the apostrophe ('), dash (-), forward slash (/) and period (.) are absent
# from this string, so _clean() leaves them in place.
_CLEAN_BAD_CHARS = '!"#$%&()*+,:;<=>?@[\\]^_`{|}~'