From 67f5fd8db116f08e44b4937f7c4aebd6138c5341 Mon Sep 17 00:00:00 2001 From: Robin Goodall <rjg21@cam.ac.uk> Date: Thu, 6 Feb 2025 19:34:27 +0000 Subject: [PATCH] feat: remove db case-insensitivity on last name and uppercase in normalisation --- activate_account/apps.py | 2 + activate_account/factories.py | 14 +++--- .../management/commands/init_mock_data.py | 30 +++++++++--- ...lter_accountidentifier_id_and_last_name.py | 36 +++++++++++++++ activate_account/models.py | 3 +- activate_account/tests/test_account.py | 20 +------- .../tests/test_account_identifier.py | 12 ----- api/v1alpha/tests/test_reset_token.py | 14 ++++++ authentication/normalisation.py | 2 +- authentication/tests/test_login_view.py | 46 +++++++++++++++++-- authentication/tests/test_normalisation.py | 28 +++++++++++ .../tests/test_token_request_serializer.py | 17 +++++++ data_manager_api/tests/test_serializers.py | 19 ++++---- data_manager_api/tests/test_views.py | 4 +- 14 files changed, 186 insertions(+), 61 deletions(-) create mode 100644 activate_account/migrations/0008_alter_accountidentifier_id_and_last_name.py create mode 100644 authentication/tests/test_normalisation.py diff --git a/activate_account/apps.py b/activate_account/apps.py index afbf801..42e6f03 100644 --- a/activate_account/apps.py +++ b/activate_account/apps.py @@ -14,6 +14,8 @@ class Config(AppConfig): #: The human-readable verbose name for this application. verbose_name = "Activate Account" + default_auto_field = "django.db.models.BigAutoField" + def ready(self): """ Perform application initialisation once the Django platform has been initialised. diff --git a/activate_account/factories.py b/activate_account/factories.py index 374e0b9..56c0e7f 100644 --- a/activate_account/factories.py +++ b/activate_account/factories.py @@ -48,18 +48,20 @@ class AccountDetailsFactory(factory.django.DjangoModelFactory): @factory.lazy_attribute def name(self): - # Generate first name + # Generate full name faker = factory.Faker._get_faker() first_letter = self.account.crsid[0].upper() - first_name = faker.first_name() + second_letter = self.account.crsid[1].upper() + last_name = faker.last_name() attempts = 0 max_attempts = 20 - while not first_name.startswith(first_letter) and attempts < max_attempts: - first_name = faker.first_name() + while not last_name.startswith(second_letter) and attempts < max_attempts: + last_name = faker.last_name() attempts += 1 + title = faker.prefix().replace(".", " ") if faker.boolean() else "" - return f"{first_name}" + return f"{last_name} {title} {first_letter}." class AccountIdentifierFactory(factory.django.DjangoModelFactory): @@ -67,7 +69,7 @@ class AccountIdentifierFactory(factory.django.DjangoModelFactory): model = AccountIdentifier account_id = factory.SubFactory(AccountFactory) - last_name = factory.Faker("last_name") + last_name = factory.LazyAttribute(lambda n: factory.Faker._get_faker().last_name().upper()) date_of_birth = factory.Faker("date_of_birth") @factory.lazy_attribute diff --git a/activate_account/management/commands/init_mock_data.py b/activate_account/management/commands/init_mock_data.py index 2d76844..4320748 100644 --- a/activate_account/management/commands/init_mock_data.py +++ b/activate_account/management/commands/init_mock_data.py @@ -1,9 +1,12 @@ +from datetime import date from pprint import pprint from django.core.management.base import BaseCommand -from activate_account.factories import AccountDetailsFactory -from api.v1alpha.serializers import AccountSerializer +from activate_account.factories import AccountFactory +from activate_account.models import Account +from data_manager_api.serializers import AccountDataSerializer +from data_manager_api.tests.utils import data_account_factory class Command(BaseCommand): @@ -13,13 +16,28 @@ class Command(BaseCommand): self.stdout.write(self.style.SUCCESS("Starting to initialize mock data...")) for _ in range(10): - details = AccountDetailsFactory() + account = AccountFactory.build() + data = data_account_factory(account) + valid_at = account.valid_at + + serializer = AccountDataSerializer(data=data, valid_at=valid_at) + serializer.is_valid(raise_exception=True) + + instance = serializer.save() + + account = Account.objects.get(crsid=instance.crsid) print("Created user:") pprint( { - **AccountSerializer(details.account).data, - "registration_code": details.account.code, - } + "crsid": account.crsid, + "name": account.account_details.name, + "last_name": account.account_identifier.first().last_name, + "date_of_birth": date.strftime( + account.account_identifier.first().date_of_birth, "%Y-%m-%d" + ), + "code": account.account_identifier.first().code, + }, + sort_dicts=False, ) self.stdout.write(self.style.SUCCESS("Successfully initialized mock data.")) diff --git a/activate_account/migrations/0008_alter_accountidentifier_id_and_last_name.py b/activate_account/migrations/0008_alter_accountidentifier_id_and_last_name.py new file mode 100644 index 0000000..acb0934 --- /dev/null +++ b/activate_account/migrations/0008_alter_accountidentifier_id_and_last_name.py @@ -0,0 +1,36 @@ +# Generated by Django 4.2.14 on 2025-02-06 19:17 + +from django.db import migrations, models + +from authentication.normalisation import normalise_name + + +def normalise_names(apps, schema_editor): + AccountIdentifier = apps.get_model("activate_account", "AccountIdentifier") + for account_identifier in AccountIdentifier.objects.all(): + account_identifier.last_name = normalise_name(account_identifier.last_name) + account_identifier.save() + + +class Migration(migrations.Migration): + dependencies = [ + ("activate_account", "0007_lockout_lockout_unique_lockout"), + ] + + operations = [ + migrations.AlterField( + model_name="accountidentifier", + name="last_name", + field=models.CharField(max_length=100), + ), + migrations.AlterField( + model_name="accountidentifier", + name="id", + field=models.BigAutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + # No way to reverse this operation without storing the original data, and doing so should + # not be necessary. + migrations.RunPython(normalise_names, reverse_code=migrations.RunPython.noop), + ] diff --git a/activate_account/models.py b/activate_account/models.py index 282f4a8..802c56b 100644 --- a/activate_account/models.py +++ b/activate_account/models.py @@ -53,7 +53,8 @@ class AccountIdentifier(models.Model): account_id = models.ForeignKey( Account, on_delete=models.CASCADE, related_name="account_identifier" ) - last_name = models.CharField(max_length=100, db_collation="case_insensitive") + # Last name cannot have a case-insensitive collation as this prevents matching with a "LIKE" + last_name = models.CharField(max_length=100) date_of_birth = models.DateField() code = models.CharField(max_length=100) diff --git a/activate_account/tests/test_account.py b/activate_account/tests/test_account.py index a21cd57..bc98700 100644 --- a/activate_account/tests/test_account.py +++ b/activate_account/tests/test_account.py @@ -3,30 +3,12 @@ import random import pytest from django.db import IntegrityError -from activate_account.factories import AccountFactory, AccountIdentifierFactory +from activate_account.factories import AccountFactory from activate_account.models import Account pytestmark = pytest.mark.django_db -def test_account_unique_together_case_insensitive(): - account = AccountFactory(account_identifier=True) - last_name = account.account_identifier.first().last_name - code = account.account_identifier.first().code - date_of_birth = account.account_identifier.first().date_of_birth - last_name_upper = last_name.upper() - - assert account.account_identifier.first().last_name != last_name_upper # Sanity check - - with pytest.raises(IntegrityError): - AccountIdentifierFactory( - last_name=last_name_upper, - code=code, - date_of_birth=date_of_birth, - account_id=account, - ) - - def test_account_separate_uniqueness(): n_accounts = random.randrange(10, 21) diff --git a/activate_account/tests/test_account_identifier.py b/activate_account/tests/test_account_identifier.py index 002c607..fc53372 100644 --- a/activate_account/tests/test_account_identifier.py +++ b/activate_account/tests/test_account_identifier.py @@ -18,18 +18,6 @@ def test_account_uniqueness_together(account_identifier): ) -def test_account_identifier_unique_together_case_insensitive(account_identifier): - last_name_upper = account_identifier.last_name.upper() - assert account_identifier.last_name != last_name_upper # Sanity check - - with pytest.raises(IntegrityError): - AccountIdentifierFactory( - last_name=last_name_upper, - date_of_birth=account_identifier.date_of_birth, - code=account_identifier.code, - ) - - def test_same_code_across_different_accounts(): identifier_code = "SHARED_CODE" diff --git a/api/v1alpha/tests/test_reset_token.py b/api/v1alpha/tests/test_reset_token.py index e19293a..480da7f 100644 --- a/api/v1alpha/tests/test_reset_token.py +++ b/api/v1alpha/tests/test_reset_token.py @@ -1,5 +1,8 @@ +import re + import pytest from django.conf import settings +from django.test import override_settings from django.urls import reverse from rest_framework import status @@ -60,3 +63,14 @@ def test_missing_user_reset_token( assert response.status_code == status.HTTP_400_BAD_REQUEST # Call once and not retried assert missing_user_reset_token_response.call_count == 1 + + +@override_settings(FAKE_RESET_TOKEN_IF_MISSING=True) +def test_missing_user_fake_reset_token( + authenticated_api_client, url, missing_user_reset_token_response +): + response = authenticated_api_client.get(url) + + assert response.status_code == status.HTTP_200_OK + # Looks like a reset token but with FAKE at the end + assert re.fullmatch(r"([0-9A-Z]{4}-){3}FAKE", response.data["token"]) diff --git a/authentication/normalisation.py b/authentication/normalisation.py index 3ff1ae2..01d1d53 100644 --- a/authentication/normalisation.py +++ b/authentication/normalisation.py @@ -37,4 +37,4 @@ def normalise_name(name): # i.e. "AA" with "A", "AE" with "A", "OE" with "O", "UE" with "U" # Remove leading and trailing whitespaces - return name.strip() + return name.strip().upper() diff --git a/authentication/tests/test_login_view.py b/authentication/tests/test_login_view.py index b1a6124..d5bc9d9 100644 --- a/authentication/tests/test_login_view.py +++ b/authentication/tests/test_login_view.py @@ -85,16 +85,52 @@ def test_valid_session_grant(client, valid_session_grant_body): assert_is_valid_login(post_login(client, valid_session_grant_body)) -@pytest.mark.parametrize("valid_session_grant_body", ["crsid", "last_name"], indirect=True) -def test_valid_case_insensitive(request, client, valid_session_grant_body): - """CRSId and Last namecomparison is case insensitive""" - field = request.node.callspec.params["valid_session_grant_body"] +@pytest.mark.parametrize("valid_session_grant_body", ["crsid"], indirect=True) +def test_valid_crsid_case_insensitive(request, client, valid_session_grant_body): + """CRSId comparison is case insensitive""" + field = "crsid" + assert_is_valid_login( + post_login( + client, + { + **valid_session_grant_body, + field: valid_session_grant_body[field].upper(), # Upper case as fixture is lower + }, + ) + ) + + +@pytest.mark.parametrize("valid_session_grant_body", ["last_name"], indirect=True) +def test_valid_last_name_case_insensitive(request, client, valid_session_grant_body): + """Last name comparison is case insensitive (due to normalisation)""" + field = "last_name" + assert_is_valid_login( + post_login( + client, + { + **valid_session_grant_body, + field: valid_session_grant_body[field].lower(), # Lower case as fixture is upper + }, + ) + ) + + +@pytest.mark.parametrize("valid_session_grant_body", ["last_name"], indirect=True) +def test_valid_last_name_partial(request, client, valid_session_grant_body, faker): + """Last name comparison also matches partial (starting followed by space)""" + field = "last_name" + # Just the first part with random case + just_first_part = valid_session_grant_body[field].split(" ")[0].lower() + just_first_part = "".join( + just_first_part[i].upper() if faker.boolean() else just_first_part[i] + for i in range(len(just_first_part)) + ) assert_is_valid_login( post_login( client, { **valid_session_grant_body, - field: valid_session_grant_body[field].upper(), + field: just_first_part, }, ) ) diff --git a/authentication/tests/test_normalisation.py b/authentication/tests/test_normalisation.py new file mode 100644 index 0000000..de11faf --- /dev/null +++ b/authentication/tests/test_normalisation.py @@ -0,0 +1,28 @@ +import pytest + +from authentication.normalisation import normalise_name + +EXPECTED_NORMALISATION = [ + # Fairly common "anglicized" names + ("Morgan A.", "MORGAN A"), + ("O'Leary P.B.", "O'LEARY P B"), + ("von Neumann J.", "VON NEUMANN J"), + # Diacritics and hyphens + ("Müller-Platz H.", "MULLER PLATZ H"), + ("Ångström A.", "ANGSTROM A"), + ("König M.A.", "KONIG M A"), + ("Hernández H.P.", "HERNANDEZ H P"), + ("Crête-Lafrenière P.", "CRETE LAFRENIERE P"), + ("Saint\u2013Martin Dr J.", "SAINT MARTIN DR J"), + # Some characters are just lost (not good) + ("Hermeß M.", "HERME M"), + ("Æsir B.", "SIR B"), + ("Bjørnson Prof. G.", "BJRNSON PROF G"), + # Nonsense + ("Bad?Data****Entered_%Here", "BAD DATA ENTERED HERE"), +] + + +@pytest.mark.parametrize("unnormalised,normalised", EXPECTED_NORMALISATION) +def test_normalise_name(unnormalised, normalised): + assert normalise_name(unnormalised) == normalised diff --git a/authentication/tests/test_token_request_serializer.py b/authentication/tests/test_token_request_serializer.py index ebe8d3f..e9144d2 100644 --- a/authentication/tests/test_token_request_serializer.py +++ b/authentication/tests/test_token_request_serializer.py @@ -1,4 +1,7 @@ +import pytest + from authentication.constants import SESSION_GRANT_TYPE +from authentication.errors import InvalidGrantError from authentication.serializers import TokenRequestSerializer @@ -28,3 +31,17 @@ def test_missing_crsid_and_last_name(): assert serializer.errors == { "non_field_errors": ["At least one of crsid and last name must be provided"] } + + +def test_empty_post_normalise_last_name(): + data = { + "grant_type": SESSION_GRANT_TYPE, + "date_of_birth": "1980-03-25", + "last_name": "-?_", + "code": "ABCDEF123456", + } + serializer = TokenRequestSerializer(data=data) + # Fails with "no matching account" before checking the database + with pytest.raises(InvalidGrantError) as excinfo: + serializer.is_valid() + assert str(excinfo.value) == "No matching account" diff --git a/data_manager_api/tests/test_serializers.py b/data_manager_api/tests/test_serializers.py index 59c7a06..802035a 100644 --- a/data_manager_api/tests/test_serializers.py +++ b/data_manager_api/tests/test_serializers.py @@ -3,6 +3,7 @@ from rest_framework.exceptions import ValidationError from activate_account.factories import AccountFactory from activate_account.models import Account, AccountIdentifier +from authentication.normalisation import normalise_name from data_manager_api.serializers import AccountDataSerializer from data_manager_api.tests.utils import data_account_factory @@ -17,6 +18,7 @@ def test_create(): name = data["name"] college = data["college"] affiliation = data["affiliation"] + dob = data["date_of_birth"] valid_at = account.valid_at assert not Account.objects.filter(crsid=data["crsid"]).exists() # Sanity check @@ -30,17 +32,15 @@ def test_create(): assert instance.crsid == account.crsid account_identifiers = instance.account_identifier.all() - assert account_identifiers.first().code == codes[0] + first_account_identifier = account_identifiers.first() + assert first_account_identifier.code == codes[0] + assert first_account_identifier.date_of_birth == dob + assert first_account_identifier.last_name == normalise_name(name) assert instance.account_details.name == name assert instance.account_details.college == college assert instance.account_details.affiliation == affiliation - for attribute in ["date_of_birth", "last_name"]: - assert getattr(instance.account_identifier.first(), attribute) == getattr( - account_identifiers.first(), attribute - ) - @pytest.mark.parametrize("partial", [True, False]) def test_update(partial): @@ -49,7 +49,7 @@ def test_update(partial): initial_data = data_account_factory(account) new_data = { "name": "Lee Sang-hyeok", - "last_name": "Lee", + "last_name": "Ignored", "date_of_birth": "1996-05-07", "codes": ["testcode123"], } @@ -68,6 +68,7 @@ def test_update(partial): instance = serializer.save() assert instance.account_details.name == new_data["name"] + assert instance.account_identifier.first().last_name == normalise_name(new_data["name"]) def test_override_nullable_fields(): @@ -102,10 +103,10 @@ def test_update_removes_old_codes(): def test_codes_empty_validation(): data = { "crsid": "ab1234", - "last_name": "Doe", + "last_name": "Ignored", "date_of_birth": "2000-01-01", "codes": [], - "name": "John", + "name": "Doe J.", "affiliation": "Student", "college": "College", } diff --git a/data_manager_api/tests/test_views.py b/data_manager_api/tests/test_views.py index b0e8df6..0c851a6 100644 --- a/data_manager_api/tests/test_views.py +++ b/data_manager_api/tests/test_views.py @@ -137,7 +137,7 @@ def test_account_update_valid_at_earlier(api_client): new_data = { "name": "Lee Min-hyung", "codes": ["new_code"], - "last_name": "Lee", + "last_name": "Ignored", "date_of_birth": "1996-05-07", } @@ -236,7 +236,7 @@ def test_read_only_mode_update(api_client, settings): new_data = { "name": "Ryu Min-seok", "codes": ["new_code"], - "last_name": "Ryu", + "last_name": "Ignored", "date_of_birth": "1996-05-07", } -- GitLab