From e283a0aacb366512d5cd54255c2cd4c572c7cb4e Mon Sep 17 00:00:00 2001 From: Nick Logan Date: Thu, 11 Apr 2024 18:28:56 +0000 Subject: [PATCH 1/3] Add intl extension to suggested packages --- composer.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/composer.json b/composer.json index 111768a..1e22117 100644 --- a/composer.json +++ b/composer.json @@ -29,6 +29,9 @@ "squizlabs/php_codesniffer": "*", "phpstan/phpstan": "*" }, + "suggest": { + "ext-intl": "Enables improved email address IDN normalization" + }, "autoload": { "psr-4": { "MaxMind\\": "src" From a34eee93e32e53122ad33c07afe474968fea8cad Mon Sep 17 00:00:00 2001 From: Nick Logan Date: Thu, 11 Apr 2024 18:33:42 +0000 Subject: [PATCH 2/3] Test email domain NFC normalization --- tests/MaxMind/Test/MinFraud/UtilTest.php | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/MaxMind/Test/MinFraud/UtilTest.php b/tests/MaxMind/Test/MinFraud/UtilTest.php index b547463..44096cf 100644 --- a/tests/MaxMind/Test/MinFraud/UtilTest.php +++ b/tests/MaxMind/Test/MinFraud/UtilTest.php @@ -308,6 +308,26 @@ public function testMaybeHashEmail(): void ], ], ], + [ + 'name' => 'email domain nfc normalization form 1', + 'input' => ['email' => ['address' => "example@bu\u{0308}cher.com"]], + 'expected' => [ + 'email' => [ + 'address' => '2b21bc76dab3c8b1622837c1d698936c', + 'domain' => 'xn--bcher-kva.com', + ], + ], + ], + [ + 'name' => 'email domain nfc normalization form 2', + 'input' => ['email' => ['address' => "example@b\u{00FC}cher.com"]], + 'expected' => [ + 'email' => [ + 'address' => '2b21bc76dab3c8b1622837c1d698936c', + 'domain' => 'xn--bcher-kva.com', + ], + ], + ], ); } From 7953ea17c5a621babd2bfd2b97f1eca7da78c4ee Mon Sep 17 00:00:00 2001 From: Nick Logan Date: Thu, 11 Apr 2024 18:56:06 +0000 Subject: [PATCH 3/3] Normalize email local part to NFC --- CHANGELOG.md | 2 ++ src/MinFraud/Util.php | 4 ++++ tests/MaxMind/Test/MinFraud/UtilTest.php | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bfa31f9..35555b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ CHANGELOG become `gmail.com`. * Additional `gmail.com` typos are now normalized when `hashEmail` is used. For example, `gmali.com` will become `gmail.com`. +* When `hashEmail` is used, the local part of an email address is now + normalized to NFC. 2.0.0 (2023-12-04) ------------------ diff --git a/src/MinFraud/Util.php b/src/MinFraud/Util.php index e95c796..ce0ff5d 100644 --- a/src/MinFraud/Util.php +++ b/src/MinFraud/Util.php @@ -297,6 +297,10 @@ public static function maybeHashEmail(array $values): array $domain = self::cleanDomain(substr($address, $atIdx + 1)); $localPart = substr($address, 0, $atIdx); + if (class_exists('Normalizer')) { + $localPart = \Normalizer::normalize($localPart, \Normalizer::FORM_C); + } + if ($domain !== '' && !isset($values['email']['domain'])) { $values['email']['domain'] = $domain; } diff --git a/tests/MaxMind/Test/MinFraud/UtilTest.php b/tests/MaxMind/Test/MinFraud/UtilTest.php index 44096cf..56e8d6b 100644 --- a/tests/MaxMind/Test/MinFraud/UtilTest.php +++ b/tests/MaxMind/Test/MinFraud/UtilTest.php @@ -328,6 +328,26 @@ public function testMaybeHashEmail(): void ], ], ], + [ + 'name' => 'email local part nfc normalization form 1', + 'input' => ['email' => ['address' => "bu\u{0308}cher@example.com"]], + 'expected' => [ + 'email' => [ + 'address' => '53550c712b146287a2d0dd30e5ed6f4b', + 'domain' => 'example.com', + ], + ], + ], + [ + 'name' => 'email local part nfc normalization form 2', + 'input' => ['email' => ['address' => "b\u{00FC}cher@example.com"]], + 'expected' => [ + 'email' => [ + 'address' => '53550c712b146287a2d0dd30e5ed6f4b', + 'domain' => 'example.com', + ], + ], + ], ); }