From 31c32c882481a1da33d88b5b4556ece1aafed097 Mon Sep 17 00:00:00 2001 From: Yuri Tkachenko Date: Tue, 23 Jul 2024 16:46:23 +0200 Subject: [PATCH 1/3] feat: possible fix #20 --- src/Formatter.php | 43 +++++++++++++++++++++++++++++++++++++- tests/HtmlEncodingTest.php | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 tests/HtmlEncodingTest.php diff --git a/src/Formatter.php b/src/Formatter.php index 091f1ec..efef52b 100644 --- a/src/Formatter.php +++ b/src/Formatter.php @@ -190,6 +190,9 @@ public static function nameCase(?string $name = '', ?array $options = []): strin self::setOptions($options); + // Temporarily replace HTML encoded entities with placeholders + $placeholders = self::replaceHtmlEntitiesWithPlaceholders($name); + // Do not do anything if string is mixed and lazy option is true. if ( ! self::canBeProcessed($name)) { return $name; @@ -213,7 +216,12 @@ public static function nameCase(?string $name = '', ?array $options = []): strin $name = self::correctInitialNames($name); $name = self::correctLowerCaseWords($name); - return self::processOptions($name); + $name = self::processOptions($name); + + // After name casing operations, restore HTML encoded entities + self::restoreHtmlEntitiesFromPlaceholders($name, $placeholders); + + return $name; } /** @@ -434,4 +442,37 @@ private static function fixPostNominal(string $name): string } return $name; } + + /** + * Replace HTML entities with placeholders. + * + * @param string $name + * @return array + */ + private static function replaceHtmlEntitiesWithPlaceholders(string &$name): array { + $placeholders = []; + $counter = 0; + + $name = preg_replace_callback('/&[a-zA-Z0-9#]+;/i', function($matches) use (&$placeholders, &$counter) { + $placeholder = mb_strtolower('HTML_ENTITY_PLACEHOLDER_' . $counter++. 
' '); // note space at the end, to avoid merging with the next word + $placeholders[$placeholder] = $matches[0]; + return $placeholder; + }, $name); + + return $placeholders; + } + + /** + * Restore HTML entities. + * + * @param string $name + * @param array $placeholders + * @return void + */ + private static function restoreHtmlEntitiesFromPlaceholders(string &$name, array $placeholders): void { + foreach ($placeholders as $placeholder => $entity) { + $name = preg_replace('/' . preg_quote($placeholder, '/') . '/i', $entity, $name); + } + } + } diff --git a/tests/HtmlEncodingTest.php b/tests/HtmlEncodingTest.php new file mode 100644 index 0000000..6d1ef1c --- /dev/null +++ b/tests/HtmlEncodingTest.php @@ -0,0 +1,42 @@ + & Leonard", + "'Keith' & Leo", + "'Keith' & Charles II", + ]; + + /** Test function call. */ + public function testCallWorks(): void + { + foreach ($this->names as $name) { + $this->assertEquals($name, str_name_case(mb_strtolower($name))); + } + } +} From 9c6ae8bc6157d080f13ceef2ebb6ecc304856553 Mon Sep 17 00:00:00 2001 From: Yuri Tkachenko Date: Tue, 23 Jul 2024 17:03:40 +0200 Subject: [PATCH 2/3] chore: small refactoring --- src/Formatter.php | 71 ++++++++++++++++------------------------------- 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/src/Formatter.php b/src/Formatter.php index efef52b..9eac9a0 100644 --- a/src/Formatter.php +++ b/src/Formatter.php @@ -201,7 +201,8 @@ public static function nameCase(?string $name = '', ?array $options = []): strin $original = $name; // Capitalize - $name = self::capitalize($name); + self::capitalize($name); + foreach (self::getReplacements() as $pattern => $replacement) { $name = mb_ereg_replace($pattern, $replacement, $name); @@ -213,10 +214,10 @@ public static function nameCase(?string $name = '', ?array $options = []): strin // @codeCoverageIgnoreEnd } - $name = self::correctInitialNames($name); - $name = self::correctLowerCaseWords($name); + self::correctInitialNames($name); + 
self::correctLowerCaseWords($name); - $name = self::processOptions($name); + self::processOptions($name); // After name casing operations, restore HTML encoded entities self::restoreHtmlEntitiesFromPlaceholders($name, $placeholders); @@ -259,10 +260,8 @@ private static function skipMixed(string $name): bool * Capitalize first letters. * * @param string $name - * - * @return string */ - private static function capitalize(string $name): string + private static function capitalize(string &$name): void { $name = mb_strtolower($name); @@ -275,38 +274,34 @@ private static function capitalize(string $name): string return mb_strtolower($matches[0]); }, $name); - return self::updateIrish($name); + self::updateIrish($name); } /** * Update for Irish names. * * @param string $name - * - * @return string */ - private static function updateIrish(string $name): string + private static function updateIrish(string &$name): void { - if ( ! self::$options['irish']) return $name; + if ( ! self::$options['irish']) return; if ( mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name) || mb_ereg_match('.*?\bMc', $name) ) { - $name = self::updateMac($name); + self::updateMac($name); } - return mb_ereg_replace('Macmurdo', 'MacMurdo', $name); + $name = mb_ereg_replace('Macmurdo', 'MacMurdo', $name); } /** * Updates irish Mac & Mc. * * @param string $name - * - * @return string */ - private static function updateMac(string $name): string + private static function updateMac(string &$name): void { $name = mb_ereg_replace_callback('\b(Ma?c)([A-Za-z]+)', function ($matches) { return $matches[1] . mb_strtoupper(mb_substr($matches[2], 0, 1)) . 
mb_substr($matches[2], 1); @@ -316,8 +311,6 @@ private static function updateMac(string $name): string foreach (self::EXCEPTIONS as $pattern => $replacement) { $name = mb_ereg_replace($pattern, $replacement, $name); } - - return $name; } /** @@ -344,12 +337,10 @@ private static function getReplacements(): array * Correct capitalization of initial names like JJ and TJ. * * @param string $name - * - * @return string */ - private static function correctInitialNames(string $name): string + private static function correctInitialNames(string &$name): void { - return mb_ereg_replace_callback(self::INITIAL_NAME_REGEX, function ($matches) { + $name = mb_ereg_replace_callback(self::INITIAL_NAME_REGEX, function ($matches) { $match = $matches[0]; if (in_array($matches[1], self::INITIAL_NAME_EXCEPTIONS)) { @@ -364,51 +355,42 @@ private static function correctInitialNames(string $name): string * Correct lower-case words of titles. * * @param string $name - * - * @return string */ - private static function correctLowerCaseWords(string $name): string + private static function correctLowerCaseWords(string &$name): void { foreach (self::LOWER_CASE_WORDS as $lowercase) { $name = mb_ereg_replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name); } - return $name; } /** * Process options with given name * * @param string $name - * - * @return string */ - private static function processOptions(string $name): string + private static function processOptions(string &$name): void { if (self::$options['roman']) { - $name = self::updateRoman($name); + self::updateRoman($name); } if (self::$options['spanish']) { - $name = self::fixConjunction($name); + self::fixConjunction($name); } if (self::$options['postnominal']) { - $name = self::fixPostNominal($name); + self::fixPostNominal($name); } - - return $name; } /** * Fix roman numeral names. 
* * @param string $name - * - * @return string */ - private static function updateRoman(string $name): string + private static function updateRoman(string &$name): void { - return mb_ereg_replace_callback(self::ROMAN_REGEX, function ($matches) { + $name = mb_ereg_replace_callback(self::ROMAN_REGEX, function ($matches) { return mb_strtoupper($matches[0]); }, $name); } @@ -417,35 +399,30 @@ private static function updateRoman(string $name): string * Fix Spanish conjunctions. * * @param string $name - * - * @return string */ - private static function fixConjunction(string $name): string + private static function fixConjunction(string &$name): void { foreach (self::CONJUNCTIONS as $conjunction) { $name = mb_ereg_replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name); } - return $name; } /** * Fix post-nominal letter cases. * * @param string $name - * @return string */ - private static function fixPostNominal(string $name): string + private static function fixPostNominal(string &$name): void { $postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded); foreach ($postNominals as $postNominal) { $name = mb_ereg_replace('\b' . $postNominal . '\b', $postNominal, $name, 'ix'); } - return $name; } /** * Replace HTML entities with placeholders. 
- * + * @param string $name * @return array */ From 529edbd5f3c9eee0fea438239f124ec433715906 Mon Sep 17 00:00:00 2001 From: Yuri Tkachenko Date: Tue, 6 Aug 2024 16:20:28 +0100 Subject: [PATCH 3/3] fix: simplify the fix --- src/Formatter.php | 37 ++++++------------------------------- 1 file changed, 6 insertions(+), 31 deletions(-) diff --git a/src/Formatter.php b/src/Formatter.php index 9eac9a0..f384b75 100644 --- a/src/Formatter.php +++ b/src/Formatter.php @@ -190,9 +190,6 @@ public static function nameCase(?string $name = '', ?array $options = []): strin self::setOptions($options); - // Temporarily replace HTML encoded entities with placeholders - $placeholders = self::replaceHtmlEntitiesWithPlaceholders($name); - // Do not do anything if string is mixed and lazy option is true. if ( ! self::canBeProcessed($name)) { return $name; @@ -219,8 +216,7 @@ public static function nameCase(?string $name = '', ?array $options = []): strin self::processOptions($name); - // After name casing operations, restore HTML encoded entities - self::restoreHtmlEntitiesFromPlaceholders($name, $placeholders); + self::adjustHTMLEntities($name); return $name; } @@ -421,35 +417,14 @@ private static function fixPostNominal(string &$name): void } /** - * Replace HTML entities with placeholders. + * Lower-case every HTML entity (`&...;`) found in the name. * * @param string $name - * @return array */ - private static function replaceHtmlEntitiesWithPlaceholders(string &$name): array { - $placeholders = []; - $counter = 0; - - $name = preg_replace_callback('/&[a-zA-Z0-9#]+;/i', function($matches) use (&$placeholders, &$counter) { - $placeholder = mb_strtolower('HTML_ENTITY_PLACEHOLDER_' . $counter++. 
' '); // note space at the end, to avoid merging with the next word - $placeholders[$placeholder] = $matches[0]; - return $placeholder; + private static function adjustHTMLEntities(string &$name): void + { + $name = mb_ereg_replace_callback('&[a-zA-Z0-9#]+;', function ($matches) { + return mb_strtolower($matches[0]); }, $name); - - return $placeholders; } - - /** - * Restore HTML entities. - * - * @param string $name - * @param array $placeholders - * @return void - */ - private static function restoreHtmlEntitiesFromPlaceholders(string &$name, array $placeholders): void { - foreach ($placeholders as $placeholder => $entity) { - $name = preg_replace('/' . preg_quote($placeholder, '/') . '/i', $entity, $name); - } - } - }