Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: possible fix for html encoded strings #21

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 39 additions & 46 deletions src/Formatter.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ public static function nameCase(?string $name = '', ?array $options = []): strin
$original = $name;

// Capitalize
$name = self::capitalize($name);
self::capitalize($name);

foreach (self::getReplacements() as $pattern => $replacement) {
$name = mb_ereg_replace($pattern, $replacement, $name);

Expand All @@ -210,10 +211,14 @@ public static function nameCase(?string $name = '', ?array $options = []): strin
// @codeCoverageIgnoreEnd
}

$name = self::correctInitialNames($name);
$name = self::correctLowerCaseWords($name);
self::correctInitialNames($name);
self::correctLowerCaseWords($name);

self::processOptions($name);

self::adjustHTMLEntities($name);

return self::processOptions($name);
return $name;
}

/**
Expand Down Expand Up @@ -251,10 +256,8 @@ private static function skipMixed(string $name): bool
* Capitalize first letters.
*
* @param string $name
*
* @return string
*/
private static function capitalize(string $name): string
private static function capitalize(string &$name): void
{
$name = mb_strtolower($name);

Expand All @@ -267,38 +270,34 @@ private static function capitalize(string $name): string
return mb_strtolower($matches[0]);
}, $name);

return self::updateIrish($name);
self::updateIrish($name);
}

/**
* Update for Irish names.
*
* @param string $name
*
* @return string
*/
private static function updateIrish(string $name): string
private static function updateIrish(string &$name): void
{
if ( ! self::$options['irish']) return $name;
if ( ! self::$options['irish']) return;

if (
mb_ereg_match('.*?\bMac[A-Za-z]{2,}[^aciozj]\b', $name) ||
mb_ereg_match('.*?\bMc', $name)
) {
$name = self::updateMac($name);
self::updateMac($name);
}

return mb_ereg_replace('Macmurdo', 'MacMurdo', $name);
$name = mb_ereg_replace('Macmurdo', 'MacMurdo', $name);
}

/**
* Update Irish Mac & Mc.
*
* @param string $name
*
* @return string
*/
private static function updateMac(string $name): string
private static function updateMac(string &$name): void
{
$name = mb_ereg_replace_callback('\b(Ma?c)([A-Za-z]+)', function ($matches) {
return $matches[1] . mb_strtoupper(mb_substr($matches[2], 0, 1)) . mb_substr($matches[2], 1);
Expand All @@ -308,8 +307,6 @@ private static function updateMac(string $name): string
foreach (self::EXCEPTIONS as $pattern => $replacement) {
$name = mb_ereg_replace($pattern, $replacement, $name);
}

return $name;
}

/**
Expand All @@ -336,12 +333,10 @@ private static function getReplacements(): array
* Correct capitalization of initial names like JJ and TJ.
*
* @param string $name
*
* @return string
*/
private static function correctInitialNames(string $name): string
private static function correctInitialNames(string &$name): void
{
return mb_ereg_replace_callback(self::INITIAL_NAME_REGEX, function ($matches) {
$name = mb_ereg_replace_callback(self::INITIAL_NAME_REGEX, function ($matches) {
$match = $matches[0];

if (in_array($matches[1], self::INITIAL_NAME_EXCEPTIONS)) {
Expand All @@ -356,51 +351,42 @@ private static function correctInitialNames(string $name): string
* Correct lower-case words of titles.
*
* @param string $name
*
* @return string
*/
private static function correctLowerCaseWords(string $name): string
private static function correctLowerCaseWords(string &$name): void
{
foreach (self::LOWER_CASE_WORDS as $lowercase) {
$name = mb_ereg_replace('\b' . $lowercase . '\b', mb_strtolower($lowercase), $name);
}
return $name;
}

/**
* Process options with given name
*
* @param string $name
*
* @return string
*/
private static function processOptions(string $name): string
private static function processOptions(string &$name): void
{
if (self::$options['roman']) {
$name = self::updateRoman($name);
self::updateRoman($name);
}

if (self::$options['spanish']) {
$name = self::fixConjunction($name);
self::fixConjunction($name);
}

if (self::$options['postnominal']) {
$name = self::fixPostNominal($name);
self::fixPostNominal($name);
}

return $name;
}

/**
* Fix roman numeral names.
*
* @param string $name
*
* @return string
*/
private static function updateRoman(string $name): string
private static function updateRoman(string &$name): void
{
return mb_ereg_replace_callback(self::ROMAN_REGEX, function ($matches) {
$name = mb_ereg_replace_callback(self::ROMAN_REGEX, function ($matches) {
return mb_strtoupper($matches[0]);
}, $name);
}
Expand All @@ -409,29 +395,36 @@ private static function updateRoman(string $name): string
* Fix Spanish conjunctions.
*
* @param string $name
*
* @return string
*/
private static function fixConjunction(string $name): string
private static function fixConjunction(string &$name): void
{
foreach (self::CONJUNCTIONS as $conjunction) {
$name = mb_ereg_replace('\b' . $conjunction . '\b', mb_strtolower($conjunction), $name);
}
return $name;
}

/**
* Fix post-nominal letter cases.
*
* @param string $name
* @return string
*/
private static function fixPostNominal(string $name): string
private static function fixPostNominal(string &$name): void
{
$postNominals = array_diff(self::POST_NOMINALS, self::$postNominalsExcluded);
foreach ($postNominals as $postNominal) {
$name = mb_ereg_replace('\b' . $postNominal . '\b', $postNominal, $name, 'ix');
}
return $name;
}

/**
 * Restore HTML entities to lower case.
 *
 * Every `&...;` sequence made up of ASCII letters, digits or `#` is
 * lower-cased in place, so an entity whose name was capitalised
 * (e.g. `&Amp;`) becomes valid again (`&amp;`). Entities are NOT decoded.
 *
 * @param string $name Name to adjust; modified by reference.
 */
private static function adjustHTMLEntities(string &$name): void
{
    $adjusted = mb_ereg_replace_callback(
        '&[a-zA-Z0-9#]+;',
        static function (array $matches): string {
            return mb_strtolower($matches[0]);
        },
        $name
    );

    // mb_ereg_replace_callback() returns false on error and null for input
    // that is invalid in the current encoding. Keep the name untouched in
    // those cases instead of overwriting the caller's string with a non-string.
    if (is_string($adjusted)) {
        $name = $adjusted;
    }
}
}
42 changes: 42 additions & 0 deletions tests/HtmlEncodingTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?php namespace Tamtamchik\NameCase\Test;

use PHPUnit\Framework\TestCase;
use function Tamtamchik\NameCase\str_name_case;

/**
 * Regression tests for names that contain HTML-encoded entities.
 *
 * Reported problem: passing an encoded ampersand through the formatter turned
 * it into `&Amp;`, which breaks decoding. HTML entities such as `&lt;`,
 * `&gt;`, `&amp;`, `&#39;` and `&quot;` must be left in lower case.
 *
 * Workaround the reporter had to apply before the fix:
 *
 *     $adjusted_name = str_name_case($input);
 *     $adjusted_name = preg_replace_callback('/&[a-zA-Z0-9#]+;/', function ($matches) {
 *         return strtolower($matches[0]);
 *     }, $adjusted_name);
 */
class HtmlEncodingTest extends TestCase
{
    /**
     * Correctly cased names; each one is lower-cased, re-formatted and
     * expected to come back exactly as written here.
     *
     * @var string[]
     */
    private $names = [
        "Keith & Leo da Vinci",
        "Keith &amp; Yusof bin Ishak",
        "Keith &amp; Leo &amp; Ben",
        "Keith &amp; Leo & ben Gurion",
        "Keith &amp; Leo &amp; MacMurdo & Paul &quot;Ringo&quot;",
        "Keith &amp; Leo &amp; John & Paul \"Ringo\"",
        "&lt;Keith&gt; &amp; Leo",
        "<Keith> & Leonard",
        "&#39;Keith&#39; & Leo",
        "'Keith' &amp; Charles II",
    ];

    /** Test function call. */
    public function testCallWorks(): void
    {
        foreach ($this->names as $name) {
            // assertSame pins the exact string; the message identifies the failing fixture.
            $this->assertSame(
                $name,
                str_name_case(mb_strtolower($name)),
                "Unexpected casing for: {$name}"
            );
        }
    }
}