Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TASK] Add Preg::matchAll to wrap preg_match_all with error handling #1328

Merged
merged 1 commit into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions src/Utilities/Preg.php
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,37 @@ public function match(string $pattern, string $subject, ?array &$matches = null)
return $result;
}

/**
 * Performs a global regular expression match via `preg_match_all`, adding error handling.
 *
 * On success, the value returned by `preg_match_all` (the number of full pattern matches) is passed through and
 * `$matches` holds whatever `preg_match_all` stored there using its default flags.
 *
 * On failure, the error is handed to `logOrThrowPregLastError()`.  If no exception is thrown, zero (`0`) is returned
 * and `$matches` is overwritten with a list of empty arrays - one for the full pattern match plus one for each
 * subpattern match a caller might expect.
 * The number of subpatterns is estimated by counting every opening parenthesis in `$pattern`.  This can overestimate
 * (so the list may be longer than expected), but it never underestimates.
 *
 * The `$flags` and `$offset` parameters of `preg_match_all` are not currently supported.
 *
 * @param non-empty-string $pattern
 * @param array<int, array<int, string>> $matches
 *
 * @throws \RuntimeException
 */
public function matchAll(string $pattern, string $subject, ?array &$matches = null): int
{
    $matchCount = \preg_match_all($pattern, $subject, $matches);
    if ($matchCount !== false) {
        return $matchCount;
    }

    $this->logOrThrowPregLastError();

    // Slot 0 stands for the full match; each `(` contributes one further (possibly surplus) slot.
    $matches = \array_fill(0, 1 + \substr_count($pattern, '('), []);

    return 0;
}

/**
* Obtains the name of the error constant for `preg_last_error`
* (based on code posted at {@see https://www.php.net/manual/en/function.preg-last-error.php#124124})
Expand Down
174 changes: 174 additions & 0 deletions tests/Unit/Utilities/PregTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ static function (Preg $testSubject): void {
$testSubject->match('/', '');
},
],
'matchAll' => [
static function (Preg $testSubject): void {
$testSubject->matchAll('/', '');
},
],
];
}

Expand Down Expand Up @@ -476,4 +481,173 @@ public function matchSetsMatchesToEmptyArrayOnError(): void

self::assertSame([], $matches);
}

/**
 * Supplies patterns and subjects for `matchAll`, each with the match count it is expected to return.
 *
 * The keys of every data set mirror the parameter names of `matchAllReturnsMatchCount`, so the data sets remain
 * valid should the test runner pass string-keyed data as named arguments.
 *
 * @return array<non-empty-string, array{
 *     pattern: non-empty-string,
 *     subject: string,
 *     expectedMatchCount: int,
 * }>
 */
public function providePregMatchAllArgumentsAndExpectedMatchCount(): array
{
    return [
        'no match' => [
            'pattern' => '/fab/',
            'subject' => 'abba',
            'expectedMatchCount' => 0,
        ],
        'one match' => [
            'pattern' => '/ab/',
            'subject' => 'abba',
            'expectedMatchCount' => 1,
        ],
        'two matches' => [
            'pattern' => '/a/',
            'subject' => 'abba',
            'expectedMatchCount' => 2,
        ],
    ];
}

/**
 * Checks that `matchAll` returns the expected number of matches for a valid pattern.
 *
 * @test
 *
 * @param non-empty-string $pattern
 *
 * @dataProvider providePregMatchAllArgumentsAndExpectedMatchCount
 */
public function matchAllReturnsMatchCount(string $pattern, string $subject, int $expectedMatchCount): void
{
    $actualMatchCount = (new Preg())->matchAll($pattern, $subject);

    self::assertSame($expectedMatchCount, $actualMatchCount);
}

/**
 * Supplies patterns and subjects for `matchAll`, each with the `$matches` array it is expected to produce.
 *
 * The expected arrays are grouped per (sub)pattern: index `0` lists the full pattern matches, and each following
 * index lists the matches of the corresponding subpattern.
 *
 * The keys of every data set mirror the parameter names of `matchAllSetsMatches`, so the data sets remain valid
 * should the test runner pass string-keyed data as named arguments.
 *
 * @return array<non-empty-string, array{
 *     pattern: non-empty-string,
 *     subject: string,
 *     expectedMatches: array<int, array<int, string>>,
 * }>
 */
public function providePregMatchAllArgumentsAndExpectedMatches(): array
{
    return [
        'no match' => [
            'pattern' => '/fab/',
            'subject' => 'abba',
            'expectedMatches' => [[]],
        ],
        'one match' => [
            'pattern' => '/ab/',
            'subject' => 'abba',
            'expectedMatches' => [['ab']],
        ],
        'two matches' => [
            'pattern' => '/a/',
            'subject' => 'abba',
            'expectedMatches' => [['a', 'a']],
        ],
        'with subpattern match' => [
            'pattern' => '/a(b)/',
            'subject' => 'abba',
            'expectedMatches' => [['ab'], ['b']],
        ],
        'with two subpattern matches' => [
            'pattern' => '/a(b|$)/',
            'subject' => 'abba',
            'expectedMatches' => [['ab', 'a'], ['b', '']],
        ],
        'with matches for two subpatterns' => [
            'pattern' => '/a(b(b))/',
            'subject' => 'abba',
            'expectedMatches' => [['abb'], ['bb'], ['b']],
        ],
    ];
}

/**
 * Checks that `matchAll` stores the expected matches in its by-reference `$matches` argument.
 *
 * @test
 *
 * @param non-empty-string $pattern
 * @param array<int, array<int, string>> $expectedMatches
 *
 * @dataProvider providePregMatchAllArgumentsAndExpectedMatches
 */
public function matchAllSetsMatches(string $pattern, string $subject, array $expectedMatches): void
{
    $preg = new Preg();

    // `$actualMatches` is created by the by-reference call below.
    $preg->matchAll($pattern, $subject, $actualMatches);

    self::assertSame($expectedMatches, $actualMatches);
}

/**
 * Checks that `matchAll` returns zero when given a pattern that cannot be processed.
 *
 * @test
 */
public function matchAllReturnsZeroOnError(): void
{
    $testSubject = new Preg();

    // `@` silences any PHP diagnostics raised while handling the deliberately malformed pattern.
    $matchCount = @$testSubject->matchAll('/', 'abba');

    self::assertSame(0, $matchCount);
}

/**
 * Supplies patterns that lack a closing delimiter, each with the number of subpatterns it contains.
 *
 * Real-world failures would stem from valid but complex patterns; since those cannot be simulated reliably, malformed
 * patterns are used as a stand-in.
 *
 * @return array<non-empty-string, array{
 *     pattern: non-empty-string,
 *     subpatternCount: int,
 * }>
 */
public function provideFailingPatternAndSubpatternCount(): array
{
    $dataSets = [];

    $dataSets['no subpatterns'] = ['pattern' => '/', 'subpatternCount' => 0];
    $dataSets['one subpattern'] = ['pattern' => '/(a)', 'subpatternCount' => 1];
    $dataSets['two subpattern'] = ['pattern' => '/(a)(b)', 'subpatternCount' => 2];

    return $dataSets;
}

/**
 * Checks that, on error, `matchAll` sets `$matches` to a list of empty arrays with one entry for the full pattern
 * plus one per subpattern.
 *
 * @test
 *
 * @param non-empty-string $pattern
 *
 * @dataProvider provideFailingPatternAndSubpatternCount
 */
public function matchAllSetsMatchesToSufficientLengthArrayOfEmptyArraysOnError(
    string $pattern,
    int $subpatternCount
): void {
    $testSubject = new Preg();

    // `@` silences any PHP diagnostics raised while handling the deliberately malformed pattern.
    @$testSubject->matchAll($pattern, 'abba', $matches);

    // The documented contract is only "at least this long"; an `assertCountAtLeast` would express that better.
    self::assertCount(1 + $subpatternCount, $matches);

    // Every entry must be an empty array, so nothing may survive the filter.
    $isNotEmptyArray = static function (array $patternOrSubpatternMatches): bool {
        return [] !== $patternOrSubpatternMatches;
    };
    self::assertCount(0, \array_filter($matches, $isNotEmptyArray));
}
}