From cb3622128fccec6e7fef9a688b232f10da510c07 Mon Sep 17 00:00:00 2001
From: Bas van Dinther
Date: Fri, 13 Jan 2023 13:18:43 +0100
Subject: [PATCH] Check if a relative url is used when performing broken link checks (#17)

* Check if relative url and try to build an absolute url

* Fix styling

* Add tests

* Fix styling

Co-authored-by: Baspa
---
Reviewer note (text between "---" and the diffstat is ignored by git am):
the src/helpers.php hunk was revised during review, so the commit id in the
"From" line and the post-image blob id in that hunk's "index" line describe
the originally submitted version. addBaseIfRelativeUrl() now handles
protocol-relative URLs, trims trailing slashes from the base, and resolves
against the origin of the checked URL instead of its full path.
NOTE(review): the faked response body in the new test contains no relative
link as shown here -- confirm against the original commit.

 src/Checks/Content/BrokenImageCheck.php      |  1 +
 src/Checks/Content/BrokenLinkCheck.php       | 27 ++++++-----
 src/Seo.php                                  |  4 ++
 src/Traits/PerformCheck.php                  |  4 ++
 src/helpers.php                              | 50 ++++++++++++++++++++
 tests/Checks/Content/BrokenLinkCheckTest.php | 13 +++++
 6 files changed, 87 insertions(+), 12 deletions(-)

diff --git a/src/Checks/Content/BrokenImageCheck.php b/src/Checks/Content/BrokenImageCheck.php
index a5dae2c4..852cb2ed 100644
--- a/src/Checks/Content/BrokenImageCheck.php
+++ b/src/Checks/Content/BrokenImageCheck.php
@@ -47,6 +47,7 @@ public function validateContent(Crawler $crawler): bool
         }
 
         $content = collect($content)->filter(fn ($value) => $value !== null)
+            ->map(fn ($link) => addBaseIfRelativeUrl($link, $this->url))
             ->filter(fn ($link) => isBrokenLink($link))->toArray();
 
         $this->actualValue = $content;
diff --git a/src/Checks/Content/BrokenLinkCheck.php b/src/Checks/Content/BrokenLinkCheck.php
index 76772b99..ebcfe8d3 100644
--- a/src/Checks/Content/BrokenLinkCheck.php
+++ b/src/Checks/Content/BrokenLinkCheck.php
@@ -46,18 +46,21 @@ public function validateContent(Crawler $crawler): bool
             return true;
         }
 
-        $content = collect($content)->filter(fn ($value) => $value !== null)->filter(function ($link) {
-            // Filter out all links that are mailto, tel or have a file extension
-            if (preg_match('/^mailto:/msi', $link) ||
-                preg_match('/^tel:/msi', $link) ||
-                preg_match('/\.[a-z]{2,4}$/msi', $link) ||
-                filter_var($link, FILTER_VALIDATE_URL) === false
-            ) {
-                return false;
-            }
-
-            return $link;
-        })->filter(fn ($link) => isBrokenLink($link))->toArray();
+        $content = collect($content)->filter(fn ($value) => $value !== null)
+            ->map(fn ($link) => addBaseIfRelativeUrl($link, $this->url))
+            ->filter(function ($link) {
+                // Filter out all links that are mailto, tel or have a file extension
+                if (preg_match('/^mailto:/msi', $link) ||
+                    preg_match('/^tel:/msi', $link) ||
+                    preg_match('/\.[a-z]{2,4}$/msi', $link) ||
+                    filter_var($link, FILTER_VALIDATE_URL) === false
+                ) {
+                    return false;
+                }
+
+                return $link;
+            })
+            ->filter(fn ($link) => isBrokenLink($link))->toArray();
 
         $this->actualValue = $content;
 
diff --git a/src/Seo.php b/src/Seo.php
index 5e8c72f0..50009bd0 100755
--- a/src/Seo.php
+++ b/src/Seo.php
@@ -19,6 +19,8 @@ class Seo
      */
     public ProgressBar|null $progress;
 
+    public string $url;
+
     public function __construct(
         protected Http $http,
         protected Collection $successful,
@@ -29,6 +31,7 @@ public function __construct(
     public function check(string $url, ProgressBar|null $progress = null): SeoScore
     {
         $this->progress = $progress;
+        $this->url = $url;
 
         try {
             $response = $this->visitPage(url: $url);
@@ -66,6 +69,7 @@ private function runChecks(Response $response): void
                 'checks' => $checks,
                 'progress' => $this->progress,
                 'crawler' => $crawler,
+                'url' => $this->url,
             ])
             ->through($checks->keys()->toArray())
             ->then(function ($data) {
diff --git a/src/Traits/PerformCheck.php b/src/Traits/PerformCheck.php
index 46431e6b..b4200dc7 100644
--- a/src/Traits/PerformCheck.php
+++ b/src/Traits/PerformCheck.php
@@ -6,8 +6,12 @@
 
 trait PerformCheck
 {
+    public string|null $url = null;
+
     public function __invoke(array $data, Closure $next)
     {
+        $this->url = $data['url'] ?? null;
+
         if (! in_array('exit', $data)) {
             $result = $this->check($data['response'], $data['crawler']);
         }
diff --git a/src/helpers.php b/src/helpers.php
index 173fce9c..34826c26 100644
--- a/src/helpers.php
+++ b/src/helpers.php
@@ -135,3 +135,53 @@ function bytesToHumanReadable(int $bytes): string
         return round($bytes / (1000 ** $i), 2).' '.$units[$i];
     }
 }
+
+if (! function_exists('addBaseIfRelativeUrl')) {
+    /**
+     * Turn a root-relative URL ("/about") into an absolute URL.
+     *
+     * URLs that do not start with "/" are returned untouched. The base is the
+     * configured app.url when it is set, otherwise the origin (scheme, host
+     * and port) of the URL that is being checked.
+     */
+    function addBaseIfRelativeUrl(string $url, string|null $checkedUrl = null): string
+    {
+        if (! Str::startsWith($url, '/')) {
+            return $url;
+        }
+
+        // Protocol-relative URLs ("//cdn.example.com/app.js") already name a
+        // host: they only need a scheme and must not get a base prepended.
+        if (Str::startsWith($url, '//')) {
+            $scheme = parse_url((string) config('app.url'), PHP_URL_SCHEME)
+                ?: parse_url((string) $checkedUrl, PHP_URL_SCHEME)
+                ?: 'https';
+
+            return $scheme.':'.$url;
+        }
+
+        if (config('app.url')) {
+            // Trim trailing slashes so the result does not contain "//".
+            return rtrim((string) config('app.url'), '/').$url;
+        }
+
+        if ($checkedUrl) {
+            $parts = parse_url($checkedUrl);
+
+            // Resolve against the origin of the checked page, not its full
+            // path: "/about" found on "https://example.com/blog/post" is
+            // "https://example.com/about".
+            if (is_array($parts) && isset($parts['scheme'], $parts['host'])) {
+                $port = isset($parts['port']) ? ':'.$parts['port'] : '';
+
+                return $parts['scheme'].'://'.$parts['host'].$port.$url;
+            }
+
+            // No scheme and host could be parsed (e.g. "example.com"): keep
+            // the old behaviour of prefixing the checked URL as given.
+            return rtrim($checkedUrl, '/').$url;
+        }
+
+        return $url;
+    }
+}
diff --git a/tests/Checks/Content/BrokenLinkCheckTest.php b/tests/Checks/Content/BrokenLinkCheckTest.php
index 5eb2cd71..2f4c3871 100644
--- a/tests/Checks/Content/BrokenLinkCheckTest.php
+++ b/tests/Checks/Content/BrokenLinkCheckTest.php
@@ -42,3 +42,16 @@
 
     $this->assertTrue($check->check(Http::get('vormkracht10.nl'), $crawler));
 });
+
+it('can run the broken link check on a relative url', function () {
+    $check = new BrokenLinkCheck();
+    $crawler = new Crawler();
+
+    Http::fake([
+        'vormkracht10.nl' => Http::response('Vormkracht10', 200),
+    ]);
+
+    $crawler->addHtmlContent(Http::get('vormkracht10.nl')->body());
+
+    $this->assertFalse($check->check(Http::get('vormkracht10.nl'), $crawler));
+});