Skip to content

Commit

Permalink
[Feature] Domain custom resolve (#22)
Browse files Browse the repository at this point in the history
* initial commit for support custom domain resolving

* Fix styling

* wip

* Use correct config

* Test data

* wip

* Make curlopt_resolve option array

* wip

* wip

* wip

* Disable cache temporarily

* Fix styling

* wip

* Fix styling

* revert changes

* Fix styling

* Check if ip is not null

* test

* wip

* Remove file extension validating as it is not working as expected

* Remove test code

* Add tests

* Fix styling

---------

Co-authored-by: markvaneijk <[email protected]>
Co-authored-by: Bas van Dinther <[email protected]>
Co-authored-by: Baspa <[email protected]>
  • Loading branch information
4 people authored May 29, 2023
1 parent 453625d commit 45d24a1
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 2 deletions.
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"guzzlehttp/guzzle": "^7.5",
"illuminate/contracts": "^9.0|^10.0",
"j0k3r/php-readability": "^2.0",
"jeremykendall/php-domain-parser": "^6.3",
"spatie/laravel-package-tools": "^1.13",
"symfony/dom-crawler": "^6.2",
"symfony/finder": "^6.2",
Expand Down
14 changes: 14 additions & 0 deletions config/seo.php
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,20 @@
'vapor-ui/*',
],

/*
|--------------------------------------------------------------------------
| Domains (DNS resolving)
|--------------------------------------------------------------------------
|
| Here you can map domains to the IP address they should resolve to.
| This can be used, for example, to bypass certain DNS layers such as
| the Cloudflare proxy, or to resolve a domain to localhost.
|
*/
'resolve' => [
'example.com' => '127.0.0.1',
],

/*
|--------------------------------------------------------------------------
| Database
Expand Down
3 changes: 1 addition & 2 deletions src/Checks/Content/BrokenLinkCheck.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,9 @@ public function validateContent(Crawler $crawler): bool
$content = collect($content)->filter(fn ($value) => $value !== null)
->map(fn ($link) => addBaseIfRelativeUrl($link, $this->url))
->filter(function ($link) {
// Filter out all links that are mailto, tel or have a file extension
// Filter out all links that are mailto or tel
if (preg_match('/^mailto:/msi', $link) ||
preg_match('/^tel:/msi', $link) ||
preg_match('/\.[a-z]{2,4}$/msi', $link) ||
filter_var($link, FILTER_VALIDATE_URL) === false
) {
return false;
Expand Down
26 changes: 26 additions & 0 deletions src/helpers.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ function getRemoteStatus(string $url): int
$options = [
CURLOPT_RETURNTRANSFER => true,
CURLOPT_HEADER => true,
CURLOPT_NOBODY => true,
CURLOPT_TIMEOUT => 30,
CURLOPT_FOLLOWLOCATION => true,
];
Expand All @@ -44,6 +45,18 @@ function getRemoteStatus(string $url): int
$options[CURLOPT_SSL_VERIFYSTATUS] = false;
}

$domain = parse_url($url, PHP_URL_HOST);

if (in_array($domain, array_keys(config('seo.resolve')))) {
$port = str_contains($url, 'https://') ? 443 : 80;

$ipAddress = config('seo.resolve')[$domain];

if (! empty($ipAddress)) {
$options[CURLOPT_RESOLVE] = ["{$domain}:{$port}:{$ipAddress}"];
}
}

curl_setopt_array($handle, $options);
curl_exec($handle);

Expand All @@ -70,6 +83,7 @@ function getRemoteFileSize(string $url): int
$handle = curl_init($url);

if (! $handle) {

return 0;
}

Expand All @@ -90,6 +104,18 @@ function getRemoteFileSize(string $url): int
$options[CURLOPT_SSL_VERIFYSTATUS] = false;
}

$domain = parse_url($url, PHP_URL_HOST);

if (in_array($domain, array_keys(config('seo.resolve')))) {
$port = str_contains($url, 'https://') ? 443 : 80;

$ipAddress = config('seo.resolve')[$domain];

if (! empty($ipAddress)) {
$options[CURLOPT_RESOLVE] = ["{$domain}:{$port}:{$ipAddress}"];
}
}

curl_setopt_array($handle, $options);

$data = curl_exec($handle);
Expand Down
34 changes: 34 additions & 0 deletions tests/Checks/Content/BrokenLinkCheckTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,37 @@

$this->assertFalse($check->check(Http::get('vormkracht10.nl'), $crawler));
});

// Scenario: `seo.resolve` pins vormkracht10.nl to a specific IP address and the
// broken-link check is expected to FAIL for a page linking to that domain.
// NOTE(review): the title reads like a success case, yet the assertion expects
// `false` — confirm the title matches the intended behaviour.
it('can bypass DNS layers using DNS resolving', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();

// Fake the page response: a document whose body holds a single link to https://vormkracht10.nl.
Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl">Vormkracht10</a></body></html>', 200),
]);

// Feed the faked response body into the crawler handed to the check.
$crawler->addHtmlContent(Http::get('vormkracht10.nl')->body());

// Override the resolve map for this test only.
// 240.0.0.0 is in the reserved 240.0.0.0/4 range — presumably chosen because
// nothing answers there; TODO confirm.
config(['seo.resolve' => [
'vormkracht10.nl' => '240.0.0.0',
]]);

// The check must report failure for the pinned, presumably unreachable, address.
$this->assertFalse($check->check(Http::get('vormkracht10.nl'), $crawler));
});

// Scenario: `seo.resolve` pins vormkracht10.nl to 8.8.8.8 and the broken-link
// check is expected to PASS for a page linking to that domain.
// NOTE(review): the title says "cannot bypass" with "a fake IP", yet the
// assertion expects `true` — confirm the title matches the intended behaviour.
// NOTE(review): the outcome presumably depends on a real network request to
// 8.8.8.8 made by the link check (not covered by Http::fake) — verify this is
// stable in CI.
it('cannot bypass DNS layers using a fake IP when DNS resolving', function () {
$check = new BrokenLinkCheck();
$crawler = new Crawler();

// Override the resolve map for this test only.
config(['seo.resolve' => [
'vormkracht10.nl' => '8.8.8.8',
]]);

// Fake the page response: a document whose body holds a single link to https://vormkracht10.nl.
Http::fake([
'vormkracht10.nl' => Http::response('<html><head></head><body><a href="https://vormkracht10.nl">Vormkracht10</a></body></html>', 200),
]);

// Feed the faked response body into the crawler handed to the check.
$crawler->addHtmlContent(Http::get('vormkracht10.nl')->body());

// The check must report success with the domain pinned to 8.8.8.8.
$this->assertTrue($check->check(Http::get('vormkracht10.nl'), $crawler));
});

0 comments on commit 45d24a1

Please sign in to comment.