Skip to content

Commit

Permalink
Merge pull request #131 from spatie/collection-fix
Browse files Browse the repository at this point in the history
Support both Illuminate and Tighten Collection
  • Loading branch information
brendt authored Mar 2, 2018
2 parents 14f998c + f37d9ea commit bdee79c
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

All notable changes to `spatie/crawler` will be documented in this file.

## 4.0.3 - 2018-03-02

- Support both `Illuminate`'s and `Tighten`'s `Collection`.

## 4.0.2 - 2018-03-01

- Fix bugs when installing into a Laravel app
Expand Down
17 changes: 11 additions & 6 deletions src/CrawlQueue/CollectionCrawlQueue.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,21 @@
namespace Spatie\Crawler\CrawlQueue;

use Spatie\Crawler\CrawlUrl;
use Tightenco\Collect\Support\Collection;
use Spatie\Crawler\Exception\UrlNotFoundByIndex;

class CollectionCrawlQueue implements CrawlQueue
{
/** @var \Tightenco\Collect\Support\Collection */
/** @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection */
protected $urls;

/** @var \Tightenco\Collect\Support\Collection */
/** @var \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection */
protected $pendingUrls;

public function __construct()
{
$this->urls = new Collection();
$this->urls = collect();

$this->pendingUrls = new Collection();
$this->pendingUrls = collect();
}

public function add(CrawlUrl $url): CrawlQueue
Expand Down Expand Up @@ -91,7 +90,13 @@ public function getFirstPendingUrl()
return $this->pendingUrls->first();
}

protected function contains(Collection $collection, CrawlUrl $searchCrawlUrl): bool
/**
* @param \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection $collection
* @param \Spatie\Crawler\CrawlUrl $searchCrawlUrl
*
* @return bool
*/
protected function contains($collection, CrawlUrl $searchCrawlUrl): bool
{
foreach ($collection as $crawlUrl) {
if ((string) $crawlUrl->url === (string) $searchCrawlUrl->url) {
Expand Down
13 changes: 9 additions & 4 deletions src/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
use Symfony\Component\DomCrawler\Link;
use Psr\Http\Message\ResponseInterface;
use Spatie\Crawler\CrawlQueue\CrawlQueue;
use Tightenco\Collect\Support\Collection;
use GuzzleHttp\Exception\RequestException;
use Spatie\Crawler\CrawlQueue\CollectionCrawlQueue;
use Symfony\Component\DomCrawler\Crawler as DomCrawler;
Expand Down Expand Up @@ -357,7 +356,7 @@ protected function addAllLinksToCrawlQueue(string $html, UriInterface $foundOnUr
{
$allLinks = $this->extractAllLinks($html, $foundOnUrl);

(new Collection($allLinks))
collect($allLinks)
->filter(function (UriInterface $url) {
return $this->hasCrawlableScheme($url);
})
Expand Down Expand Up @@ -400,15 +399,21 @@ protected function shouldCrawl(Node $node): bool
return $node->getDepth() <= $this->maximumDepth;
}

protected function extractAllLinks(string $html, UriInterface $foundOnUrl): Collection
/**
* @param string $html
* @param \Psr\Http\Message\UriInterface $foundOnUrl
*
* @return \Illuminate\Support\Collection|\Tightenco\Collect\Support\Collection|null
*/
protected function extractAllLinks(string $html, UriInterface $foundOnUrl)
{
if ($this->executeJavaScript) {
$html = $this->getBodyAfterExecutingJavaScript($foundOnUrl);
}

$domCrawler = new DomCrawler($html, $foundOnUrl);

return (new Collection($domCrawler->filterXpath('//a')->links()))
return collect($domCrawler->filterXpath('//a')->links())
->map(function (Link $link) {
try {
return new Uri($link->getUri());
Expand Down

0 comments on commit bdee79c

Please sign in to comment.