From 43164fafc50c6b913f04b2d66407997cc8774588 Mon Sep 17 00:00:00 2001 From: Jens Segers Date: Tue, 21 Feb 2017 10:38:54 +0100 Subject: [PATCH] Use jaybizzle/crawler-detect for robot detection --- README.md | 8 +-- composer.json | 3 +- src/Agent.php | 145 +++++++++++++++++--------------------------- tests/AgentTest.php | 8 +-- 4 files changed, 65 insertions(+), 99 deletions(-) diff --git a/README.md b/README.md index e872645..572164b 100644 --- a/README.md +++ b/README.md @@ -24,13 +24,13 @@ Laravel (optional) Add the service provider in `app/config/app.php`: ```php -'Jenssegers\Agent\AgentServiceProvider', +Jenssegers\Agent\AgentServiceProvider::class, ``` And add the Agent alias to `app/config/app.php`: ```php -'Agent' => 'Jenssegers\Agent\Facades\Agent', +'Agent' => Jenssegers\Agent\Facades\Agent::class, ``` Basic Usage @@ -147,7 +147,7 @@ $agent->isPhone(); ### Robot detection -Check if the user is a robot. +Check if the user is a robot. This uses [jaybizzle/crawler-detect](https://github.com/JayBizzle/Crawler-Detect) to do the actual robot detection. ```php $agent->isRobot(); @@ -155,7 +155,7 @@ $agent->isRobot(); ### Robot name -Get the robot name. Note: this currently only works for major robots like Google, Facebook, Twitter, Bing, Baidu etc ... +Get the robot name. ```php $robot = $agent->robot(); diff --git a/composer.json b/composer.json index 47dec7f..342d2bd 100644 --- a/composer.json +++ b/composer.json @@ -13,7 +13,8 @@ "require": { "php": ">=5.4.0", "illuminate/support": "^4.0|^5.0", - "mobiledetect/mobiledetectlib": "^2.7.6" + "mobiledetect/mobiledetectlib": "^2.7.6", + "jaybizzle/crawler-detect": "^1.2" }, "require-dev": { "phpunit/phpunit": "^4.8|^5.0", diff --git a/src/Agent.php b/src/Agent.php index 32a9936..20f379e 100644 --- a/src/Agent.php +++ b/src/Agent.php @@ -3,10 +3,11 @@ namespace Jenssegers\Agent; use BadMethodCallException; +use Jaybizzle\CrawlerDetect\CrawlerDetect; use Mobile_Detect; -class Agent extends Mobile_Detect { - +class Agent extends Mobile_Detect +{ /** * List of desktop devices. * @@ -75,21 +76,9 @@ class Agent extends Mobile_Detect { ]; /** - * List of robots. - * - * @var array + * @var CrawlerDetect */ - protected static $robots = [ - 'Google' => 'googlebot', - 'MSNBot' => 'msnbot', - 'Baiduspider' => 'baiduspider', - 'Bing' => 'bingbot', - 'Yahoo' => 'yahoo', - 'Lycos' => 'lycos', - 'Facebook' => 'facebookexternalhit', - 'Twitter' => 'Twitterbot', - 'Yandex' => 'Yandex', - ]; + protected static $crawlerDetect; /** * Get all detection rules. These rules include the additional @@ -101,8 +90,7 @@ public function getDetectionRulesExtended() { static $rules; - if (!$rules) - { + if (! $rules) { $rules = $this->mergeRules( static::$additionalDevices, // NEW static::$phoneDevices, @@ -119,45 +107,48 @@ public function getDetectionRulesExtended() } /** - * Retrieve the current set of rules. - * - * @return array + * @inheritdoc */ public function getRules() { - if ($this->detectionType == static::DETECTION_TYPE_EXTENDED) - { + if ($this->detectionType == static::DETECTION_TYPE_EXTENDED) { return static::getDetectionRulesExtended(); - } - else - { + } else { return static::getMobileDetectionRules(); } } + /** + * @return CrawlerDetect + */ + public function getCrawlerDetect() + { + if (self::$crawlerDetect === null) { + self::$crawlerDetect = new CrawlerDetect(); + } + + return self::$crawlerDetect; + } + /** * Get accept languages. * + * @param string $acceptLanguage * @return array */ public function languages($acceptLanguage = null) { - if (! $acceptLanguage) - { + if (! $acceptLanguage) { $acceptLanguage = $this->getHttpHeader('HTTP_ACCEPT_LANGUAGE'); } - if ($acceptLanguage) - { + if ($acceptLanguage) { $languages = []; // Parse accept language string. - foreach (explode(',', $acceptLanguage) as $piece) - { + foreach (explode(',', $acceptLanguage) as $piece) { $parts = explode(';', $piece); - $language = strtolower($parts[0]); - $priority = empty($parts[1]) ? 1. : floatval(str_replace('q=', '', $parts[1])); $languages[$language] = $priority; @@ -175,19 +166,22 @@ public function languages($acceptLanguage = null) /** * Match a detection rule and return the matched key. * - * @param array $rules - * @param null $userAgent + * @param array $rules + * @param null $userAgent * @return string */ protected function findDetectionRulesAgainstUA(array $rules, $userAgent = null) { // Loop given rules - foreach ($rules as $key => $regex) - { - if (empty($regex)) continue; + foreach ($rules as $key => $regex) { + if (empty($regex)) { + continue; + } // Check match - if ($this->match($regex, $userAgent)) return $key ?: reset($this->matchesArray); + if ($this->match($regex, $userAgent)) { + return $key ?: reset($this->matchesArray); + } } return false; @@ -196,6 +190,7 @@ protected function findDetectionRulesAgainstUA(array $rules, $userAgent = null) /** * Get the browser name. * + * @param null $userAgent * @return string */ public function browser($userAgent = null) @@ -256,7 +251,7 @@ public function device($userAgent = null) */ public function isDesktop($userAgent = null, $httpHeaders = null) { - return ! $this->isMobile() && ! $this->isTablet() && ! $this->isRobot(); + return ! $this->isMobile($userAgent, $httpHeaders) && ! $this->isTablet($userAgent, $httpHeaders) && ! $this->isRobot($userAgent); } /** @@ -268,63 +263,44 @@ public function isDesktop($userAgent = null, $httpHeaders = null) */ public function isPhone($userAgent = null, $httpHeaders = null) { - return $this->isMobile() && ! $this->isTablet(); + return $this->isMobile($userAgent, $httpHeaders) && ! $this->isTablet($userAgent, $httpHeaders); } /** * Get the robot name. * * @param string $userAgent - * @return string + * @return string|bool */ public function robot($userAgent = null) { - // Get bot rules - $rules = $this->mergeRules( - static::$robots, // NEW - [static::$utilities['Bot']], - [static::$utilities['MobileBot']] - ); + if ($this->getCrawlerDetect()->isCrawler($userAgent ?: $this->userAgent)) { + return ucfirst($this->getCrawlerDetect()->getMatches()); + } - return $this->findDetectionRulesAgainstUA($rules, $userAgent); + return false; } /** * Check if device is a robot. * - * @param string $userAgent + * @param string $userAgent * @return bool */ public function isRobot($userAgent = null) { - // Get bot rules - $rules = $this->mergeRules( - [static::$utilities['Bot']], - [static::$utilities['MobileBot']], - static::$robots // NEW - ); - - foreach ($rules as $regex) - { - // Check for match - if ($this->match($regex, $userAgent)) return true; - } - - return false; + return $this->getCrawlerDetect()->isCrawler($userAgent ?: $this->userAgent); } /** - * Check the version of the given property in the User-Agent. - * - * @inherit + * @inheritdoc */ public function version($propertyName, $type = self::VERSION_TYPE_STRING) { $check = key(static::$additionalProperties); // Check if the additional properties have been added already - if ( ! array_key_exists($check, parent::$properties)) - { + if (! array_key_exists($check, parent::$properties)) { // TODO: why is mergeRules not working here? parent::$properties = array_merge( parent::$properties, @@ -344,23 +320,15 @@ protected function mergeRules() { $merged = []; - foreach (func_get_args() as $rules) - { - foreach ($rules as $key => $value) - { - if (empty($merged[$key])) - { + foreach (func_get_args() as $rules) { + foreach ($rules as $key => $value) { + if (empty($merged[$key])) { $merged[$key] = $value; - } - else - { - if (is_array($merged[$key])) - { + } else { + if (is_array($merged[$key])) { $merged[$key][] = $value; - } - else - { - $merged[$key] .= '|' . $value; + } else { + $merged[$key] .= '|'.$value; } } } @@ -370,15 +338,12 @@ protected function mergeRules() } /** - * Changing detection type to extended. - * - * @inherit + * @inheritdoc */ public function __call($name, $arguments) { // Make sure the name starts with 'is', otherwise - if (substr($name, 0, 2) != 'is') - { + if (substr($name, 0, 2) != 'is') { throw new BadMethodCallException("No such method exists: $name"); } diff --git a/tests/AgentTest.php b/tests/AgentTest.php index 2ce6dd4..f065993 100644 --- a/tests/AgentTest.php +++ b/tests/AgentTest.php @@ -29,10 +29,10 @@ class AgentTest extends PHPUnit_Framework_TestCase { ]; private $robots = [ - 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' => 'Google', - 'facebookexternalhit/1.1 (+http(s)://www.facebook.com/externalhit_uatext.php)' => 'Facebook', - 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)' => 'Bing', - 'Twitterbot/1.0' => 'Twitter', + 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' => 'Googlebot', + 'facebookexternalhit/1.1 (+http(s)://www.facebook.com/externalhit_uatext.php)' => 'Facebookexternalhit', + 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)' => 'Bingbot', + 'Twitterbot/1.0' => 'Twitterbot', 'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)' => 'Yandex', ];