Skip to content

Commit

Permalink
Use jaybizzle/crawler-detect for robot detection
Browse files Browse the repository at this point in the history
  • Loading branch information
jenssegers committed Feb 21, 2017
1 parent 4aa50dc commit 43164fa
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 99 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ Laravel (optional)
Add the service provider in `app/config/app.php`:

```php
'Jenssegers\Agent\AgentServiceProvider',
Jenssegers\Agent\AgentServiceProvider::class,
```

And add the Agent alias to `app/config/app.php`:

```php
'Agent' => 'Jenssegers\Agent\Facades\Agent',
'Agent' => Jenssegers\Agent\Facades\Agent::class,
```

Basic Usage
Expand Down Expand Up @@ -147,15 +147,15 @@ $agent->isPhone();

### Robot detection

Check if the user is a robot.
Check if the user is a robot. This uses [jaybizzle/crawler-detect](https://github.com/JayBizzle/Crawler-Detect) to do the actual robot detection.

```php
$agent->isRobot();
```

### Robot name

Get the robot name. Note: this currently only works for major robots like Google, Facebook, Twitter, Bing, Baidu etc ...
Get the robot name.

```php
$robot = $agent->robot();
Expand Down
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"require": {
"php": ">=5.4.0",
"illuminate/support": "^4.0|^5.0",
"mobiledetect/mobiledetectlib": "^2.7.6"
"mobiledetect/mobiledetectlib": "^2.7.6",
"jaybizzle/crawler-detect": "^1.2"
},
"require-dev": {
"phpunit/phpunit": "^4.8|^5.0",
Expand Down
145 changes: 55 additions & 90 deletions src/Agent.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
namespace Jenssegers\Agent;

use BadMethodCallException;
use Jaybizzle\CrawlerDetect\CrawlerDetect;
use Mobile_Detect;

class Agent extends Mobile_Detect {

class Agent extends Mobile_Detect
{
/**
* List of desktop devices.
*
Expand Down Expand Up @@ -75,21 +76,9 @@ class Agent extends Mobile_Detect {
];

/**
* List of robots.
*
* @var array
* @var CrawlerDetect
*/
protected static $robots = [
'Google' => 'googlebot',
'MSNBot' => 'msnbot',
'Baiduspider' => 'baiduspider',
'Bing' => 'bingbot',
'Yahoo' => 'yahoo',
'Lycos' => 'lycos',
'Facebook' => 'facebookexternalhit',
'Twitter' => 'Twitterbot',
'Yandex' => 'Yandex',
];
protected static $crawlerDetect;

/**
* Get all detection rules. These rules include the additional
Expand All @@ -101,8 +90,7 @@ public function getDetectionRulesExtended()
{
static $rules;

if (!$rules)
{
if (! $rules) {
$rules = $this->mergeRules(
static::$additionalDevices, // NEW
static::$phoneDevices,
Expand All @@ -119,45 +107,48 @@ public function getDetectionRulesExtended()
}

/**
* Retrieve the current set of rules.
*
* @return array
* @inheritdoc
*/
public function getRules()
{
if ($this->detectionType == static::DETECTION_TYPE_EXTENDED)
{
if ($this->detectionType == static::DETECTION_TYPE_EXTENDED) {
return static::getDetectionRulesExtended();
}
else
{
} else {
return static::getMobileDetectionRules();
}
}

/**
* @return CrawlerDetect
*/
public function getCrawlerDetect()
{
    // The detector is kept in a static property, so it is built at most once
    // and then reused by every later call.
    if (self::$crawlerDetect !== null) {
        return self::$crawlerDetect;
    }

    // First use: create the detector, cache it, and hand it back.
    return self::$crawlerDetect = new CrawlerDetect();
}

/**
* Get accept languages.
*
* @param string $acceptLanguage
* @return array
*/
public function languages($acceptLanguage = null)
{
if (! $acceptLanguage)
{
if (! $acceptLanguage) {
$acceptLanguage = $this->getHttpHeader('HTTP_ACCEPT_LANGUAGE');
}

if ($acceptLanguage)
{
if ($acceptLanguage) {
$languages = [];

// Parse accept language string.
foreach (explode(',', $acceptLanguage) as $piece)
{
foreach (explode(',', $acceptLanguage) as $piece) {
$parts = explode(';', $piece);

$language = strtolower($parts[0]);

$priority = empty($parts[1]) ? 1. : floatval(str_replace('q=', '', $parts[1]));

$languages[$language] = $priority;
Expand All @@ -175,19 +166,22 @@ public function languages($acceptLanguage = null)
/**
* Match a detection rule and return the matched key.
*
* @param array $rules
* @param null $userAgent
* @param array $rules
* @param string|null $userAgent
* @return string
*/
protected function findDetectionRulesAgainstUA(array $rules, $userAgent = null)
{
// Loop given rules
foreach ($rules as $key => $regex)
{
if (empty($regex)) continue;
foreach ($rules as $key => $regex) {
if (empty($regex)) {
continue;
}

// Check match
if ($this->match($regex, $userAgent)) return $key ?: reset($this->matchesArray);
if ($this->match($regex, $userAgent)) {
return $key ?: reset($this->matchesArray);
}
}

return false;
Expand All @@ -196,6 +190,7 @@ protected function findDetectionRulesAgainstUA(array $rules, $userAgent = null)
/**
* Get the browser name.
*
* @param string|null $userAgent
* @return string
*/
public function browser($userAgent = null)
Expand Down Expand Up @@ -256,7 +251,7 @@ public function device($userAgent = null)
*/
public function isDesktop($userAgent = null, $httpHeaders = null)
{
return ! $this->isMobile() && ! $this->isTablet() && ! $this->isRobot();
return ! $this->isMobile($userAgent, $httpHeaders) && ! $this->isTablet($userAgent, $httpHeaders) && ! $this->isRobot($userAgent);
}

/**
Expand All @@ -268,63 +263,44 @@ public function isDesktop($userAgent = null, $httpHeaders = null)
*/
public function isPhone($userAgent = null, $httpHeaders = null)
{
return $this->isMobile() && ! $this->isTablet();
return $this->isMobile($userAgent, $httpHeaders) && ! $this->isTablet($userAgent, $httpHeaders);
}

/**
* Get the robot name.
*
* @param string $userAgent
* @return string
* @return string|bool
*/
public function robot($userAgent = null)
{
// Get bot rules
$rules = $this->mergeRules(
static::$robots, // NEW
[static::$utilities['Bot']],
[static::$utilities['MobileBot']]
);
if ($this->getCrawlerDetect()->isCrawler($userAgent ?: $this->userAgent)) {
return ucfirst($this->getCrawlerDetect()->getMatches());
}

return $this->findDetectionRulesAgainstUA($rules, $userAgent);
return false;
}

/**
* Check if device is a robot.
*
* @param string $userAgent
* @param string $userAgent
* @return bool
*/
public function isRobot($userAgent = null)
{
// Get bot rules
$rules = $this->mergeRules(
[static::$utilities['Bot']],
[static::$utilities['MobileBot']],
static::$robots // NEW
);

foreach ($rules as $regex)
{
// Check for match
if ($this->match($regex, $userAgent)) return true;
}

return false;
return $this->getCrawlerDetect()->isCrawler($userAgent ?: $this->userAgent);
}

/**
* Check the version of the given property in the User-Agent.
*
* @inherit
* @inheritdoc
*/
public function version($propertyName, $type = self::VERSION_TYPE_STRING)
{
$check = key(static::$additionalProperties);

// Check if the additional properties have been added already
if ( ! array_key_exists($check, parent::$properties))
{
if (! array_key_exists($check, parent::$properties)) {
// TODO: why is mergeRules not working here?
parent::$properties = array_merge(
parent::$properties,
Expand All @@ -344,23 +320,15 @@ protected function mergeRules()
{
$merged = [];

foreach (func_get_args() as $rules)
{
foreach ($rules as $key => $value)
{
if (empty($merged[$key]))
{
foreach (func_get_args() as $rules) {
foreach ($rules as $key => $value) {
if (empty($merged[$key])) {
$merged[$key] = $value;
}
else
{
if (is_array($merged[$key]))
{
} else {
if (is_array($merged[$key])) {
$merged[$key][] = $value;
}
else
{
$merged[$key] .= '|' . $value;
} else {
$merged[$key] .= '|'.$value;
}
}
}
Expand All @@ -370,15 +338,12 @@ protected function mergeRules()
}

/**
* Changing detection type to extended.
*
* @inherit
* @inheritdoc
*/
public function __call($name, $arguments)
{
// Make sure the name starts with 'is', otherwise
if (substr($name, 0, 2) != 'is')
{
if (substr($name, 0, 2) != 'is') {
throw new BadMethodCallException("No such method exists: $name");
}

Expand Down
8 changes: 4 additions & 4 deletions tests/AgentTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ class AgentTest extends PHPUnit_Framework_TestCase {
];

private $robots = [
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' => 'Google',
'facebookexternalhit/1.1 (+http(s)://www.facebook.com/externalhit_uatext.php)' => 'Facebook',
'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)' => 'Bing',
'Twitterbot/1.0' => 'Twitter',
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' => 'Googlebot',
'facebookexternalhit/1.1 (+http(s)://www.facebook.com/externalhit_uatext.php)' => 'Facebookexternalhit',
'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)' => 'Bingbot',
'Twitterbot/1.0' => 'Twitterbot',
'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)' => 'Yandex',
];

Expand Down

5 comments on commit 43164fa

@JayBizzle
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I approve 👍 😄

@jenssegers
Copy link
Owner Author

@jenssegers jenssegers commented on 43164fa Feb 21, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JayBizzle Backlink = love 😁

@earnaway
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

@kyranb
Copy link

@kyranb kyranb commented on 43164fa Mar 6, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Loving the improvements 👍

It might be worth highlighting that robot names are now returned as the matched user-agent token (with the first letter capitalised) rather than a friendly name, e.g. `Twitterbot` instead of `Twitter` and `Facebookexternalhit` instead of `Facebook`.

I was checking `Agent::robot() == 'Twitter'` in my application, and this update broke that comparison.

@jenssegers
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kyranb I probably should have bumped the major version instead of the minor one :)

Please sign in to comment.