Skip to content

Commit

Permalink
Added OpenAI and Claude bots.
Browse files Browse the repository at this point in the history
Added tests.
Updated dependencies.
  • Loading branch information
hexydec committed Aug 1, 2024
1 parent b3892bd commit f23f690
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 29 deletions.
53 changes: 27 additions & 26 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 7 additions & 3 deletions src/mappings/crawlers.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ public static function getApp(string $value, array $data = []) : array {
'yisouspider' => 'search',
'360spider' => 'search',
'sogou web spider' => 'search',
'bytespider' => 'crawler'
'bytespider' => 'crawler',
'claudebot' => 'ai'
];
$apps = [
'yacybot' => 'YacyBot',
Expand Down Expand Up @@ -124,7 +125,9 @@ public static function getApp(string $value, array $data = []) : array {
'googledocs' => 'Google Docs',
'user-agent: seolyt' => 'SEOlyt',
'bytespider' => 'ByteDance Spider',
'spider-feedback@bytedance.com' => 'ByteDance Spider'
'spider-feedback@bytedance.com' => 'ByteDance Spider',
'oai-searchbot' => 'OpenAI SearchBot',
'chatgpt-user' => 'ChatGPT User'
];

$lower = \mb_strtolower($parts[0]);
Expand Down Expand Up @@ -391,14 +394,15 @@ public static function get() : array {
'http-client/' => new props('any', $fn['scraper']),
'HttpClient/' => new props('any', $fn['scraper']),
'PowerShell/' => new props('start', $fn['scraper']),
'OAI-SearchBot/' => new props('start', $fn['search']),
'GPTBot/' => new props('start', $fn['ai']),
'Diffbot/' => new props('start', $fn['ai']),
'Amazonbot/' => new props('start', $fn['ai']),
'Applebot/' => new props('start', $fn['ai']),
'PerplexityBot/' => new props('start', $fn['ai']),
'YouBot/' => new props('start', $fn['ai']),
'Google-Extended' => new props('start', $fn['ai']),
'ChatGPT-User/' => new props('start', $fn['feed']),
'ChatGPT-User/' => new props('start', $fn['ai']),
'facebookexternalhit/' => new props('start', $fn['feed']),
'facebookcatalog/' => new props('start', $fn['feed']),
'Validator' => new props('any', $fn['validator']),
Expand Down
27 changes: 27 additions & 0 deletions tests/crawlersTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,15 @@ public function testSearch() : void {
'appname' => 'YandexRenderResourcesBot',
'appversion' => '1.0',
'url' => 'http://yandex.com/bots'
],
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot' => [
'string' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot',
'type' => 'robot',
'category' => 'search',
'app' => 'OpenAI SearchBot',
'appname' => 'OAI-SearchBot',
'appversion' => '1.0',
'url' => 'https://openai.com/searchbot'
]
];
foreach ($strings AS $ua => $item) {
Expand Down Expand Up @@ -589,6 +598,15 @@ public function testFeed() : void {
'appname' => 'filemanager.downloads.http.client - contentfilemanagerdaemons-downloadFromUrlTq2Worke',
'appversion' => '2.0',
'url' => 'http://dev.hubspot.com/'
],
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot' => [
'string' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot',
'type' => 'robot',
'category' => 'ai',
'app' => 'ChatGPT User',
'appname' => 'ChatGPT-User',
'appversion' => '1.0',
'url' => 'https://openai.com/bot'
]
];
foreach ($strings AS $ua => $item) {
Expand Down Expand Up @@ -1278,6 +1296,15 @@ public function testAi() : void {
'appname' => 'YouBot',
'appversion' => '1.0',
'url' => 'https://about.you.com/youbot/'
],
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot' => [
'string' => 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; GPTBot/1.1; +https://openai.com/gptbot',
'type' => 'robot',
'category' => 'ai',
'app' => 'GPTBot',
'appname' => 'GPTBot',
'appversion' => '1.1',
'url' => 'https://openai.com/gptbot'
]
];
foreach ($strings AS $ua => $item) {
Expand Down

0 comments on commit f23f690

Please sign in to comment.