Skip to content

Commit

Permalink
Catch all Bing and Googlebots, plus clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
thebaer committed Jul 6, 2023
1 parent dee1e7c commit 7dc8a92
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 51 deletions.
80 changes: 30 additions & 50 deletions bots/bots.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,6 @@ var bots = map[string]bool{
"Mozilla/5.0 (compatible; AwarioBot/1.0; +https://awario.com/bots.html)": true,
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html": true,
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)": true,
"Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)": true,
"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)": true,
"Mozilla/5.0 (compatible; BitSightBot/1.0)": true,
"Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)": true,
"Mozilla/5.0 (compatible; BomboraBot/1.0; +http://www.bombora.com/bot)": true,
Expand All @@ -119,7 +117,6 @@ var bots = map[string]bool{
"Mozilla/5.0 (compatible; FFZBot/3.0.0; +https://www.frankerfacez.com)": true,
"Mozilla/5.0 (compatible; Findxbot/1.0; +http://www.findxbot.com)": true,
"Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/)": true,
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)": true,
"Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-2014-11-14T15:29:34Z +http://citeseerx.ist.psu.edu/)": true,
"Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20140702-2247 +http://archive.org/details/archive.org_bot)": true,
Expand Down Expand Up @@ -228,12 +225,8 @@ var bots = map[string]bool{
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Weibo (iPhone14,3__weibo__13.6.3__iphone__os15.1)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Weibo (iPhone12,8__weibo__13.6.3__iphone__os15.6)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; SiteAuditBot/0.97; +http://www.semrush.com/bot.html)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Laserlikebot/0.1)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)": true,
"Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1": true,
Expand All @@ -246,11 +239,6 @@ var bots = map[string]bool{
"Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; Bytespider; https://zhanzhang.toutiao.com/)": true,
"Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; Bytespider; [email protected])": true,
"Mozilla/5.0 (Linux; Android 5.1.1; A37f) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.74 Mobile Safari/537.36": true,
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.179 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.120 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0 (Linux; Android 6.0.1; vivo 1606 Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36 VivoBrowser/5.7.0.6": true,
"Mozilla/5.0 (Linux; Android 6.0.1; vivo Y66 Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36 VivoBrowser/10.9.14.0": true,
"Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)": true,
Expand Down Expand Up @@ -317,19 +305,12 @@ var bots = map[string]bool{
"Mozilla/5.0 (X11; U; Linux i686; nl; rv:1.9) Gecko/2008061015 Firefox/3.0": true,
"Mozilla/5.0 (X11; U; Linux i686; pt-BR; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6": true,
"Mozilla/5.0 (X11; U; Linux x86_64; cs-CZ; rv:1.9.1.7) Gecko/20100106 Ubuntu/9.10 (karmic) Firefox/3.5.7": true,
"Mozilla/5.0 (X11; U; Linux x86_64; de; rv:1.9.2.8) Googlebot-Compatible Gecko/20100723 Ubuntu/10.04 (lucid) Firefox/3.6.8": true,
"Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.18) Gecko/20081112 Fedora/2.0.0.18-1.fc8 Firefox/2.0.0.18": true,
"Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.3) Gecko/20090914 Slackware/13.0_stable Firefox/3.5.3": true,
"Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9a1) Gecko/20060112 Firefox/1.6a1": true,
"Mozilla/5.0 (X11; U; Linux x86_64; zh-TW; rv:1.9.0.13) Gecko/2009080315 Ubuntu/9.04 (jaunty) Firefox/3.0.13": true,
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Xing Bot": true,
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot": true,
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/100.0.4896.127 Safari/537.36": true,
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/103.0.5060.134 Safari/537.36": true,
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/112.0.0.0 Safari/537.36": true,
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/114.0.5735.179 Safari/537.36": true,
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36": true,
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview Analytics) Chrome/27.0.1453 Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0(compatible;Googlebot/2.1; +http://www.google.com/bot.html)": true,
"Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)": true,
"Mozilla/5.0/(compatible; heritrix/3.3.0-SNAPSHOT-20150803-2130 +http://literatur-im-netz.dla-marbach.de)": true,
Expand Down Expand Up @@ -360,18 +341,13 @@ var bots = map[string]bool{
"repology-linkchecker/1 (+https://repology.org/docs/bots)": true,
"rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, [email protected])": true,
"Ruby": true,
"SafeDNS search bot/Nutch-1.9 (https://www.safedns.com/searchbot; support [at] safedns [dot] com)": true,
"SafeDNSBot (https://www.safedns.com/searchbot)": true,
"SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; http://www.google.com/bot.html)": true,
"SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)": true,
"Screaming Frog SEO Spider/4.1": true,
"Screaming Frog SEO Spider/5.0": true,
"Screaming Frog SEO Spider/5.1": true,
"Sellers.Guide Crawler by Primis": true,
"semanticbot": true,
"SEMrushBot": true,
"SeRanking SEOChecker": true,
"SerendeputyBot/0.8.6 (http://serendeputy.com/about/serendeputy-bot)": true,
"SafeDNS search bot/Nutch-1.9 (https://www.safedns.com/searchbot; support [at] safedns [dot] com)": true,
"SafeDNSBot (https://www.safedns.com/searchbot)": true,
"Sellers.Guide Crawler by Primis": true,
"semanticbot": true,
"SEMrushBot": true,
"SeRanking SEOChecker": true,
"SerendeputyBot/0.8.6 (http://serendeputy.com/about/serendeputy-bot)": true,
"serpstatbot/2.1 (advanced backlink tracking bot; https://serpstatbot.com/; [email protected])": true,
"ShowyouBot (http://showyou.com/crawler)": true,
"SiteCheckerBotCrawler/1.0 (+http://sitechecker.pro)": true,
Expand All @@ -392,25 +368,17 @@ var bots = map[string]bool{
"tiny.write.as": true,
"Traackr.com Bot": true,
"Twingly Recon": true,
"Twingly Recon-Imse/1.0 (+https://app.twingly.com/public-docs/crawler)": true,
"Twingly Recon-Wally/1.0 (+https://app.twingly.com/public-docs/crawler)": true,
"Twitterbot": true,
"voltron": true,
"webprosbot/2.0 (+mailto:[email protected])": true,
"webscraper": true,
"WhatsApp/2.23.12.78 A": true,
"Who.is Bot": true,
"Wotbox/2.01 (+http://www.wotbox.com/bot/)": true,
"wp.com feedbot/1.0 (+https://wp.com)": true,
"Write.as v1.7.0; Android": true,
"write.as": true,
"WriteFreely.org Crawler (https://writefreely.org/instances)": true,
"Y!J-ASR/0.1 crawler (http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/)": true,
"yacybot (-global; amd64 Linux 5.10.0-23-amd64; java 17.0.7; Europe/en) http://yacy.net/bot.html": true,
"yacybot (/global; amd64 Linux 5.15.0-73-generic; java 1.8.0_372; Etc/en) http://yacy.net/bot.html": true,
"yacybot (/global; amd64 Windows 10 10.0; java 11.0.18; Europe/pl) http://yacy.net/bot.html": true,
"yacybot (/global; x86_64 Mac OS X 10.11.4; java 1.8.0_77; America/en) http://yacy.net/bot.html": true,
"yacybot (freeworld/global; amd64 Linux 3.12.43-52.6-default; java 1.8.0_40; Europe/en) http://yacy.net/bot.html": true,
"Twitterbot": true,
"voltron": true,
"webprosbot/2.0 (+mailto:[email protected])": true,
"webscraper": true,
"Who.is Bot": true,
"Wotbox/2.01 (+http://www.wotbox.com/bot/)": true,
"wp.com feedbot/1.0 (+https://wp.com)": true,
"Write.as v1.7.0; Android": true,
"write.as": true,
"WriteFreely.org Crawler (https://writefreely.org/instances)": true,
"Y!J-ASR/0.1 crawler (http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/)": true,
"YisouSpider": true,
"ZoominfoBot (zoominfobot at zoominfo dot com)": true,
}
Expand Down Expand Up @@ -445,8 +413,15 @@ var botPrefixes = []string{
"semanticbot ",
"SummalyBot/",
"TelegramBot",
"Twingly Recon-",
"Twitterbot/",
"WhatsApp/",
"yacybot ",
}

// botPhrases lists substrings that mark a User-Agent as a bot when they
// appear anywhere in the string, regardless of the surrounding browser
// or device details. IsBot checks them with strings.Contains, which is
// case-sensitive, so each capitalization seen in the wild needs its own
// entry.
var botPhrases = []string{
	"bingbot",
	// Capitalized variant, e.g.
	// "Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)".
	"Bingbot",
	"Googlebot",
}

// IsBot returns whether or not the provided User-Agent string is a known bot
Expand All @@ -463,5 +438,10 @@ func IsBot(ua string) bool {
return true
}
}
for _, p := range botPhrases {
if strings.Contains(ua, p) {
return true
}
}
return false
}
4 changes: 3 additions & 1 deletion bots/bots_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import "testing"

func TestIsBot(t *testing.T) {
tests := map[string]bool{
"Twitterbot/1.0": true,
"Twitterbot/1.0": true,
"http.rb/2.2.2 (Mastodon/1.6.0; +https://insolente.im/)": true,
"http.rb/2.2.2 (Mastodon/1.5.1; +https://mastodon.cloud/)": true,
"http.rb/2.2.2 (Mastodon/1.6.0rc5; +https://mastodon.sdf.org/)": true,
Expand All @@ -13,6 +13,8 @@ func TestIsBot(t *testing.T) {
"Mozilla/5.0 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)": true,
"Mozilla/5.0 (compatible; archive.org_bot +http://www.archive.org/details/archive.org_bot)": true,
"Mozilla/5.0 (compatible; AhrefsBot/5.2; +http://ahrefs.com/robot/)": true,

"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true,
}

for ua, r := range tests {
Expand Down

0 comments on commit 7dc8a92

Please sign in to comment.