-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Catch all Bing and Googlebots, plus clean up
- Loading branch information
Showing 2 changed files with 33 additions and 51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,8 +94,6 @@ var bots = map[string]bool{ | |
"Mozilla/5.0 (compatible; AwarioBot/1.0; +https://awario.com/bots.html)": true, | ||
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html": true, | ||
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)": true, | ||
"Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)": true, | ||
"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)": true, | ||
"Mozilla/5.0 (compatible; BitSightBot/1.0)": true, | ||
"Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)": true, | ||
"Mozilla/5.0 (compatible; BomboraBot/1.0; +http://www.bombora.com/bot)": true, | ||
|
@@ -119,7 +117,6 @@ var bots = map[string]bool{ | |
"Mozilla/5.0 (compatible; FFZBot/3.0.0; +https://www.frankerfacez.com)": true, | ||
"Mozilla/5.0 (compatible; Findxbot/1.0; +http://www.findxbot.com)": true, | ||
"Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/)": true, | ||
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)": true, | ||
"Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-2014-11-14T15:29:34Z +http://citeseerx.ist.psu.edu/)": true, | ||
"Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20140702-2247 +http://archive.org/details/archive.org_bot)": true, | ||
|
@@ -228,12 +225,8 @@ var bots = map[string]bool{ | |
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Weibo (iPhone14,3__weibo__13.6.3__iphone__os15.1)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 Weibo (iPhone12,8__weibo__13.6.3__iphone__os15.6)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; SiteAuditBot/0.97; +http://www.semrush.com/bot.html)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Laserlikebot/0.1)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; Baiduspider-render/2.0; +http://www.baidu.com/search/spider.html)": true, | ||
"Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1": true, | ||
|
@@ -246,11 +239,6 @@ var bots = map[string]bool{ | |
"Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; Bytespider; https://zhanzhang.toutiao.com/)": true, | ||
"Mozilla/5.0 (Linux; Android 5.0) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; Bytespider; [email protected])": true, | ||
"Mozilla/5.0 (Linux; Android 5.1.1; A37f) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.74 Mobile Safari/537.36": true, | ||
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.179 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.120 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0 (Linux; Android 6.0.1; vivo 1606 Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36 VivoBrowser/5.7.0.6": true, | ||
"Mozilla/5.0 (Linux; Android 6.0.1; vivo Y66 Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36 VivoBrowser/10.9.14.0": true, | ||
"Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)": true, | ||
|
@@ -317,19 +305,12 @@ var bots = map[string]bool{ | |
"Mozilla/5.0 (X11; U; Linux i686; nl; rv:1.9) Gecko/2008061015 Firefox/3.0": true, | ||
"Mozilla/5.0 (X11; U; Linux i686; pt-BR; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6": true, | ||
"Mozilla/5.0 (X11; U; Linux x86_64; cs-CZ; rv:1.9.1.7) Gecko/20100106 Ubuntu/9.10 (karmic) Firefox/3.5.7": true, | ||
"Mozilla/5.0 (X11; U; Linux x86_64; de; rv:1.9.2.8) Googlebot-Compatible Gecko/20100723 Ubuntu/10.04 (lucid) Firefox/3.6.8": true, | ||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.18) Gecko/20081112 Fedora/2.0.0.18-1.fc8 Firefox/2.0.0.18": true, | ||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.3) Gecko/20090914 Slackware/13.0_stable Firefox/3.5.3": true, | ||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9a1) Gecko/20060112 Firefox/1.6a1": true, | ||
"Mozilla/5.0 (X11; U; Linux x86_64; zh-TW; rv:1.9.0.13) Gecko/2009080315 Ubuntu/9.04 (jaunty) Firefox/3.0.13": true, | ||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Xing Bot": true, | ||
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot": true, | ||
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/100.0.4896.127 Safari/537.36": true, | ||
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/103.0.5060.134 Safari/537.36": true, | ||
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/112.0.0.0 Safari/537.36": true, | ||
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/114.0.5735.179 Safari/537.36": true, | ||
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/79.0.3945.120 Safari/537.36": true, | ||
"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview Analytics) Chrome/27.0.1453 Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0(compatible;Googlebot/2.1; +http://www.google.com/bot.html)": true, | ||
"Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)": true, | ||
"Mozilla/5.0/(compatible; heritrix/3.3.0-SNAPSHOT-20150803-2130 +http://literatur-im-netz.dla-marbach.de)": true, | ||
|
@@ -360,18 +341,13 @@ var bots = map[string]bool{ | |
"repology-linkchecker/1 (+https://repology.org/docs/bots)": true, | ||
"rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, [email protected])": true, | ||
"Ruby": true, | ||
"SafeDNS search bot/Nutch-1.9 (https://www.safedns.com/searchbot; support [at] safedns [dot] com)": true, | ||
"SafeDNSBot (https://www.safedns.com/searchbot)": true, | ||
"SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; http://www.google.com/bot.html)": true, | ||
"SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)": true, | ||
"Screaming Frog SEO Spider/4.1": true, | ||
"Screaming Frog SEO Spider/5.0": true, | ||
"Screaming Frog SEO Spider/5.1": true, | ||
"Sellers.Guide Crawler by Primis": true, | ||
"semanticbot": true, | ||
"SEMrushBot": true, | ||
"SeRanking SEOChecker": true, | ||
"SerendeputyBot/0.8.6 (http://serendeputy.com/about/serendeputy-bot)": true, | ||
"SafeDNS search bot/Nutch-1.9 (https://www.safedns.com/searchbot; support [at] safedns [dot] com)": true, | ||
"SafeDNSBot (https://www.safedns.com/searchbot)": true, | ||
"Sellers.Guide Crawler by Primis": true, | ||
"semanticbot": true, | ||
"SEMrushBot": true, | ||
"SeRanking SEOChecker": true, | ||
"SerendeputyBot/0.8.6 (http://serendeputy.com/about/serendeputy-bot)": true, | ||
"serpstatbot/2.1 (advanced backlink tracking bot; https://serpstatbot.com/; [email protected])": true, | ||
"ShowyouBot (http://showyou.com/crawler)": true, | ||
"SiteCheckerBotCrawler/1.0 (+http://sitechecker.pro)": true, | ||
|
@@ -392,25 +368,17 @@ var bots = map[string]bool{ | |
"tiny.write.as": true, | ||
"Traackr.com Bot": true, | ||
"Twingly Recon": true, | ||
"Twingly Recon-Imse/1.0 (+https://app.twingly.com/public-docs/crawler)": true, | ||
"Twingly Recon-Wally/1.0 (+https://app.twingly.com/public-docs/crawler)": true, | ||
"Twitterbot": true, | ||
"voltron": true, | ||
"webprosbot/2.0 (+mailto:[email protected])": true, | ||
"webscraper": true, | ||
"WhatsApp/2.23.12.78 A": true, | ||
"Who.is Bot": true, | ||
"Wotbox/2.01 (+http://www.wotbox.com/bot/)": true, | ||
"wp.com feedbot/1.0 (+https://wp.com)": true, | ||
"Write.as v1.7.0; Android": true, | ||
"write.as": true, | ||
"WriteFreely.org Crawler (https://writefreely.org/instances)": true, | ||
"Y!J-ASR/0.1 crawler (http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/)": true, | ||
"yacybot (-global; amd64 Linux 5.10.0-23-amd64; java 17.0.7; Europe/en) http://yacy.net/bot.html": true, | ||
"yacybot (/global; amd64 Linux 5.15.0-73-generic; java 1.8.0_372; Etc/en) http://yacy.net/bot.html": true, | ||
"yacybot (/global; amd64 Windows 10 10.0; java 11.0.18; Europe/pl) http://yacy.net/bot.html": true, | ||
"yacybot (/global; x86_64 Mac OS X 10.11.4; java 1.8.0_77; America/en) http://yacy.net/bot.html": true, | ||
"yacybot (freeworld/global; amd64 Linux 3.12.43-52.6-default; java 1.8.0_40; Europe/en) http://yacy.net/bot.html": true, | ||
"Twitterbot": true, | ||
"voltron": true, | ||
"webprosbot/2.0 (+mailto:[email protected])": true, | ||
"webscraper": true, | ||
"Who.is Bot": true, | ||
"Wotbox/2.01 (+http://www.wotbox.com/bot/)": true, | ||
"wp.com feedbot/1.0 (+https://wp.com)": true, | ||
"Write.as v1.7.0; Android": true, | ||
"write.as": true, | ||
"WriteFreely.org Crawler (https://writefreely.org/instances)": true, | ||
"Y!J-ASR/0.1 crawler (http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/)": true, | ||
"YisouSpider": true, | ||
"ZoominfoBot (zoominfobot at zoominfo dot com)": true, | ||
} | ||
|
@@ -445,8 +413,15 @@ var botPrefixes = []string{ | |
"semanticbot ", | ||
"SummalyBot/", | ||
"TelegramBot", | ||
"Twingly Recon-", | ||
"Twitterbot/", | ||
"WhatsApp/", | ||
"yacybot ", | ||
} | ||
|
||
// botPhrases lists substrings that mark a User-Agent as a bot when they
// appear anywhere in the string. IsBot checks them with strings.Contains,
// which is case-sensitive, so every capitalization that crawlers actually
// send must be listed explicitly.
var botPhrases = []string{
	"bingbot",
	// Bing's crawler also identifies itself as "Bingbot/2.0"; without this
	// entry that variant is not matched by the lowercase phrase above.
	"Bingbot",
	"Googlebot",
}
|
||
// IsBot returns whether or not the provided User-Agent string is a known bot | ||
|
@@ -463,5 +438,10 @@ func IsBot(ua string) bool { | |
return true | ||
} | ||
} | ||
for _, p := range botPhrases { | ||
if strings.Contains(ua, p) { | ||
return true | ||
} | ||
} | ||
return false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters