From d47d45d8d66a3ed1738462255e2f141b06ed2fee Mon Sep 17 00:00:00 2001 From: Vincent K Date: Sat, 19 Jan 2019 16:55:52 +1100 Subject: [PATCH 1/3] New Bot Majestic SEO Majestic SEO Crawler Added --- data/applications-bots.php | 1 + data/regexes/applications-bots.php | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/data/applications-bots.php b/data/applications-bots.php index 2490f67fe..79baabda4 100644 --- a/data/applications-bots.php +++ b/data/applications-bots.php @@ -146,6 +146,7 @@ [ 'name' => 'Livedoor', 'id' => 'livedoor', 'regexp' => '/livedoor/u' ], [ 'name' => 'LTX71', 'id' => 'ltx71', 'regexp' => '/ltx71/u' ], [ 'name' => 'Magpie RSS', 'id' => 'magpie', 'regexp' => '/MagpieRSS\/([0-9.]*)/u' ], + [ 'name' => 'Majestic', 'id' => 'mj12bot', 'regexp' => '/MJ12bot\/v?([0-9.]*)/u' ], [ 'name' => 'Mapian News Bot', 'id' => 'mapion', 'regexp' => '/mapion-news-bot\/([0-9.]*)/u' ], [ 'name' => 'Microsoft Social Streams', 'id' => 'socialstreams', 'regexp' => '/Microsoft MSN SocialStreams Bot/u' ], [ 'name' => 'Mixi', 'id' => 'mixi', 'regexp' => '/mixi-check\/([0-9.]*)/u' ], diff --git a/data/regexes/applications-bots.php b/data/regexes/applications-bots.php index 9d155c550..0ec100e8b 100644 --- a/data/regexes/applications-bots.php +++ b/data/regexes/applications-bots.php @@ -2,4 +2,4 @@ namespace WhichBrowser\Data; -Applications::$BOTS_REGEX = 
'/(008|360|a6|abound|muncher|adaxas|addthis|admantx|ahrefsbot|archiver|altavista|curious|cloudfront|amorank|archive|jeeves|astra|backlink|baidu|bazqux|bingbot|msnbot|msmobot|bing|blogbridge|bloglines|bloglovin|blogpulse|blogram|blogtrot|blogshares|boardreader|browsershots|bubing|butterfly|heritrix|cliqz|cloudflare|comodo|commafeed|cbot|watchdog|datasearch|daumoa|digg|domain|exabot|exactseek|ezooms|facebook|fast|flamingo|fastladder|feed|wrangler|validator|fever|friendica|robot|genieo|fetchor|google|gomez|ichiro|gooblog|goorss|grammarly|grub|hatena|capture|heureka|htdig|httpmon|hubpages|spider|crawler|kouio|larbin|linkedin|linkdex|livedoor|ltx71|magpie|mapion|socialstreams|mixi|mnogo|monitor|yeti|netcraft|netvibes|newsblur|newsgator|orange|pages|psbot|pingdom|pinterest|postrank|comment|rssbar|hunter|quora|safesearch|scrapy|seznam|shopwiki|reader|simplepie|site24|sogou|sophora|soso|spdycheck|spinn|detector|summify|telegram|twisted|twitter|tiny|tlsprober|typhoeus|vagabondo|voila|vocus|mcrawler|w3bot|jigsaw|w3c|wayback|webindexer|wordpress|mechanize|xerka|sitemap|yacy|yandex|yahoo|y\!j|slurp|httpclient|nutch|synapse|indy|wget|curl|package|java|simple|libwww|lwp|urllib|php|pear|zend|ruby)/i'; +Applications::$BOTS_REGEX = 
'/(008|360|a6|abound|muncher|adaxas|addthis|admantx|ahrefsbot|archiver|altavista|curious|cloudfront|amorank|archive|jeeves|astra|backlink|baidu|bazqux|bingbot|msnbot|msmobot|bing|blogbridge|bloglines|bloglovin|blogpulse|blogram|blogtrot|blogshares|boardreader|browsershots|bubing|butterfly|heritrix|cliqz|cloudflare|comodo|commafeed|cbot|watchdog|datasearch|daumoa|digg|domain|exabot|exactseek|ezooms|facebook|fast|flamingo|fastladder|feed|wrangler|validator|fever|friendica|robot|genieo|fetchor|google|gomez|ichiro|gooblog|goorss|grammarly|grub|hatena|capture|heureka|htdig|httpmon|hubpages|spider|crawler|kouio|larbin|linkedin|linkdex|livedoor|ltx71|magpie|mj12bot|mapion|socialstreams|mixi|mnogo|monitor|yeti|netcraft|netvibes|newsblur|newsgator|orange|pages|psbot|pingdom|pinterest|postrank|comment|rssbar|hunter|quora|safesearch|scrapy|seznam|shopwiki|reader|simplepie|site24|sogou|sophora|soso|spdycheck|spinn|detector|summify|telegram|twisted|twitter|tiny|tlsprober|typhoeus|vagabondo|voila|vocus|mcrawler|w3bot|jigsaw|w3c|wayback|webindexer|wordpress|mechanize|xerka|sitemap|yacy|yandex|yahoo|y\!j|slurp|httpclient|nutch|synapse|indy|wget|curl|package|java|simple|libwww|lwp|urllib|php|pear|zend|ruby)/i'; From 90a789b3b65202828a5d02aebc35eb54faa0dae6 Mon Sep 17 00:00:00 2001 From: Vincent K Date: Sat, 19 Jan 2019 17:00:10 +1100 Subject: [PATCH 2/3] New Bot HTTPUnit for Java Added http://httpunit.sourceforge.net/ --- data/applications-bots.php | 1 + data/regexes/applications-bots.php | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/data/applications-bots.php b/data/applications-bots.php index 79baabda4..70c2f8985 100644 --- a/data/applications-bots.php +++ b/data/applications-bots.php @@ -281,6 +281,7 @@ [ 'name' => 'Go', 'id' => 'package', 'regexp' => '/Go [0-9\.]+ package http/u' ], [ 'name' => 'Java', 'id' => 'java', 'regexp' => '/^Java\/([0-9.]*)/u' ], + [ 'name' => 'Java', 'id' => 'httpunit', 'regexp' => '/^httpunit\/([0-9.]*)/u' ], [ 'name' => 'Perl', 'id' 
=> 'simple', 'regexp' => '/LWP::Simple\//u' ], [ 'name' => 'Perl', 'id' => 'libwww', 'regexp' => '/libwww-perl\//u' ], [ 'name' => 'Perl', 'id' => 'lwp', 'regexp' => '/lwp-trivial\//u' ], diff --git a/data/regexes/applications-bots.php b/data/regexes/applications-bots.php index 0ec100e8b..2c7e5ba56 100644 --- a/data/regexes/applications-bots.php +++ b/data/regexes/applications-bots.php @@ -2,4 +2,4 @@ namespace WhichBrowser\Data; -Applications::$BOTS_REGEX = '/(008|360|a6|abound|muncher|adaxas|addthis|admantx|ahrefsbot|archiver|altavista|curious|cloudfront|amorank|archive|jeeves|astra|backlink|baidu|bazqux|bingbot|msnbot|msmobot|bing|blogbridge|bloglines|bloglovin|blogpulse|blogram|blogtrot|blogshares|boardreader|browsershots|bubing|butterfly|heritrix|cliqz|cloudflare|comodo|commafeed|cbot|watchdog|datasearch|daumoa|digg|domain|exabot|exactseek|ezooms|facebook|fast|flamingo|fastladder|feed|wrangler|validator|fever|friendica|robot|genieo|fetchor|google|gomez|ichiro|gooblog|goorss|grammarly|grub|hatena|capture|heureka|htdig|httpmon|hubpages|spider|crawler|kouio|larbin|linkedin|linkdex|livedoor|ltx71|magpie|mj12bot|mapion|socialstreams|mixi|mnogo|monitor|yeti|netcraft|netvibes|newsblur|newsgator|orange|pages|psbot|pingdom|pinterest|postrank|comment|rssbar|hunter|quora|safesearch|scrapy|seznam|shopwiki|reader|simplepie|site24|sogou|sophora|soso|spdycheck|spinn|detector|summify|telegram|twisted|twitter|tiny|tlsprober|typhoeus|vagabondo|voila|vocus|mcrawler|w3bot|jigsaw|w3c|wayback|webindexer|wordpress|mechanize|xerka|sitemap|yacy|yandex|yahoo|y\!j|slurp|httpclient|nutch|synapse|indy|wget|curl|package|java|simple|libwww|lwp|urllib|php|pear|zend|ruby)/i'; +Applications::$BOTS_REGEX = 
'/(008|360|a6|abound|muncher|adaxas|addthis|admantx|ahrefsbot|archiver|altavista|curious|cloudfront|amorank|archive|jeeves|astra|backlink|baidu|bazqux|bingbot|msnbot|msmobot|bing|blogbridge|bloglines|bloglovin|blogpulse|blogram|blogtrot|blogshares|boardreader|browsershots|bubing|butterfly|heritrix|cliqz|cloudflare|comodo|commafeed|cbot|watchdog|datasearch|daumoa|digg|domain|exabot|exactseek|ezooms|facebook|fast|flamingo|fastladder|feed|wrangler|validator|fever|friendica|robot|genieo|fetchor|google|gomez|ichiro|gooblog|goorss|grammarly|grub|hatena|capture|heureka|htdig|httpmon|hubpages|spider|crawler|kouio|larbin|linkedin|linkdex|livedoor|ltx71|magpie|mj12bot|mapion|socialstreams|mixi|mnogo|monitor|yeti|netcraft|netvibes|newsblur|newsgator|orange|pages|psbot|pingdom|pinterest|postrank|comment|rssbar|hunter|quora|safesearch|scrapy|seznam|shopwiki|reader|simplepie|site24|sogou|sophora|soso|spdycheck|spinn|detector|summify|telegram|twisted|twitter|tiny|tlsprober|typhoeus|vagabondo|voila|vocus|mcrawler|w3bot|jigsaw|w3c|wayback|webindexer|wordpress|mechanize|xerka|sitemap|yacy|yandex|yahoo|y\!j|slurp|httpclient|httpunit|nutch|synapse|indy|wget|curl|package|java|simple|libwww|lwp|urllib|php|pear|zend|ruby)/i'; From 2ebfbc629626847025da0ea96d4cbab56f87a1c6 Mon Sep 17 00:00:00 2001 From: Vincent K Date: Sat, 19 Jan 2019 17:02:33 +1100 Subject: [PATCH 3/3] Added New Bot Roger (Moz.com) Added Moz.com SEO Crawler --- data/applications-bots.php | 1 + data/regexes/applications-bots.php | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/data/applications-bots.php b/data/applications-bots.php index 70c2f8985..9d30563ba 100644 --- a/data/applications-bots.php +++ b/data/applications-bots.php @@ -173,6 +173,7 @@ [ 'name' => 'PowerMapper', 'id' => 'crawler', 'regexp' => '/CrawlerProcess \(http:\/\/www\.PowerMapper\.com\) \/([0-9.]*)/u' ], [ 'name' => 'Radian 6', 'id' => 'feed', 'regexp' => '/R6_FeedFetcher/u' ], [ 'name' => 'Radian 6', 'id' => 'comment', 'regexp' => 
'/R6_CommentReader/u' ], + [ 'name' => 'Moz', 'id' => 'rogerbot', 'regexp' => '/rogerbot(?:\/([0-9.]*))?/iu' ], [ 'name' => 'RssBar', 'id' => 'rssbar', 'regexp' => '/RssBar\/([0-9.]*)/u' ], [ 'name' => 'ROI Hunter', 'id' => 'hunter', 'regexp' => '/ROI Hunter/u' ], [ 'name' => 'QuerySeekerSpider', 'id' => 'spider', 'regexp' => '/QuerySeekerSpider(?:\/([0-9.]*))?/u' ], diff --git a/data/regexes/applications-bots.php b/data/regexes/applications-bots.php index 2c7e5ba56..a7ff953f2 100644 --- a/data/regexes/applications-bots.php +++ b/data/regexes/applications-bots.php @@ -2,4 +2,4 @@ namespace WhichBrowser\Data; -Applications::$BOTS_REGEX = '/(008|360|a6|abound|muncher|adaxas|addthis|admantx|ahrefsbot|archiver|altavista|curious|cloudfront|amorank|archive|jeeves|astra|backlink|baidu|bazqux|bingbot|msnbot|msmobot|bing|blogbridge|bloglines|bloglovin|blogpulse|blogram|blogtrot|blogshares|boardreader|browsershots|bubing|butterfly|heritrix|cliqz|cloudflare|comodo|commafeed|cbot|watchdog|datasearch|daumoa|digg|domain|exabot|exactseek|ezooms|facebook|fast|flamingo|fastladder|feed|wrangler|validator|fever|friendica|robot|genieo|fetchor|google|gomez|ichiro|gooblog|goorss|grammarly|grub|hatena|capture|heureka|htdig|httpmon|hubpages|spider|crawler|kouio|larbin|linkedin|linkdex|livedoor|ltx71|magpie|mj12bot|mapion|socialstreams|mixi|mnogo|monitor|yeti|netcraft|netvibes|newsblur|newsgator|orange|pages|psbot|pingdom|pinterest|postrank|comment|rssbar|hunter|quora|safesearch|scrapy|seznam|shopwiki|reader|simplepie|site24|sogou|sophora|soso|spdycheck|spinn|detector|summify|telegram|twisted|twitter|tiny|tlsprober|typhoeus|vagabondo|voila|vocus|mcrawler|w3bot|jigsaw|w3c|wayback|webindexer|wordpress|mechanize|xerka|sitemap|yacy|yandex|yahoo|y\!j|slurp|httpclient|httpunit|nutch|synapse|indy|wget|curl|package|java|simple|libwww|lwp|urllib|php|pear|zend|ruby)/i'; +Applications::$BOTS_REGEX = 
'/(008|360|a6|abound|muncher|adaxas|addthis|admantx|ahrefsbot|archiver|altavista|curious|cloudfront|amorank|archive|jeeves|astra|backlink|baidu|bazqux|bingbot|msnbot|msmobot|bing|blogbridge|bloglines|bloglovin|blogpulse|blogram|blogtrot|blogshares|boardreader|browsershots|bubing|butterfly|heritrix|cliqz|cloudflare|comodo|commafeed|cbot|watchdog|datasearch|daumoa|digg|domain|exabot|exactseek|ezooms|facebook|fast|flamingo|fastladder|feed|wrangler|validator|fever|friendica|robot|genieo|fetchor|google|gomez|ichiro|gooblog|goorss|grammarly|grub|hatena|capture|heureka|htdig|httpmon|hubpages|spider|crawler|kouio|larbin|linkedin|linkdex|livedoor|ltx71|magpie|mj12bot|mapion|socialstreams|mixi|mnogo|monitor|yeti|netcraft|netvibes|newsblur|newsgator|orange|pages|psbot|pingdom|pinterest|postrank|comment|rogerbot|rssbar|hunter|quora|safesearch|scrapy|seznam|shopwiki|reader|simplepie|site24|sogou|sophora|soso|spdycheck|spinn|detector|summify|telegram|twisted|twitter|tiny|tlsprober|typhoeus|vagabondo|voila|vocus|mcrawler|w3bot|jigsaw|w3c|wayback|webindexer|wordpress|mechanize|xerka|sitemap|yacy|yandex|yahoo|y\!j|slurp|httpclient|httpunit|nutch|synapse|indy|wget|curl|package|java|simple|libwww|lwp|urllib|php|pear|zend|ruby)/i';