From 9a2296fcf15e0095745097b3e8bb17dd8eb71a0c Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Sun, 10 Nov 2024 12:22:30 -0500 Subject: [PATCH 1/4] test wp 6.7. closes #62 --- block-ai-crawlers.php | 2 +- readme.txt | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index c4d0500..306016b 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -5,7 +5,7 @@ * Author: Bob Matyas * Author URI: https://www.bobmatyas.com * Text Domain: block-ai-crawlers - * Version: 1.4.0 + * Version: 1.4.1 * License: GPL-2.0-or-later * License URI: https://www.gnu.org/licenses/gpl-2.0.html * diff --git a/readme.txt b/readme.txt index 73289d7..b6fcfe7 100644 --- a/readme.txt +++ b/readme.txt @@ -2,9 +2,9 @@ Contributors: lastsplash Tags: ai, robots.txt, chatgpt, crawlers Requires at least: 5.6 -Tested up to: 6.6.2 +Tested up to: 6.7 Requires PHP: 7.4 -Stable tag: 1.4.0 +Stable tag: 1.4.1 License: GPLv2 or later License URI: https://www.gnu.org/licenses/gpl-2.0.html @@ -95,6 +95,10 @@ No. Search engines follow different `robots.txt` rules. == Changelog == += 1.4.1 = + +- Enhancement: WordPress 6.7 compatibility + = 1.4.0 = - New: Block Kangaroo Bot - New: Block sentibot From b1d8550c52df639deb101a1f30422ef5287c3acc Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Sun, 10 Nov 2024 12:25:43 -0500 Subject: [PATCH 2/4] block: ai2bot-dolma. 
closes #63 --- block-ai-crawlers.php | 1 + inc/settings-html.php | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index 306016b..da69107 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -29,6 +29,7 @@ function block_ai_robots_txt( $robots ) { $robots .= "\n# Block AI Crawlers\n\n"; $robots .= "User-agent: AI2Bot\n"; + $robots .= "User-agent: Ai2Bot-Dolma\n"; $robots .= "User-agent: AmazonBot\n"; $robots .= "User-agent: Applebot-Extended\n"; $robots .= "User-agent: anthropic-ai\n"; diff --git a/inc/settings-html.php b/inc/settings-html.php index 8908212..e47ca25 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -27,6 +27,11 @@
Explores sites for web content that is used to train open language models
Generates data sets used to train open language models.
Used by Amazon's Alexa AI to provide AI answers.
Used by Timpi, likely for its Wilson AI product.
Used by TurnitinBot to scrape data for plagiarism detection
Used by Webz.io for their social listening and intelligence platforms.
Used by TurnitinBot to scrape data for plagiarism detection
Used by Turnitin to scrape data for plagiarism detection.