From 9a2296fcf15e0095745097b3e8bb17dd8eb71a0c Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Sun, 10 Nov 2024 12:22:30 -0500 Subject: [PATCH 1/4] test wp 6.7. closes #62 --- block-ai-crawlers.php | 2 +- readme.txt | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index c4d0500..306016b 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -5,7 +5,7 @@ * Author: Bob Matyas * Author URI: https://www.bobmatyas.com * Text Domain: block-ai-crawlers - * Version: 1.4.0 + * Version: 1.4.1 * License: GPL-2.0-or-later * License URI: https://www.gnu.org/licenses/gpl-2.0.html * diff --git a/readme.txt b/readme.txt index 73289d7..b6fcfe7 100644 --- a/readme.txt +++ b/readme.txt @@ -2,9 +2,9 @@ Contributors: lastsplash Tags: ai, robots.txt, chatgpt, crawlers Requires at least: 5.6 -Tested up to: 6.6.2 +Tested up to: 6.7 Requires PHP: 7.4 -Stable tag: 1.4.0 +Stable tag: 1.4.1 License: GPLv2 or later License URI: https://www.gnu.org/licenses/gpl-2.0.html @@ -95,6 +95,10 @@ No. Search engines follow different `robots.txt` rules. == Changelog == += 1.4.1 = + +- Enhancement: WordPress 6.7 compatibility + = 1.4.0 = - New: Block Kangaroo Bot - New: Block sentibot From b1d8550c52df639deb101a1f30422ef5287c3acc Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Sun, 10 Nov 2024 12:25:43 -0500 Subject: [PATCH 2/4] block: ai2bot-dolma. 
closes #63 --- block-ai-crawlers.php | 1 + inc/settings-html.php | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index 306016b..da69107 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -29,6 +29,7 @@ function block_ai_robots_txt( $robots ) { $robots .= "\n# Block AI Crawlers\n\n"; $robots .= "User-agent: AI2Bot\n"; + $robots .= "User-agent: Ai2Bot-Dolma\n"; $robots .= "User-agent: AmazonBot\n"; $robots .= "User-agent: Applebot-Extended\n"; $robots .= "User-agent: anthropic-ai\n"; diff --git a/inc/settings-html.php b/inc/settings-html.php index 8908212..e47ca25 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -27,6 +27,11 @@

Explores sites for web content that is used to train open language models

More Info + + Ai2Bot-Dolma +

Generates data sets used to train open language models

+ More Info + AmazonBot

Used by Amazon's Alexa AI to provide AI answers.

From 801ae4649ce9f741d1329bd1c05ffcb181f6b1bb Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Sun, 10 Nov 2024 12:29:18 -0500 Subject: [PATCH 3/4] block: turnitinbo. closes #64 --- block-ai-crawlers.php | 1 + inc/settings-html.php | 5 +++++ readme.txt | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/block-ai-crawlers.php b/block-ai-crawlers.php index da69107..66c31d4 100644 --- a/block-ai-crawlers.php +++ b/block-ai-crawlers.php @@ -57,6 +57,7 @@ function block_ai_robots_txt( $robots ) { $robots .= "User-agent: SentiBot\n"; $robots .= "User-agent: sentibot\n"; $robots .= "User-agent: Timpibot\n"; + $robots .= "User-agent: TurnitinBot\n"; $robots .= "User-agent: YouBot\n"; $robots .= "User-agent: webzio\n"; $robots .= "User-agent: webzio-extended\n"; diff --git a/inc/settings-html.php b/inc/settings-html.php index e47ca25..418eaf6 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -132,6 +132,11 @@

Used by Timpi; likely for their Wilson AI Product.

More Info + + TurnitinBot +

Used by TurnitinBot to scrape data for plagiarism detection

+ More Info + Webzio

Used by Webz.io for their social listening and intelligence platforms.

diff --git a/readme.txt b/readme.txt index b6fcfe7..14453aa 100644 --- a/readme.txt +++ b/readme.txt @@ -96,7 +96,8 @@ No. Search engines follow different `robots.txt` rules. == Changelog == = 1.4.1 = - +- New: Block Turnitinbot +- New: Block Ai2Bot-Dolma - Enhancement: WordPress 6.7 compatibility = 1.4.0 = From 62d493c1b6c9da909d261be0a058147ddfd6fba0 Mon Sep 17 00:00:00 2001 From: bob <45246438+bobmatyas@users.noreply.github.com> Date: Sun, 10 Nov 2024 12:29:26 -0500 Subject: [PATCH 4/4] fix typo --- inc/settings-html.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inc/settings-html.php b/inc/settings-html.php index 418eaf6..530139f 100644 --- a/inc/settings-html.php +++ b/inc/settings-html.php @@ -134,7 +134,7 @@ TurnitinBot -

Used by TurnitinBot to scrape data for plagiarism detection

+

Used by Turnitin to scrape data for plagiarism detection

More Info