From 066cf971457e1e635084ee5ba20e55625762e5df Mon Sep 17 00:00:00 2001 From: Tanguy Moal Date: Thu, 14 Mar 2019 16:41:43 +0100 Subject: [PATCH 1/2] add failing test --- test/test-robots.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/test-robots.cpp b/test/test-robots.cpp index 4e87136..a0ee8bd 100644 --- a/test/test-robots.cpp +++ b/test/test-robots.cpp @@ -319,3 +319,21 @@ TEST(RobotsTest, NeverExternalAllowed) Rep::Robots robot("", "http://a.com/robots.txt"); EXPECT_FALSE(robot.allowed("http://b.com/", "one")); } + +TEST(RobotsTest, PrefixStarExample) +{ + std::string content = + "# /robots.txt for fun and profit\n" + "\n" + "User-agent: ohmagad\n" + "Allow: /\n" + "Disallow: */dir\n" + "Disallow: /*/dir\n"; + Rep::Robots robot(content); + + // The ohmagad bot + EXPECT_TRUE(robot.allowed("/", "ohmagad")); + EXPECT_TRUE(robot.allowed("/a/b/page.html", "ohmagad")); + EXPECT_FALSE(robot.allowed("/dir/page.html", "ohmagad")); + EXPECT_FALSE(robot.allowed("/some/dir/page.html", "ohmagad")); +} From 90901e72c2b9468dfa9caf69ebd376481ed0dd2c Mon Sep 17 00:00:00 2001 From: Tanguy Moal Date: Thu, 14 Mar 2019 16:42:29 +0100 Subject: [PATCH 2/2] don't use fullpath when adding directives --- src/agent.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/agent.cpp b/src/agent.cpp index 33824c5..16fa93d 100644 --- a/src/agent.cpp +++ b/src/agent.cpp @@ -25,7 +25,7 @@ namespace Rep { return *this; } - directives_.push_back(Directive(escape_url(url), true)); + directives_.push_back(Directive(url.defrag().escape().str(), true)); sorted_ = false; return *this; } @@ -45,7 +45,8 @@ namespace Rep { return *this; } - directives_.push_back(Directive(escape_url(url), false)); + + directives_.push_back(Directive(url.defrag().escape().str(), false)); } sorted_ = false; return *this;