Skip to content

Commit

Permalink
Merge pull request #38 from cogniteev/bug/prefix_wildcard_with_fullpath
Browse files (browse the repository at this point in the history)
Merging fix for leading asterisk in robots.txt directive.
  • Loading branch information
The Chiefest and Greatest of Calamities authored May 28, 2019
2 parents 8afac6e + 90901e7 commit f406b76
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ namespace Rep
{
return *this;
}
directives_.push_back(Directive(escape_url(url), true));
directives_.push_back(Directive(url.defrag().escape().str(), true));
sorted_ = false;
return *this;
}
Expand All @@ -45,7 +45,8 @@ namespace Rep
{
return *this;
}
directives_.push_back(Directive(escape_url(url), false));

directives_.push_back(Directive(url.defrag().escape().str(), false));
}
sorted_ = false;
return *this;
Expand Down
18 changes: 18 additions & 0 deletions test/test-robots.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,21 @@ TEST(RobotsTest, NeverExternalAllowed)
Rep::Robots robot("", "http://a.com/robots.txt");
EXPECT_FALSE(robot.allowed("http://b.com/", "one"));
}

// Exercises Disallow rules whose pattern starts with a wildcard ("*/dir")
// next to the equivalent rooted form ("/*/dir"), combined with a blanket
// "Allow: /" for the same user agent.
TEST(RobotsTest, PrefixStarExample)
{
    // Name of the only agent declared in the robots.txt below.
    static const char kAgent[] = "ohmagad";

    std::string robots_txt =
        "# /robots.txt for fun and profit\n"
        "\n"
        "User-agent: ohmagad\n"
        "Allow: /\n"
        "Disallow: */dir\n"
        "Disallow: /*/dir\n";
    Rep::Robots rules(robots_txt);

    // Paths without a "dir" segment are expected to remain allowed.
    EXPECT_TRUE(rules.allowed("/", kAgent));
    EXPECT_TRUE(rules.allowed("/a/b/page.html", kAgent));

    // Paths containing a "dir" segment are expected to be rejected, whether
    // the segment is at the top level or nested below another directory.
    EXPECT_FALSE(rules.allowed("/dir/page.html", kAgent));
    EXPECT_FALSE(rules.allowed("/some/dir/page.html", kAgent));
}

0 comments on commit f406b76

Please sign in to comment.