Skip to content

Commit

Permalink
Merge pull request #48 from Shu-AFK/main
Browse files Browse the repository at this point in the history
Added Rabin-Karp string search algorithm and tests
  • Loading branch information
spirosmaggioros authored Apr 4, 2024
2 parents bcedee7 + ee21e34 commit e21662b
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 0 deletions.
102 changes: 102 additions & 0 deletions src/algorithms/string/rabin_karp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#ifndef RABIN_KARP_H
#define RABIN_KARP_H

#ifdef __cplusplus
#include <string>
#include <vector>
#endif

namespace {
// Radix of the polynomial hash and the prime it is reduced by.
constexpr int base = 26;
constexpr int modulus = 1000000007;

/**
* @brief Computes the polynomial hash of the half-open substring str[start, end).
*
* The first character of the substring carries the highest power of `base`
* and the last character carries base^0. Every character is read as an
* unsigned byte so the result does not depend on whether `char` is signed.
*
* @param str The input string.
* @param start Index of the first character of the substring.
* @param end Index one past the last character of the substring.
* @return The hash value in [0, modulus); 0 when start >= end.
* @note The caller must guarantee end <= str.size().
*/
size_t compute_hash(const std::string &str, size_t start, size_t end) {
    // unsigned long long keeps (byte * weight) from overflowing even where size_t is 32 bits wide.
    const unsigned long long mod = modulus;
    unsigned long long weight = 1;
    unsigned long long hash_value = 0;

    // Walk from the last character back to the first so the weights grow towards the front.
    for(size_t i = end; i > start; --i) {
        const unsigned long long symbol = static_cast<unsigned char>(str[i - 1]);
        hash_value = (hash_value + (symbol * weight) % mod) % mod;
        weight = (weight * base) % mod;
    }
    return static_cast<size_t>(hash_value);
}

/**
* @brief Verifies a hash match by comparing two substrings character by character.
*
* Despite its name, this function returns true when the two substrings are
* identical, i.e. when equal hashes correspond to a genuine match rather than
* to a hash collision.
*
* @param str1 The first string
* @param start1 The starting position of the first substring in str1
* @param str2 The second string
* @param start2 The starting position of the second substring in str2
* @param length The length of the substrings to compare
* @return true if all `length` characters are equal, false at the first mismatch
* @note This function assumes str1 and str2 are valid strings with lengths greater than or equal to start1 + length and start2 + length respectively.
*/
bool check_collision(const std::string &str1, size_t start1, const std::string &str2, size_t start2, size_t length) {
    for(size_t i = 0; i < length; ++i) {
        if(str1[start1 + i] != str2[start2 + i]) {
            return false;
        }
    }
    return true;
}
}

/**
* @brief Executes the Rabin-Karp algorithm to search for occurrences of a pattern within a text.
*
* @details
* The hash of each text window is derived from the previous one (rolling hash)
* and compared with the hash of the pattern; windows whose hash matches are
* confirmed with a direct character comparison before being reported.
*
* @param text The input text to search within.
* @param pattern The pattern to search for within the text.
* @return A vector of starting indices of all occurrences of the pattern in the text, in increasing order.
*         If none were found the vector is empty. An empty pattern matches at every index from 0 to text.length().
* @note Declared inline because the definition lives in a header.
*/
inline std::vector<size_t> rabin_karp(const std::string &text, const std::string &pattern) {
    std::vector<size_t> result;
    const size_t pattern_length = pattern.length();
    const size_t text_length = text.length();

    if(pattern_length == 0) { // if pattern is empty, it can be found at every index including the end of the text
        result.reserve(text_length + 1);
        for(size_t i = 0; i <= text_length; i++) {
            result.push_back(i);
        }
        return result;
    }

    if(text_length < pattern_length) { // if text is shorter than pattern, pattern can not be found
        return result;
    }

    // unsigned long long keeps the intermediate products exact even where size_t is 32 bits wide.
    const unsigned long long mod = modulus;
    const unsigned long long radix = base;

    // hash of the pattern and of the first pattern_length characters of the text
    const unsigned long long pattern_hash = compute_hash(pattern, 0, pattern_length);
    unsigned long long text_hash = compute_hash(text, 0, pattern_length);

    // base^(pattern_length - 1): the weight of the leading character of a window
    unsigned long long power = 1;
    for(size_t i = 0; i + 1 < pattern_length; ++i) {
        power = (power * radix) % mod;
    }

    const size_t last_start = text_length - pattern_length;
    for(size_t i = 0; i <= last_start; ++i) {
        if(pattern_hash == text_hash && check_collision(text, i, pattern, 0, pattern_length)) {
            result.push_back(i);
        }

        if(i < last_start) {
            // Slide the window: drop text[i], shift the rest by one power, append text[i + pattern_length].
            // Bytes are read as unsigned char to stay consistent with compute_hash.
            const unsigned long long outgoing = static_cast<unsigned char>(text[i]);
            const unsigned long long incoming = static_cast<unsigned char>(text[i + pattern_length]);
            const unsigned long long without_lead = (text_hash + mod - (outgoing * power) % mod) % mod;
            text_hash = (radix * without_lead + incoming) % mod;
        }
    }

    return result;
}

#endif
99 changes: 99 additions & 0 deletions tests/algorithms/string/rabin_karp.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include "../../../src/algorithms/string/rabin_karp.h"
#include "../../../third_party/catch.hpp"
#include <string>
#include <vector>

TEST_CASE("Testing Rabin Karp with no valid matches") {
    // The pattern never occurs in the text, so no index may be reported.
    const std::string haystack("Hello world");
    const std::string needle("abc");

    const std::vector<size_t> hits = rabin_karp(haystack, needle);
    REQUIRE(hits.empty());
}

TEST_CASE("Testing Rabin Karp with valid input") {
    // The pattern is a prefix of the text: exactly one match, at index 0.
    const std::string haystack("Hello world");
    const std::string needle("Hello");

    const std::vector<size_t> hits = rabin_karp(haystack, needle);
    REQUIRE(hits.size() == 1);
    REQUIRE(hits[0] == 0);
}

TEST_CASE("Testing Rabin Karp with incorrect input") {
    // A pattern longer than the text can never match.
    const std::string haystack("abc");
    const std::string needle("abcdefg");

    const std::vector<size_t> hits = rabin_karp(haystack, needle);
    REQUIRE(hits.empty());
}

TEST_CASE("Testing Rabin Karp with multiple occurrences") {
    // Three back-to-back copies of the pattern: matches at 0, 3 and 6.
    const std::string needle("abc");
    const std::string haystack("abcabcabc");

    const std::vector<size_t> hits = rabin_karp(haystack, needle);
    REQUIRE(hits.size() == 3);
    REQUIRE(hits[0] == 0);
    REQUIRE(hits[1] == 3);
    REQUIRE(hits[2] == 6);
}

TEST_CASE("Testing Rabin Karp with empty text and non-empty pattern") {
    // Nothing can be found inside an empty text.
    const std::string haystack;
    const std::string needle("abcdefg");

    const std::vector<size_t> hits = rabin_karp(haystack, needle);
    REQUIRE(hits.empty());
}

TEST_CASE("Testing Rabin Karp with non-empty text and empty pattern") {
    // An empty pattern is reported at every index of the text, including
    // the position one past its last character.
    std::string text = "abcdefg";
    std::string pattern = "";

    std::vector<size_t> result = rabin_karp(text, pattern);
    REQUIRE(result.size() == text.length() + 1);

    // Checking only the count would accept wrong indices; verify each one.
    for (size_t i = 0; i < result.size(); i++) {
        REQUIRE(result[i] == i);
    }
}

TEST_CASE("Testing Rabin Karp with both empty text and pattern") {
    // The empty pattern matches the empty text once, at index 0.
    const std::string haystack;
    const std::string needle;

    const std::vector<size_t> hits = rabin_karp(haystack, needle);
    REQUIRE(hits.size() == 1);
    REQUIRE(hits[0] == 0);
}

TEST_CASE("Testing Rabin Karp with pattern equals to the text") {
    // Text and pattern are the same string: a single match at index 0.
    const std::string haystack("Hello world");
    const std::string needle(haystack);

    const std::vector<size_t> hits = rabin_karp(haystack, needle);
    REQUIRE(hits.size() == 1);
    REQUIRE(hits[0] == 0);
}

TEST_CASE("Stress testing Rabin Karp") {
    // Sizes are integral constants so that neither the string construction
    // nor the expected count goes through floating-point arithmetic.
    const size_t text_size = 1000000;
    const size_t pattern_size = 1000;

    const std::string text(text_size, 'a');
    const std::string pattern(pattern_size, 'a');

    const std::vector<size_t> result = rabin_karp(text, pattern);
    REQUIRE(result.size() == text_size - pattern_size + 1);

    // Every window matches, so result must be 0, 1, 2, ... in order.
    // Locate the first deviation with a plain loop and assert once instead
    // of issuing one REQUIRE per element.
    size_t first_mismatch = result.size();
    for (size_t i = 0; i < result.size(); i++) {
        if (result[i] != i) {
            first_mismatch = i;
            break;
        }
    }
    REQUIRE(first_mismatch == result.size());
}

TEST_CASE("Testing Rabin Karp with multiple occurrences of duplicate characters") {
    // Overlapping matches must all be reported: runs of 'a' longer than the
    // pattern yield one match per starting position.
    const std::string haystack("aabcaaabcaaaab");
    const std::string needle("aa");
    const size_t expected[] = {0, 4, 5, 9, 10, 11};

    const std::vector<size_t> hits = rabin_karp(haystack, needle);
    REQUIRE(hits.size() == 6);
    for (size_t k = 0; k < 6; ++k) {
        REQUIRE(hits[k] == expected[k]);
    }
}

0 comments on commit e21662b

Please sign in to comment.