// Two-Way String-Matching (Crochemore & Perrin).
//
// The pattern is split at a critical position into a left and a right part.
// At every alignment the right part is compared left-to-right first, then the
// left part right-to-left; the shift after a mismatch is derived from the
// critical position and the period of the right part.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Declared by the original program; nothing in this file reads it.
#define MAX_PATTERN_LENGTH 1000

// Computes the maximal suffix of pattern[0..length) with respect to the byte
// order (reversed == 0) or the reversed byte order (reversed != 0).
//
// Returns the index just before the first character of that suffix (-1 when
// the whole pattern is its own maximal suffix) and stores the period of the
// suffix in *period.
static int maximalSuffix(const char* pattern, int length, int reversed, int* period) {
    int suffixStart = -1;  // index preceding the current candidate suffix
    int j = 0;             // index preceding the suffix being compared against it
    int k = 1;             // offset of the characters currently compared
    int p = 1;             // period of the current candidate suffix

    while (j + k < length) {
        // Compare as unsigned bytes so the order does not depend on whether
        // plain char is signed on the target platform.
        const unsigned char a = (unsigned char)pattern[j + k];
        const unsigned char b = (unsigned char)pattern[suffixStart + k];

        if (a == b) {
            if (k != p) {
                k++;
            } else {
                // A full period matched; slide forward by one period.
                j += p;
                k = 1;
            }
        } else if (reversed ? (a > b) : (a < b)) {
            // The suffix starting after j is smaller: the candidate stands and
            // its known period extends up to the mismatch.
            j += k;
            k = 1;
            p = j - suffixStart;
        } else {
            // The suffix starting after j is larger: it becomes the candidate.
            suffixStart = j;
            j = suffixStart + 1;
            k = 1;
            p = 1;
        }
    }

    *period = p;
    return suffixStart;
}

// Picks the critical position from the two maximal suffixes (normal and
// reversed order): the one that starts later wins.
//
// Returns the length of the left part, i.e. the index of the first character
// of the right part (0 <= result < length for length >= 1), and stores the
// period of the right part in *period.
static int criticalFactorization(const char* pattern, int length, int* period) {
    int periodForward = 1;
    int periodReversed = 1;
    const int startForward = maximalSuffix(pattern, length, 0, &periodForward);
    const int startReversed = maximalSuffix(pattern, length, 1, &periodReversed);

    if (startForward > startReversed) {
        *period = periodForward;
        return startForward + 1;
    }
    *period = periodReversed;
    return startReversed + 1;
}

// Function to compute the critical factorization point
//
// Returns the index at which the right part of the critical factorization of
// pattern[0..length) begins. Returns 0 for a NULL or empty pattern.
int computeCriticalPoint(const char* pattern, int length) {
    if (pattern == NULL || length <= 0) {
        return 0;
    }
    int period = 1;
    return criticalFactorization(pattern, length, &period);
}

// Function to compute the pattern period
//
// Returns the smallest p >= 1 such that pattern[i] == pattern[i - p] for every
// i in [p, length), or length when no shorter period exists. Every candidate
// below length is tried (stopping at length / 2 would report 5 instead of 3
// for "abcab"). Each candidate costs one memcmp, so the worst case is
// quadratic in length.
int computePeriod(const char* pattern, int length) {
    for (int period = 1; period < length; period++) {
        // p is a period exactly when the pattern equals itself shifted by p.
        if (memcmp(pattern, pattern + period, (size_t)(length - period)) == 0) {
            return period;
        }
    }
    return length;
}

// Function to perform backward matching
//
// Compares pattern[0..length) with text[start..start + length) from the last
// character towards the first. Returns the index of the first mismatch met
// on the way down, or -1 when all length characters agree.
int backwardMatch(const char* text, const char* pattern, int start, int length) {
    int i = length - 1;
    while (i >= 0 && text[start + i] == pattern[i]) {
        i--;
    }
    return i;
}

// Main Two-Way String-Matching algorithm
//
// Prints "Pattern found at index N" for every occurrence of
// pattern[0..patternLength) in text[0..textLength), in increasing order of N,
// overlapping occurrences included. Prints "Pattern is empty" when
// patternLength is 0. Prints nothing for NULL arguments, a negative
// patternLength, or a pattern longer than the text.
void twoWayMatching(const char* text, int textLength, const char* pattern, int patternLength) {
    if (patternLength == 0) {
        printf("Pattern is empty\n");
        return;
    }
    if (text == NULL || pattern == NULL || patternLength < 0 || patternLength > textLength) {
        return;
    }

    int period = 1;
    const int criticalPoint = criticalFactorization(pattern, patternLength, &period);
    const int lastStart = textLength - patternLength;

    // The right part is never shorter than its own period, so
    // period + criticalPoint <= patternLength and memcmp stays in bounds.
    if (memcmp(pattern, pattern + period, (size_t)criticalPoint) == 0) {
        // The left part repeats one period later, so `period` is a period of
        // the whole pattern. After shifting by it, the first `known`
        // characters of the pattern are already known to match.
        int known = 0;
        int pos = 0;
        while (pos <= lastStart) {
            int i = (criticalPoint > known) ? criticalPoint : known;
            while (i < patternLength && pattern[i] == text[pos + i]) {
                i++;
            }
            if (i < patternLength) {
                // Mismatch in the right part: skip past the compared prefix.
                pos += i - criticalPoint + 1;
                known = 0;
                continue;
            }

            // Right part matched; check the left part down to the known prefix.
            int j = criticalPoint - 1;
            while (j >= known && pattern[j] == text[pos + j]) {
                j--;
            }
            if (j < known) {
                printf("Pattern found at index %d\n", pos);
            }
            pos += period;
            known = patternLength - period;
        }
    } else {
        // The period of the whole pattern is larger than both parts, so any
        // alignment whose right part matched can be skipped by this amount.
        const int rightLength = patternLength - criticalPoint;
        const int shift = ((criticalPoint > rightLength) ? criticalPoint : rightLength) + 1;
        int pos = 0;
        while (pos <= lastStart) {
            int i = criticalPoint;
            while (i < patternLength && pattern[i] == text[pos + i]) {
                i++;
            }
            if (i < patternLength) {
                // Mismatch in the right part: skip past the compared prefix.
                pos += i - criticalPoint + 1;
                continue;
            }
            if (backwardMatch(text, pattern, pos, criticalPoint) < 0) {
                printf("Pattern found at index %d\n", pos);
            }
            pos += shift;
        }
    }
}

// Demo: searches a fixed DNA-like text for a fixed pattern.
int main(void) {
    const char* text = "GCATCGCAGAGAGTATACAGTACG";
    const char* pattern = "GCAGAGAG";

    // strlen returns size_t; the search API takes int lengths.
    const int textLength = (int)strlen(text);
    const int patternLength = (int)strlen(pattern);

    printf("Text: %s\n", text);
    printf("Pattern: %s\n", pattern);
    printf("Matches:\n");

    twoWayMatching(text, textLength, pattern, patternLength);

    return 0;
}

// ---------------------------------------------------------------------------
// README.md, opening section (it shared a physical line with the code above
// and is kept verbatim so that no text is lost):
//
// # Two-Way String-Matching Algorithm
//
// ## Description
//
// The Two-Way String-Matching algorithm, developed by Crochemore and Perrin in
// 1991, is an efficient pattern matching algorithm that improves upon the
// Boyer-Moore algorithm by scanning the pattern in both directions. It
// combines forward and backward pattern scanning to achieve optimal searching
// in linear time.
//
// ### Problem Definition
//
// Given:
// - A pattern string P of length m
// - A text string T of length n
//
// Objective:
// - Find all occurrences of P in T
//
// ### Algorithm Overview
//
// 1. **Pattern Analysis**:
//    - Factorize the pattern into two parts
//    - Compute the critical factorization point
//    - Analyze pattern periodicity
//
// 2.
**Searching Phase**:
   - Perform forward scan until a mismatch or potential match
   - For potential matches, perform backward scan
   - Use period information to make maximal shifts

### Key Features

- Combines advantages of Boyer-Moore and Knuth-Morris-Pratt algorithms
- Uses bidirectional pattern scanning
- Achieves optimal worst-case complexity
- Performs well on both short and long patterns
- Particularly efficient for patterns with low periodicity

### Time Complexity

- Preprocessing: O(m), where m is the length of the pattern
- Searching: O(n), where n is the length of the text
- Total: O(n + m)
- In practice, achieves sublinear time on many inputs

### Space Complexity

O(m), where m is the length of the pattern. This is the storage for the pattern itself; beyond the pattern and the text, the algorithm needs only a constant number of integer variables.

## Implementation

The implementation in C demonstrates the Two-Way String-Matching algorithm for exact string matching. It includes:

1. Functions to analyze the pattern and find its critical factorization
2. The main two-way search algorithm
3. Helper functions for period computation
4. A demonstration of the algorithm's usage

## Usage

Compile the program and run it. The example in the main function demonstrates how to use the Two-Way String-Matching algorithm to find all occurrences of a pattern in a given text.

## Advantages

- Better average-case performance compared to classic algorithms
- Optimal worst-case complexity
- No preprocessing of the text required
- Efficient handling of patterns with non-trivial periods
- Suitable for both short and long patterns

## Limitations

- More complex implementation compared to simpler algorithms
- Preprocessing phase requires careful handling of pattern periodicity
- Memory usage proportional to pattern length

## Applications

- Text editors and word processors
- Bioinformatics sequence matching
- Network packet inspection
- Data compression algorithms
- Digital forensics