From 455bb11a96885411343dc56e495918fd424f1cce Mon Sep 17 00:00:00 2001 From: Amisha Mohapatra Date: Tue, 2 Feb 2021 21:32:30 +0530 Subject: [PATCH] Add String Search Algorithms in Java & C++ (#55) * Add string searching algorithms * Update kmp.cpp * Update README.md --- strings/README.md | 24 ++++++++- strings/c-or-cpp/kmp.cpp | 90 +++++++++++++++++++++++++++++++++ strings/c-or-cpp/rabin-karp.cpp | 79 +++++++++++++++++++++++++++++ strings/java/kmp.java | 82 ++++++++++++++++++++++++++++++ strings/java/rabin-karp.java | 78 ++++++++++++++++++++++++++++ 5 files changed, 352 insertions(+), 1 deletion(-) create mode 100644 strings/c-or-cpp/kmp.cpp create mode 100644 strings/c-or-cpp/rabin-karp.cpp create mode 100644 strings/java/kmp.java create mode 100644 strings/java/rabin-karp.java diff --git a/strings/README.md b/strings/README.md index 2724a0ad..e501ffe0 100644 --- a/strings/README.md +++ b/strings/README.md @@ -1,9 +1,16 @@ -# String operations +# String Algorithms ### C or C++ 1. [Palindrome Check](c-or-cpp/palindrome.c) 2. [All subsequences](c-or-cpp/sequence.cpp) +3. [KMP String Searching](c-or-cpp/kmp.cpp) +4. [Rabin Karp String Searching](c-or-cpp/rabin-karp.cpp) + + + + + ### C# You could use any online IDE (for an example [.net Finddle](https://dotnetfiddle.net/)) to test them. @@ -18,3 +25,18 @@ You could use any online IDE (for an example [.net Finddle](https://dotnetfiddle 1. [Palindrome Check](java/palindrome.java) 2. [All subsequences](java/sequence.java) +3. [KMP String Searching](java/kmp.cpp) +4. [Rabin Karp String Searching](java/rabin-karp.cpp) + + + + + + + + + + + + + diff --git a/strings/c-or-cpp/kmp.cpp b/strings/c-or-cpp/kmp.cpp new file mode 100644 index 00000000..ae069798 --- /dev/null +++ b/strings/c-or-cpp/kmp.cpp @@ -0,0 +1,90 @@ +#include + +void computeLPSArray(char* pat, int M, int* lps); + +// Prints occurrences of pat[] int txt[] +void KMPSearch(char* pat, char* txt) +{ + int M = strlen(pat); + int N = strlen(txt); + + // create lps[] that will hold the longest prefix suffix + // values for pattern + int lps[M]; + + // Preprocess the pattern (calculate lps[] array) + computeLPSArray(pat, M, lps); + + int i = 0; // index for txt[] + int j = 0; // index for pat[] + bool f = 1;//flag to indicate pattern not found + while (i < N) { + if (pat[j] == txt[i]) { + j++; + i++; + } + + if (j == M) { + printf("Found pattern at index %d\n", i - j); + f=0; + j = lps[j - 1]; + } + + // mismatch after j matches + else if (i < N && pat[j] != txt[i]) { + // Do not match lps[0..lps[j-1]] characters, + // they will match anyway + if (j != 0) + j = lps[j - 1]; + else + i = i + 1; + } + } + if(f) + printf("Pattern is not found"); +} + +// Fills lps[] for given patttern pat[0..M-1] +void computeLPSArray(char* pat, int M, int* lps) +{ + // length of the previous longest prefix suffix + int len = 0; + + lps[0] = 0; // lps[0] is always 0 + + // the loop calculates lps[i] for i = 1 to M-1 + int i = 1; + while (i < M) { + if (pat[i] == pat[len]) { + len++; + lps[i] = len; + i++; + } + else // (pat[i] != pat[len]) + { + // This is tricky. Consider the example. + // AAACAAAA and i = 7. The idea is similar + // to search step. + if (len != 0) { + len = lps[len - 1]; + + // Also, note that we do not increment + // i here + } + else // if (len == 0) + { + lps[i] = 0; + i++; + } + } + } +} + +// Driver program to test above function +int main() +{ + char txt[] = "ABABDABACDABABCABAB"; + char pat[] = "ABABCABAB"; + KMPSearch(pat, txt); + return 0; +} diff --git a/strings/c-or-cpp/rabin-karp.cpp b/strings/c-or-cpp/rabin-karp.cpp new file mode 100644 index 00000000..06410440 --- /dev/null +++ b/strings/c-or-cpp/rabin-karp.cpp @@ -0,0 +1,79 @@ +#include +using namespace std; + +// d is the number of characters in the input alphabet +#define d 256 + +/* pat -> pattern + txt -> text + q -> A prime number +*/ +void search(char pat[], char txt[], int q) +{ + int M = strlen(pat); + int N = strlen(txt); + int i, j; + int p = 0; // hash value for pattern + int t = 0; // hash value for txt + int h = 1; + + // The value of h would be "pow(d, M-1)%q" + for (i = 0; i < M - 1; i++) + h = (h * d) % q; + + // Calculate the hash value of pattern and first + // window of text + for (i = 0; i < M; i++) + { + p = (d * p + pat[i]) % q; + t = (d * t + txt[i]) % q; + } + + // Slide the pattern over text one by one + for (i = 0; i <= N - M; i++) + { + + // Check the hash values of current window of text + // and pattern. If the hash values match then only + // check for characters on by one + if ( p == t ) + { + /* Check for characters one by one */ + for (j = 0; j < M; j++) + { + if (txt[i+j] != pat[j]) + break; + } + + // if p == t and pat[0...M-1] = txt[i, i+1, ...i+M-1] + if (j == M) + cout<<"Pattern found at index "<< i< pattern + txt -> text + q -> A prime number + */ + static void search(String pat, String txt, int q) + { + int M = pat.length(); + int N = txt.length(); + int i, j; + int p = 0; // hash value for pattern + int t = 0; // hash value for txt + int h = 1; + + // The value of h would be "pow(d, M-1)%q" + for (i = 0; i < M-1; i++) + h = (h*d)%q; + + // Calculate the hash value of pattern and first + // window of text + for (i = 0; i < M; i++) + { + p = (d*p + pat.charAt(i))%q; + t = (d*t + txt.charAt(i))%q; + } + + // Slide the pattern over text one by one + for (i = 0; i <= N - M; i++) + { + + // Check the hash values of current window of text + // and pattern. If the hash values match then only + // check for characters on by one + if ( p == t ) + { + /* Check for characters one by one */ + for (j = 0; j < M; j++) + { + if (txt.charAt(i+j) != pat.charAt(j)) + break; + } + + // if p == t and pat[0...M-1] = txt[i, i+1, ...i+M-1] + if (j == M) + System.out.println("Pattern found at index " + i); + } + + // Calculate hash value for next window of text: Remove + // leading digit, add trailing digit + if ( i < N-M ) + { + t = (d*(t - txt.charAt(i)*h) + txt.charAt(i+M))%q; + + // We might get negative value of t, converting it + // to positive + if (t < 0) + t = (t + q); + } + } + } + + /* Driver Code */ + public static void main(String[] args) + { + String txt = "ABCAMCABAMMAM"; + String pat = "AM"; + + // A prime number + int q = 101; + + // Function Call + search(pat, txt, q); + } +} \ No newline at end of file