Add String Search Algorithms in Java & C++ (#55)
* Add string searching algorithms * Update kmp.cpp * Update README.mdpull/57/head^2
parent
445a57d57b
commit
455bb11a96
|
@ -1,9 +1,16 @@
|
|||
# String operations
|
||||
# String Algorithms
|
||||
|
||||
### C or C++
|
||||
|
||||
1. [Palindrome Check](c-or-cpp/palindrome.c)
|
||||
2. [All subsequences](c-or-cpp/sequence.cpp)
|
||||
3. [KMP String Searching](c-or-cpp/kmp.cpp)
|
||||
4. [Rabin Karp String Searching](c-or-cpp/rabin-karp.cpp)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### C#
|
||||
You could use any online IDE (for example, [.NET Fiddle](https://dotnetfiddle.net/)) to test them.
|
||||
|
@ -18,3 +25,18 @@ You could use any online IDE (for an example [.net Finddle](https://dotnetfiddle
|
|||
|
||||
1. [Palindrome Check](java/palindrome.java)
|
||||
2. [All subsequences](java/sequence.java)
|
||||
3. [KMP String Searching](java/kmp.cpp)
|
||||
4. [Rabin Karp String Searching](java/rabin-karp.cpp)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
#include <bits/stdc++.h>
|
||||
|
||||
void computeLPSArray(char* pat, int M, int* lps);

// Prints every index at which pat[] occurs in txt[], using the
// Knuth-Morris-Pratt algorithm. Prints "Pattern is not found" when there is
// no occurrence.
//
//   pat - null-terminated pattern to look for
//   txt - null-terminated text to scan
//
// An empty pattern is reported as not found: with no pattern characters
// there is no lps[] entry to fall back on after a match.
void KMPSearch(char* pat, char* txt)
{
    const int M = static_cast<int>(std::strlen(pat));
    const int N = static_cast<int>(std::strlen(txt));

    if (M == 0) {
        std::printf("Pattern is not found");
        return;
    }

    // lps[k] holds the longest-prefix-suffix value for pat[0..k].
    // A std::vector is used because a runtime-sized array (int lps[M]) is a
    // compiler extension, not standard C++.
    std::vector<int> lps(M);

    // Preprocess the pattern (calculate lps[] array)
    computeLPSArray(pat, M, lps.data());

    int i = 0; // index for txt[]
    int j = 0; // index for pat[]
    bool found = false;

    while (i < N) {
        if (pat[j] == txt[i]) {
            j++;
            i++;
            if (j == M) {
                // The whole pattern matched, ending just before txt[i].
                std::printf("Found pattern at index %d\n", i - j);
                found = true;
                // Keep the longest border so overlapping matches are found.
                j = lps[j - 1];
            }
        }
        else if (j != 0) {
            // Mismatch after j matches: the first lps[j-1] characters are
            // known to match already, so resume comparing from there.
            j = lps[j - 1];
        }
        else {
            // Mismatch on the first pattern character: move on in the text.
            i++;
        }
    }

    if (!found)
        std::printf("Pattern is not found");
}
|
||||
|
||||
// Fills lps[] for the given pattern pat[0..M-1].
//
//   pat - pattern characters; only pat[0..M-1] are read
//   M   - number of pattern characters
//   lps - output array with room for at least M ints; lps[i] receives the
//         length of the longest proper prefix of pat[0..i] that is also a
//         suffix of pat[0..i]
//
// Nothing is written when M <= 0, so an empty pattern cannot cause a write
// past the end of a zero-length buffer.
void computeLPSArray(char* pat, int M, int* lps)
{
    if (M <= 0)
        return;

    // length of the previous longest prefix suffix
    int len = 0;

    lps[0] = 0; // a single character has no proper prefix

    // the loop calculates lps[i] for i = 1 to M-1
    int i = 1;
    while (i < M) {
        if (pat[i] == pat[len]) {
            // The current border extends by one character.
            len++;
            lps[i] = len;
            i++;
        }
        else if (len != 0) {
            // Fall back to the next shorter border and retry the same i
            // (e.g. "AAACAAAA" at i = 7). Note that i is not incremented.
            len = lps[len - 1];
        }
        else {
            // No border ends at position i.
            lps[i] = 0;
            i++;
        }
    }
}
|
||||
|
||||
// Driver program to test above function
|
||||
int main()
|
||||
{
|
||||
char txt[] = "ABABDABACDABABCABAB";
|
||||
char pat[] = "ABABCABAB";
|
||||
KMPSearch(pat, txt);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
#include <bits/stdc++.h>
|
||||
using namespace std;
|
||||
|
||||
// d is the number of characters in the input alphabet
#define d 256

/* Prints "Pattern found at index <i>" for every index i at which pat occurs
   in txt, using the Rabin-Karp rolling hash.

   pat -> pattern (null-terminated)
   txt -> text (null-terminated)
   q   -> A prime number used as the hash modulus; must be positive

   Nothing is printed when the pattern is empty or longer than the text.
*/
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(std::strlen(pat));
    const int N = static_cast<int>(std::strlen(txt));

    // An over-long pattern would make the first-window hash read past the
    // end of txt; an empty pattern has no window to hash.
    if (M == 0 || M > N)
        return;

    // Hash bytes as unsigned values. Plain char may be signed, which would
    // let the pattern hash stay negative while the rolled text hash is
    // normalised to non-negative, hiding real matches.
    const unsigned char* pattern = reinterpret_cast<const unsigned char*>(pat);
    const unsigned char* text = reinterpret_cast<const unsigned char*>(txt);

    // long long keeps every intermediate product in range for any int q.
    long long p = 0; // hash value for pattern
    long long t = 0; // hash value for the current window of txt
    long long h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (int k = 0; k < M - 1; k++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (int k = 0; k < M; k++)
    {
        p = (d * p + pattern[k]) % q;
        t = (d * t + text[k]) % q;
    }

    // Slide the pattern over text one by one
    for (int i = 0; i <= N - M; i++)
    {
        // Compare characters one by one only when the hashes agree
        if (p == t)
        {
            int j = 0;
            while (j < M && text[i + j] == pattern[j])
                j++;

            // p == t and pat[0...M-1] == txt[i...i+M-1]
            if (j == M)
                std::cout << "Pattern found at index " << i << '\n';
        }

        // Calculate hash value for next window of text: remove the leading
        // character, add the trailing character
        if (i < N - M)
        {
            t = (d * (t - text[i] * h) + text[i + M]) % q;

            // The subtraction can leave t negative; bring it back to [0, q)
            if (t < 0)
                t += q;
        }
    }
}
|
||||
|
||||
/* Driver code */
|
||||
int main()
|
||||
{
|
||||
char txt[] = "ABCAMCABAMMAM";
|
||||
char pat[] = "AM";
|
||||
|
||||
// A prime number
|
||||
int q = 101;
|
||||
|
||||
// Function Call
|
||||
search(pat, txt, q);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,82 @@
|
|||
class KMP_String_Matching {
|
||||
void KMPSearch(String pat, String txt)
|
||||
{
|
||||
int M = pat.length();
|
||||
int N = txt.length();
|
||||
|
||||
// create lps[] that will hold the longest
|
||||
// prefix suffix values for pattern
|
||||
int lps[] = new int[M];
|
||||
int j = 0; // index for pat[]
|
||||
|
||||
// Preprocess the pattern (calculate lps[]
|
||||
// array)
|
||||
computeLPSArray(pat, M, lps);
|
||||
|
||||
int i = 0; // index for txt[]
|
||||
while (i < N) {
|
||||
if (pat.charAt(j) == txt.charAt(i)) {
|
||||
j++;
|
||||
i++;
|
||||
}
|
||||
if (j == M) {
|
||||
System.out.println("Found pattern "
|
||||
+ "at index " + (i - j));
|
||||
j = lps[j - 1];
|
||||
}
|
||||
|
||||
// mismatch after j matches
|
||||
else if (i < N && pat.charAt(j) != txt.charAt(i)) {
|
||||
// Do not match lps[0..lps[j-1]] characters,
|
||||
// they will match anyway
|
||||
if (j != 0)
|
||||
j = lps[j - 1];
|
||||
else
|
||||
i = i + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void computeLPSArray(String pat, int M, int lps[])
|
||||
{
|
||||
// length of the previous longest prefix suffix
|
||||
int len = 0;
|
||||
int i = 1;
|
||||
lps[0] = 0; // lps[0] is always 0
|
||||
|
||||
// the loop calculates lps[i] for i = 1 to M-1
|
||||
while (i < M) {
|
||||
if (pat.charAt(i) == pat.charAt(len)) {
|
||||
len++;
|
||||
lps[i] = len;
|
||||
i++;
|
||||
}
|
||||
else // (pat[i] != pat[len])
|
||||
{
|
||||
// This is tricky. Consider the example.
|
||||
// AAACAAAA and i = 7. The idea is similar
|
||||
// to search step.
|
||||
if (len != 0) {
|
||||
len = lps[len - 1];
|
||||
|
||||
// Also, note that we do not increment
|
||||
// i here
|
||||
}
|
||||
else // if (len == 0)
|
||||
{
|
||||
lps[i] = len;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Driver program to test above function
|
||||
public static void main(String args[])
|
||||
{
|
||||
String txt = "ABABDABACDABABCABAB";
|
||||
String pat = "ABABCABAB";
|
||||
new KMP_String_Matching().KMPSearch(pat, txt);
|
||||
}
|
||||
}
|
||||
// This code has been contributed by Amit Khandelwal.
|
|
@ -0,0 +1,78 @@
|
|||
public class Main
|
||||
{
|
||||
// d is the number of characters in the input alphabet
|
||||
public final static int d = 256;
|
||||
|
||||
/* pat -> pattern
|
||||
txt -> text
|
||||
q -> A prime number
|
||||
*/
|
||||
static void search(String pat, String txt, int q)
|
||||
{
|
||||
int M = pat.length();
|
||||
int N = txt.length();
|
||||
int i, j;
|
||||
int p = 0; // hash value for pattern
|
||||
int t = 0; // hash value for txt
|
||||
int h = 1;
|
||||
|
||||
// The value of h would be "pow(d, M-1)%q"
|
||||
for (i = 0; i < M-1; i++)
|
||||
h = (h*d)%q;
|
||||
|
||||
// Calculate the hash value of pattern and first
|
||||
// window of text
|
||||
for (i = 0; i < M; i++)
|
||||
{
|
||||
p = (d*p + pat.charAt(i))%q;
|
||||
t = (d*t + txt.charAt(i))%q;
|
||||
}
|
||||
|
||||
// Slide the pattern over text one by one
|
||||
for (i = 0; i <= N - M; i++)
|
||||
{
|
||||
|
||||
// Check the hash values of current window of text
|
||||
// and pattern. If the hash values match then only
|
||||
// check for characters on by one
|
||||
if ( p == t )
|
||||
{
|
||||
/* Check for characters one by one */
|
||||
for (j = 0; j < M; j++)
|
||||
{
|
||||
if (txt.charAt(i+j) != pat.charAt(j))
|
||||
break;
|
||||
}
|
||||
|
||||
// if p == t and pat[0...M-1] = txt[i, i+1, ...i+M-1]
|
||||
if (j == M)
|
||||
System.out.println("Pattern found at index " + i);
|
||||
}
|
||||
|
||||
// Calculate hash value for next window of text: Remove
|
||||
// leading digit, add trailing digit
|
||||
if ( i < N-M )
|
||||
{
|
||||
t = (d*(t - txt.charAt(i)*h) + txt.charAt(i+M))%q;
|
||||
|
||||
// We might get negative value of t, converting it
|
||||
// to positive
|
||||
if (t < 0)
|
||||
t = (t + q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Driver Code */
|
||||
public static void main(String[] args)
|
||||
{
|
||||
String txt = "ABCAMCABAMMAM";
|
||||
String pat = "AM";
|
||||
|
||||
// A prime number
|
||||
int q = 101;
|
||||
|
||||
// Function Call
|
||||
search(pat, txt, q);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue