chore(Python): add Rabin Karp algorithm (#699)
Co-authored-by: Arsenic <54987647+Arsenic-ATG@users.noreply.github.com>pull/712/merge
parent
a1eff4fc8b
commit
15d44eddae
|
@ -53,6 +53,7 @@
|
||||||
- [Longest Common Subsequence](strings/longest_common_subsequence.py)
|
- [Longest Common Subsequence](strings/longest_common_subsequence.py)
|
||||||
- [Unique Character](strings/unique_character.py)
|
- [Unique Character](strings/unique_character.py)
|
||||||
- [Add String](strings/add_string.py)
|
- [Add String](strings/add_string.py)
|
||||||
|
- [Rabin Karp algorithm](strings/rabin-karp-algorithm.py)
|
||||||
|
|
||||||
## Dynamic Programming
|
## Dynamic Programming
|
||||||
- [Print Fibonacci Series Up To N-th Term](dynamic_programming/fibonacci_series.py)
|
- [Print Fibonacci Series Up To N-th Term](dynamic_programming/fibonacci_series.py)
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
'''
|
||||||
|
String pattern matching algorithm which performs efficiently for large text and patterns
|
||||||
|
|
||||||
|
Algorithm:
|
||||||
|
Rabin Karp works on the concept of hashing. If the substring of the given text
|
||||||
|
is the same as the pattern, then the corresponding hash values must also be the same. Exploiting this idea
|
||||||
|
we design a hash function that can be computed in O(m) time for both the pattern and the initial window
|
||||||
|
of text. Each subsequent window then requires only O(1) time, and we slide the window n-m times
|
||||||
|
after the initial window. Therefore, the overall cost of computing the hash values for the text is O(n-m+1).
|
||||||
|
Once the hash values match, the underlying substring is compared with the pattern directly to rule out a spurious hit (hash collision).
|
||||||
|
|
||||||
|
|
||||||
|
Complexity:
|
||||||
|
Best case: O(n-m+1)
|
||||||
|
Worst case: O(nm)
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
def rabin_karp(T: str, P: str, q: int, d: int = 256) -> None:
    """
    Print every index of T at which the pattern P occurs (Rabin-Karp search).

    A rolling polynomial hash (radix d, modulus q) of each length-len(P)
    window of T is compared with the hash of P; the window is compared with
    P directly only when the two hashes agree.

    Parameters:

    T: string
        The string where the pattern needs to be searched

    P: string
        The pattern to be searched

    q: int
        An appropriately chosen prime number based on length of input strings
        The higher the prime number, the lower the collisions and spurious hits

    d: int, default value 256
        Denotes the number of unique characters that is used for encoding
        (the radix of the polynomial hash)

    Returns:

    None. One line "Pattern found at pos: <index>" is printed per match, in
    increasing index order (overlapping matches included), or the single
    line "Pattern not found" when there is no match. A pattern longer than
    the text is reported as not found; an empty pattern matches at every
    offset from 0 to len(T) inclusive.

    Example:

    >>> rabin_karp("AAEXCRTDDEAAFT", "AA", 101)
    Pattern found at pos: 0
    Pattern found at pos: 10
    >>> rabin_karp("AB", "ABC", 101)
    Pattern not found
    """
    n = len(T)  # length of text
    m = len(P)  # length of pattern
    positions = []  # start index of every confirmed match

    if m == 0:
        # The empty string occurs at every offset, including the end of T.
        # The rolling-hash update below assumes m >= 1, so handle it here.
        positions = list(range(n + 1))
    elif m <= n:
        # h = d^(m-1) mod q: weight of the window's leading character,
        # needed to remove that character when the window slides.
        h = 1
        for _ in range(1, m):
            h = (h * d) % q

        # Hash of the pattern and of the initial window (size m) of the text.
        p = 0
        t = 0
        for j in range(m):
            p = (d * p + ord(P[j])) % q
            t = (d * t + ord(T[j])) % q

        last = n - m  # last valid window start
        for s in range(last + 1):
            # Equal hashes may be a collision, so confirm with a real compare.
            if p == t and P == T[s:s + m]:
                positions.append(s)
            if s < last:
                # Drop T[s], shift by one radix position, append T[s+m].
                # Python's % already yields a non-negative result for q > 0,
                # so no sign correction is needed afterwards.
                t = (d * (t - ord(T[s]) * h) + ord(T[s + m])) % q
    # else: the pattern is longer than the text, so it cannot occur.

    # Printing results
    if not positions:
        print("Pattern not found")
        return
    for i in positions:
        print(f"Pattern found at pos: {i}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Demo run: search a sample text for the pattern "AA" using the prime
    # modulus 101 and print the positions that are found.
    T, P = "AAEXCRTDDEAAFT", "AA"
    rabin_karp(T, P, q=101)
|
Loading…
Reference in New Issue