字符串匹配
问题描述:有文本 $T$,长度为 $n$;有模板(子串)$P$,长度为 $m$,检测文本 $T$ 是否包含子串 $P$
这个函数在 C/C++ 标准库中对应 `strstr`(C 头文件 `<string.h>`,C++ 头文件 `<cstring>`)
暴力法
时间复杂度:最坏情况下为 $O(nm)$,其中 $n$ 为文本长度,$m$ 为模板长度
def strstrBruteForce(str, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``str``.

    Brute-force search: every candidate start position is tried and the
    pattern is compared character by character, giving O(n * m) comparisons
    in the worst case (n = len(str), m = len(pattern)).

    Args:
        str: The text to search in. (The name shadows the builtin; it is kept
            so that existing keyword callers continue to work.)
        pattern: The substring to look for.

    Returns:
        The lowest index at which ``pattern`` starts in ``str``, ``0`` when
        ``pattern`` is empty, or ``-1`` when there is no occurrence.
    """
    if not pattern:
        return 0

    n, m = len(str), len(pattern)
    # When m > n the range is empty and we fall through to -1.
    for start in range(n - m + 1):
        matched = 0
        # start <= n - m and matched < m, so start + matched is always a
        # valid index into the text; no separate bound check is required.
        while matched < m and str[start + matched] == pattern[matched]:
            matched += 1
        if matched == m:
            return start
    return -1
# Demo: "abc" first occurs at index 6 of the text, so this prints 6.
print(strstrBruteForce("xxxxyzabcdabcdefabc", "abc"))
Rabin-Karp 字符串匹配(下文代码中的函数名写作 `RobinKarp`)
- 先计算模板的哈希值
- 计算文本中与模板长度相同的各个子串的哈希值,与模板的哈希值比较
- 如果模板与子串具有相同的哈希值,再来逐个比较字符
需要注意的是这里的哈希算法不能太复杂,否则反而会增加计算量
def RobinKarp(text, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``text``.

    Rabin-Karp search: the hash of the pattern is compared with the rolling
    hash of each window of ``text`` that has the same length as the pattern.
    Only when the hashes agree are the window and the pattern compared
    character by character, which rules out hash collisions.

    Args:
        text: The text to search in.
        pattern: The substring to look for.

    Returns:
        The lowest index at which ``pattern`` starts in ``text``, or ``-1``
        when there is no occurrence, when either argument is ``None`` or
        the empty string, or when ``pattern`` is longer than ``text``.
    """
    if text is None or pattern is None:
        return -1
    if pattern == "" or text == "":
        return -1

    window = len(pattern)
    if window > len(text):
        return -1

    # The pattern's hash never changes, so compute it once up front.
    pattern_hash = Hash(pattern, window).hashedValue()
    rolling = Hash(text, window)
    for start in range(len(text) - window + 1):
        # Equal hashes only suggest a match; confirm with a real comparison.
        if rolling.hashedValue() == pattern_hash and rolling.text() == pattern:
            return start
        # Slide the window one character to the right.
        rolling.update()
    return -1
class Hash:
    """Rolling additive hash over a fixed-size window of a string.

    The hash of a window is the sum of the code points of its characters.
    The window initially covers ``text[0:size]`` and can be moved one
    character to the right at a time with :meth:`update`.
    """

    def __init__(self, text, size):
        """Create the hash for the first ``size`` characters of ``text``.

        Args:
            text: The string the window slides over.
            size: The window length.

        Raises:
            IndexError: If ``size`` is larger than ``len(text)``.
        """
        # Full string the window slides over.
        self.str = text
        # Sum of the code points inside the current window. Indexing (rather
        # than slicing) keeps the IndexError for an oversized ``size``.
        self.hash = sum(ord(text[i]) for i in range(size))
        # Current window is self.str[self.init:self.end].
        self.init = 0
        self.end = size

    def update(self):
        """Slide the window one character to the right.

        The outgoing character is subtracted from the hash and the incoming
        one is added. When the window already ends at the end of the string
        the call does nothing.
        """
        if self.end <= len(self.str) - 1:
            self.hash -= ord(self.str[self.init])
            self.hash += ord(self.str[self.end])
            self.init += 1
            self.end += 1

    def hashedValue(self):
        """Return the hash of the current window."""
        return self.hash

    def text(self):
        """Return the substring covered by the current window."""
        return self.str[self.init:self.end]
# Demo: "26" first occurs at index 6 of the text, so this prints 6.
print(RobinKarp("3141592653589793", "26"))
KMP 算法
def prefixTable(pattern):
    """Build the KMP prefix (failure) table for ``pattern``.

    Args:
        pattern: The pattern string.

    Returns:
        A list ``F`` of length ``len(pattern)`` where ``F[q]`` is the length
        of the longest proper prefix of ``pattern[:q + 1]`` that is also a
        suffix of it. An empty pattern yields an empty list.
    """
    m = len(pattern)
    F = [0] * m
    k = 0  # length of the prefix currently matched against the suffix
    for q in range(1, m):
        # On a mismatch fall back to the next shorter candidate prefix.
        while k > 0 and pattern[k] != pattern[q]:
            k = F[k - 1]
        if pattern[k] == pattern[q]:
            k += 1
        F[q] = k
    return F
def KMP(text, pattern):
    """Return the index of the first occurrence of ``pattern`` in ``text``.

    Knuth-Morris-Pratt search: the text is scanned once, and on a mismatch
    the table from ``prefixTable`` tells how much of the pattern is still
    matched, so characters of the text are never re-read.

    Args:
        text: The text to search in.
        pattern: The substring to look for.

    Returns:
        The lowest index at which ``pattern`` starts in ``text``, ``0`` when
        ``pattern`` is empty, or ``-1`` when there is no occurrence.
    """
    n = len(text)
    m = len(pattern)
    # An empty pattern matches at position 0; without this guard the
    # ``pattern[q]`` lookups below would raise IndexError.
    if m == 0:
        return 0
    # A pattern longer than the text can never match.
    if m > n:
        return -1

    F = prefixTable(pattern)
    q = 0  # number of pattern characters matched so far
    for i, ch in enumerate(text):
        # On a mismatch fall back to the longest prefix that is still valid.
        while q > 0 and pattern[q] != ch:
            q = F[q - 1]
        if pattern[q] == ch:
            q += 1
        if q == m:
            return i - m + 1
    return -1
# Demo: "ababaca" first occurs at index 6 of the text, so this prints 6.
print(KMP("bacbabababacaca", "ababaca"))