目录
KMP算法
next数组
例子:
a | b | a | b | c | a | b | a |
---|---|---|---|---|---|---|---|
0 | 0 | 1 | 2 | 0 | 1 | 2 | 3 |
-1 | 0 | 0 | 1 | 2 | 0 | 1 | 2 |
-1 | 0 | -1 | 0 | 2 | -1 | 0 | -1 |
第一行(前缀函数:k[i] 为 s[0..i] 的最长相等真前缀/真后缀的长度):
// Row 1 of the table: k[i] is the length of the longest proper prefix of
// s[0..i] that is also a suffix of s[0..i].
string s = "ababcaba";
int *k = new int[s.size()];  // raw array: release with delete[] k when finished
k[0] = 0;
int p = 0;  // length of the border of s[0..i], i.e. index of the next prefix char to compare
int i = 0;  // last index whose k[] entry has already been written
// Each pass inspects s[i+1] and stores into k[i+1], so the loop must stop as
// soon as k[s.size()-1] is filled; looping while i < s.size() would write
// k[s.size()], one element past the allocation.
while (i + 1 < static_cast<int>(s.size())) {
    if (s[i + 1] == s[p]) {
        // The current border grows by one character.
        i++;
        p++;
        k[i] = p;
    }
    else if (p == 0) {
        // No shorter border to fall back to: s[0..i+1] has no border.
        ++i;
        k[i] = 0;
    }
    else {
        // Fall back to the next shorter border and compare again.
        p = k[p - 1];
    }
}
第二行(next 数组:第一行整体右移一位,并令 k[0] = -1):
// Row 2 of the table: k[0] = -1 and, for i > 0, k[i] is the length of the
// longest proper prefix of s[0..i-1] that is also a suffix of s[0..i-1].
string s = "ababcaba";
int *k = new int[s.size()];  // raw array: release with delete[] k when finished
k[0] = -1;
int i = 0;   // last index whose k[] entry has already been written
int p = -1;  // candidate border length; -1 means "no candidate left"
// Every successful step writes k[i+1], so stop once k[s.size()-1] is filled;
// looping while i < s.size() would write k[s.size()], past the allocation.
while (i + 1 < static_cast<int>(s.size()))
{
    if (p == -1 || s[p] == s[i])
    {
        // Either there is nothing to extend (p == -1) or the border extends.
        ++i;
        ++p;
        k[i] = p;
    }
    else {
        // Mismatch: retry with the next shorter border.
        p = k[p];
    }
}
第三行(优化后的 next 数组:若 s[i] == s[p],失配后回退到 p 仍会失配,因此直接取 k[p]):
// Row 3 of the table: the optimised next array. It is built like row 2, but
// when the character at the fallback position equals s[i] the fallback is
// skipped and the fallback's own entry is stored instead.
string s = "ababcaba";
int *k = new int[s.size()];  // raw array: release with delete[] k when finished
k[0] = -1;
int i = 0;   // last index whose k[] entry has already been written
int p = -1;  // candidate border length; -1 means "no candidate left"
// Every successful step writes k[i+1], so stop once k[s.size()-1] is filled;
// looping while i < s.size() would write k[s.size()], past the allocation.
while (i + 1 < static_cast<int>(s.size()))
{
    if (p == -1 || s[p] == s[i])
    {
        ++i;
        ++p;
        if (s[i] != s[p])
        {
            // Falling back to p compares a different character: keep p.
            k[i] = p;
        }
        else
        {
            // s[p] equals s[i], so a mismatch at i would repeat at p;
            // jump straight to p's own fallback.
            k[i] = k[p];
        }
    }
    else {
        // Mismatch: retry with the next shorter candidate.
        p = k[p];
    }
}
kmp算法
// Scans the C string s for the C string p, using the fallback table `next`.
// Returns i - j (the start of the matched window in s) once every character
// of p has been consumed, or -1 if s is exhausted first.
// NOTE(review): `next` is not declared in this function; presumably it is the
// table for p built with k[0] == -1 as in the snippets above - confirm.
int KmpSearch(char* s, char* p)
{
    const int textLen = strlen(s);
    const int patLen = strlen(p);
    int textPos = 0;
    int patPos = 0;

    while (textPos < textLen && patPos < patLen)
    {
        if (patPos != -1 && s[textPos] != p[patPos])
        {
            // Mismatch with a valid pattern index: keep the text position
            // and fall back inside the pattern.
            patPos = next[patPos];
            continue;
        }
        // Either patPos is the -1 sentinel or the characters agree:
        // advance both cursors.
        ++textPos;
        ++patPos;
    }

    return (patPos == patLen) ? textPos - patPos : -1;
}
BM算法
两个优化点:
坏字符:
bc表:每个字符在模式串上最后一次出现的位置
// Bad-character table: bc[c] is the index of the last occurrence of byte c
// in pat, or -1 when c does not occur in pat.
int bc[256];
for (int i = 0; i < 256; ++i) {
    bc[i] = -1;
}
for (size_t j = 0; j < pat.size(); ++j) {
    // Index through unsigned char: plain char may be signed, and a negative
    // value would address memory before the start of the table.
    // Later occurrences overwrite earlier ones, leaving the last index.
    bc[static_cast<unsigned char>(pat[j])] = static_cast<int>(j);
}
根据bc表的坏字符优化:
// Boyer-Moore search using only the bad-character table bc.
// When the loop ends with pos == -1, pat occurs in src at offset k;
// otherwise the window ran off the end of src without a match.
int k = 0;  // offset in src at which pat is currently aligned
int span;   // shift suggested by the bad-character table
int pos;    // pattern index currently being compared
while (k + pat.size() <= src.size())
{
    // Compare right to left, starting at the LAST pattern character and
    // going down to index 0 inclusive; pos drops to -1 only on a full match.
    for (pos = static_cast<int>(pat.size()) - 1; pos >= 0 && src[k + pos] == pat[pos]; --pos);
    if (pos == -1)
        break;
    else {
        // Align the last occurrence of the mismatching text byte with it.
        // Index bc through unsigned char so a signed char cannot go negative.
        span = pos - bc[static_cast<unsigned char>(src[k + pos])];
        // That occurrence may lie to the right of pos (span <= 0); then
        // advance by one so the window always moves forward.
        k += (span > 0) ? span : 1;
    }
}
好后缀:
suffix表构造
int *suffix = new int[pat.size()];
suffix[pat.size() - 1] = pat.size();
int num;
for (size_t i = 0; i < pat.size()-2; i++)
{
for (size_t num = 0; num <= i && pat[i - num] == pat[pat.size() - 1 - num]; num++);
suffix[i] = num;
}
构造gs表
// Good-suffix table: gs[pos] is the shift to apply when the right-to-left
// comparison fails at pattern index pos. Built from suffix[] and patlen.
int *gs = new int[patlen];  // raw array: release with delete[] gs when finished
/* Default: neither a matching prefix nor a matching substring exists, so
   shift by the whole pattern length. */
for (int i = 0; i < patlen; ++i) {
    gs[i] = patlen;
}
/* Prefix case: suffix[i] == i+1 means pat[0..i] is also a suffix of pat.
   Every mismatch index j below patlen-1-i can shift by patlen-1-i.
   i runs downwards (down to 0 inclusive), so smaller shifts are assigned
   first; the gs[j] == patlen test only fills entries that still hold the
   default, keeping a smaller shift from being overwritten by a larger one. */
for (int i = patlen - 1; i >= 0; --i) {
    if (suffix[i] == i + 1)
    {
        for (int j = 0; j < patlen - 1 - i; ++j) {
            if (gs[j] == patlen)
                gs[j] = patlen - 1 - i;
        }
    }
}
/* Substring case: the matched suffix of length suffix[i] reappears ending
   at i. i increases, so later (smaller) shifts overwrite earlier ones. */
for (int i = 0; i < patlen - 1; ++i) {
    gs[patlen - 1 - suffix[i]] = patlen - 1 - i;
}
代码中3个for循环对应的三张图:
BM的最终算法
// Full Boyer-Moore search combining the good-suffix table gs and the
// bad-character table bc.
// When the loop ends with pos == -1, pat occurs in src at offset k;
// otherwise the window ran off the end of src without a match.
int k = 0;  // offset in src at which pat is currently aligned
int span;   // kept from the bad-character-only version; not used here
int pos;    // pattern index currently being compared
while (k + pat.size() <= src.size())
{
    // Compare right to left, starting at the LAST pattern character and
    // going down to index 0 inclusive; pos drops to -1 only on a full match.
    for (pos = static_cast<int>(pat.size()) - 1; pos >= 0 && src[k + pos] == pat[pos]; --pos);
    if (pos == -1)
        break;
    else {
        /* On a mismatch take the larger of the two suggested shifts.
           bc is indexed through unsigned char so a signed char cannot
           produce a negative index. */
        k += max(gs[pos], pos - bc[static_cast<unsigned char>(src[k + pos])]);
    }
}
Sunday
int next[126];
for (int i = 0; i < 126; ++i) {
next[i]=pat.size()+1;
}
for (int j = 0; j < pat.size(); ++j) {
next[pat[j]-' ']=pat.size()-j;
}
int k=0;
int result=0;
for (int pos = 0; pos < pat.size(); ++pos) {
if (src[k + pos] != pat[pos]) {
char ch=src[k + pat.size()];
k = k + next[ch - ' '];
if (k > (src.size() - pat.size())) {
result = -1;
break;
}
pos = 0;
}