问题:
查找子串在主串中出现的位置
算法:
- Brute-force:从主串中第pos个字符开始与模式串的第一个字符比较,如果相等则继续逐个比较后续字符;否则从主串的下一个字符开始,重新与模式串的第一个字符比较,依次类推,直到模式串的全部字符都匹配成功(查找成功),或主串被扫描完为止(查找失败)。
- KMP:在每一趟出现字符不相等时,不需要退回主串的指针,而是利用已经得到的前面部分匹配的结果,将模式串向右滑动若干个字符后继续与主串的当前字符进行比较。
代码:
1.顺序串基本操作的头文件(SeqString.h):
#pragma once
#define MaxLen 80
#include<iostream>
/* Fixed-capacity sequential string: a character buffer plus an explicit length. */
struct SeqString
{
    char str[MaxLen];   /* character storage, capacity MaxLen */
    int length;         /* number of characters currently in use */
};
/*
 * Assigns the C string cstr to *s.
 *
 * At most MaxLen characters are copied; anything beyond the buffer capacity
 * is dropped. s->length is always written, so assigning "" yields an empty
 * string instead of leaving a stale length behind.
 */
void StrString(SeqString *s, char cstr[])
{
    int count = 0;
    while (count < MaxLen && cstr[count] != '\0')
    {
        s->str[count] = cstr[count];
        ++count;
    }
    s->length = count;
    /* Terminate when there is room so the buffer can also be read as a C string. */
    if (count < MaxLen)
        s->str[count] = '\0';
}
/* Reports whether s holds no characters: 1 when its length is 0, otherwise 0. */
int StrEmpty(SeqString s)
{
    return (s.length == 0) ? 1 : 0;
}
/* Returns the stored length of S. */
int StrLength(SeqString S)
{
    const int count = S.length;
    return count;
}
/*
 * Copies S into *T.
 *
 * The copy is driven by S.length rather than by searching for '\0', because
 * the buffer of a SeqString is not guaranteed to be terminated. T->length is
 * always written, so copying an empty string empties T.
 */
void StrCopy(SeqString *T, SeqString S)
{
    /* Clamp defensively so an inconsistent length can never overrun T->str. */
    int count = S.length;
    if (count < 0)
        count = 0;
    if (count > MaxLen)
        count = MaxLen;

    for (int i = 0; i < count; i++)
    {
        T->str[i] = S.str[i];
    }
    T->length = count;
    /* Terminate when there is room so the buffer can also be read as a C string. */
    if (count < MaxLen)
        T->str[count] = '\0';
}
/*
 * Compares S and T character by character.
 *
 * Returns a value greater than 0 when S is greater than T, 0 when they are
 * equal, and a value less than 0 when S is smaller. The result is the
 * difference of the first pair of differing characters, or the difference of
 * the lengths when one string is a prefix of the other; only its sign is
 * meaningful.
 */
int StrCompare(SeqString S, SeqString T)
{
    /* Only the positions both strings actually contain can be compared. */
    const int common = (S.length < T.length) ? S.length : T.length;
    for (int i = 0; i < common; i++)
    {
        if (S.str[i] != T.str[i])
            return (S.str[i] - T.str[i]);
    }
    /* All shared positions match: the shorter string sorts first. */
    return (S.length - T.length);
}
/*
 * Inserts T into *S so that T's first character lands at 0-based index pos.
 *
 * The result is limited to MaxLen characters:
 *   - everything fits: the tail of S is shifted right and T is inserted whole;
 *   - T fits but the result is too long: the end of S's tail is cut off;
 *   - T itself does not fit after pos: T is cut off and S's tail is dropped.
 *
 * Returns 1 when nothing was lost. Returns 0 when the result was truncated,
 * or when pos lies outside [0, S->length] (in which case *S is left
 * unchanged).
 */
int StringInsert(SeqString *S, int pos, SeqString T)
{
    if (pos < 0 || pos > S->length)
        return 0;
    if (T.length <= 0)
        return 1;   /* nothing to insert */

    /* How many characters of T fit between pos and the end of the buffer. */
    const int room = MaxLen - pos;
    const int inserted = (T.length < room) ? T.length : room;

    /* Written as a subtraction so the test cannot overflow. */
    const bool complete = (T.length <= MaxLen - S->length);
    const int newLength = complete ? S->length + T.length : MaxLen;

    /* Shift the tail of S right, back to front so no character is overwritten
       before it is moved; tail characters pushed past MaxLen are dropped. */
    for (int i = newLength - 1; i >= pos + inserted; i--)
    {
        S->str[i] = S->str[i - inserted];
    }
    /* Fill the gap with the part of T that fits. */
    for (int k = 0; k < inserted; k++)
    {
        S->str[pos + k] = T.str[k];
    }
    S->length = newLength;
    return complete ? 1 : 0;
}
/*
 * Deletes len characters from *S starting at 0-based index pos.
 *
 * Returns 1 on success. When the arguments do not describe a range inside
 * the string, a message is printed, *S is left unchanged and 0 is returned.
 */
int StrDelete(SeqString *S, int pos, int len)
{
    /* The range test is written as a subtraction so pos + len cannot overflow. */
    if (pos < 0 || len < 0 || pos > S->length || len > S->length - pos)
    {
        std::cout << "参数不合法" << std::endl;
        return 0;
    }

    /* Close the gap by moving every character after the deleted range left. */
    for (int i = pos + len; i < S->length; i++)
    {
        S->str[i - len] = S->str[i];
    }
    S->length -= len;
    return 1;
}
/*
 * Appends T to the end of *S.
 *
 * Returns 1 when all of T fits within MaxLen characters. Otherwise only the
 * leading part of T that fits is appended, the length becomes MaxLen and 0
 * is returned.
 */
int StrCat(SeqString *S, SeqString T)
{
    const int base = S->length;
    const bool fits = (base + T.length <= MaxLen);
    /* Number of characters taken from T: all of it, or whatever room is left. */
    const int taken = fits ? T.length : MaxLen - base;

    for (int k = 0; k < taken; ++k)
    {
        S->str[base + k] = T.str[k];
    }

    S->length = fits ? base + T.length : MaxLen;
    return fits ? 1 : 0;
}
/* Empties *S by resetting its length to 0; the buffer contents are not touched. */
void StrClear(SeqString *S)
{
    (*S).length = 0;
}
2.字符串匹配的cpp文件:
//字符串匹配
#include "pch.h"
#include <iostream>
#include"SeqString.h"
#include<time.h>
/*
 * Brute-force search for pattern S in text T.
 *
 * Returns the 1-based position in T where the first occurrence of S starts,
 * or -1 when S does not occur. A pattern of length 0 yields 1.
 */
int BruteForce(SeqString T, SeqString S)
{
    int start = 0;     /* 0-based alignment of the pattern within T */
    int matched = 0;   /* pattern characters matched at this alignment */

    while (start + matched < T.length && matched < S.length)
    {
        if (T.str[start + matched] == S.str[matched])
        {
            ++matched;
        }
        else
        {
            /* Mismatch: retry from the next alignment. */
            ++start;
            matched = 0;
        }
    }

    return (matched == S.length) ? start + 1 : -1;
}
/*
 * Builds the KMP failure table of pattern S.
 *
 * next[0] is the sentinel -1. For 1 <= i < S.length, next[i] is the length
 * of the longest proper prefix of S[0..i-1] that is also a suffix of it,
 * i.e. the pattern index to resume from after a mismatch at index i.
 *
 * next must provide at least S.length entries (at least one, since next[0]
 * is always written). Always returns 1.
 */
int GoNext(SeqString S, int *next)
{
    next[0] = -1;
    int i = 0;    /* index whose successor entry is filled next */
    int j = -1;   /* length of the current candidate border, -1 = none */

    /* Stop at S.length - 1: each step writes next[i + 1], and the table has
       one entry per pattern character. */
    while (i < S.length - 1)
    {
        if (j == -1 || S.str[i] == S.str[j])
        {
            i++;
            j++;
            next[i] = j;
        }
        else
        {
            /* Fall back to the next shorter border of the matched prefix. */
            j = next[j];
        }
    }
    return 1;
}
/*
 * KMP search for pattern S in text T, using the failure table next.
 *
 * The text index never moves backwards; on a mismatch only the pattern index
 * falls back through next. A value of -1 read from next means "advance both
 * indices". next is expected to be the table filled by GoNext for S.
 *
 * Returns the 1-based position in T where the match starts, or -1 when S
 * does not occur. A pattern of length 0 yields 1.
 */
int KMP(SeqString T, SeqString S, int *next)
{
    int textPos = 0;
    int patPos = 0;

    while (textPos < T.length && patPos < S.length)
    {
        const bool mismatch = (patPos != -1) && (T.str[textPos] != S.str[patPos]);
        if (mismatch)
        {
            patPos = next[patPos];
            continue;
        }
        ++textPos;
        ++patPos;
    }

    if (patPos != S.length)
        return -1;
    return textPos - patPos + 1;
}
/*
 * Initialises *S from the C string str: the characters are copied into
 * S->str with strcpy_s and S->length is set to strlen(str).
 */
void CreateStr(SeqString *S, char str[])
{
    strcpy_s(S->str, str);
    S->length = static_cast<int>(strlen(str));
}
/* Prints the first S.length characters of S, followed by a newline. */
void PrintStr(SeqString S)
{
    const char *cursor = S.str;
    int remaining = S.length;
    while (remaining > 0)
    {
        printf("%c", *cursor);
        ++cursor;
        --remaining;
    }
    printf("\n");
}
/*
 * Reads a text and a pattern from standard input, then searches for the
 * pattern with KMP and with the brute-force method, printing the position
 * each one reports and the CPU time each one took.
 */
int main()
{
    /* One capacity for the input buffer and the failure table: the pattern
       is read through str, so it can never need more table entries than str
       has characters. */
    const int kInputCap = 50;

    clock_t start, end;
    int pos;
    SeqString T, S;
    int next[kInputCap];
    char str[kInputCap];

    std::cout << "Input string:\n";
    gets_s(str);
    CreateStr(&T, str);
    PrintStr(T);

    std::cout << "Input substring:\n";
    gets_s(str);
    CreateStr(&S, str);
    PrintStr(S);

    start = clock();
    GoNext(S, next);
    pos = KMP(T, S, next);
    end = clock();
    /* Divide in floating point: integer division would truncate every
       sub-second measurement to 0. */
    std::cout << "KMP算法得到的pos:" << pos << " 计算时间为:" << static_cast<double>(end - start) / CLOCKS_PER_SEC << std::endl;

    start = clock();
    pos = BruteForce(T, S);
    end = clock();
    std::cout << "BruteForece得到的pos:" << pos << " 计算时间为:" << static_cast<double>(end - start) / CLOCKS_PER_SEC << std::endl;
}
程序运行结果: