[Data structure] Simple pattern matching algorithm for strings, KMP algorithm (C language)

1. Simple pattern matching algorithm for strings

The simple pattern matching algorithm, also known as the Brute-Force (BF) algorithm, is a matching algorithm with backtracking. The basic idea of the algorithm is: starting from the first character of the main string s, compare it with the first character of the pattern string t. If they are equal, continue comparing the subsequent characters; if they are not equal, go back to the second character of the main string s (in general, the character just after the previous starting position) and start comparing with the pattern string t from its first character again. This repeats until every character of the pattern string t equals the corresponding character of a contiguous character subsequence of the main string s; the match is then said to be successful, and the position in the main string s of the character equal to the first character of t is returned. If there is no character sequence in the main string equal to the pattern string, the match is unsuccessful.
Three indices are used in the implementation: i, j and start. i points to the currently compared character in the main string s, j points to the currently compared character in the pattern string t, and start records the starting position in the main string s of the current comparison attempt.

1.1 Simple pattern matching algorithm

/*
 * Brute-force pattern matching (with backtracking).
 *
 * Searches the main string s for the first occurrence of the pattern t.
 * Returns the 0-based index in s at which the match begins, 0 when t is
 * empty, or -1 when t does not occur in s.
 */
int StrIndex(SString* s, SString* t) {
	int base = 0;			/* starting position in s of the current attempt */
	int off = 0;			/* number of pattern characters matched so far */

	if (t->len == 0)		/* an empty pattern matches any string */
		return 0;

	while (base + off < s->len && off < t->len) {
		if (s->ch[base + off] != t->ch[off]) {
			/* mismatch: backtrack, retry one position further into s */
			base++;
			off = 0;
			continue;
		}
		off++;				/* characters agree: advance in both strings */
	}

	/* the whole pattern was consumed only on a successful match */
	return (off >= t->len) ? base : -1;
}

1.2 Complete implementation code

# include<stdio.h>
# define MAXLEN 40

/* Storage layout of a sequential (fixed-capacity, array-backed) string. */
typedef struct {
	char ch[MAXLEN];	/* character storage, capacity MAXLEN */
	int len;			/* number of characters currently stored in ch */
} SString;

/*
 * Initialise a sequential string to the empty string.
 * Only the length is reset; the contents of ch are left untouched.
 */
void StrInit(SString* s) {
	s->len = 0;
}

/*
 * Assign the NUL-terminated C string tval to the sequential string s.
 *
 * At most as many characters as s->ch can hold are copied; a longer input
 * is truncated instead of being written past the end of the array.
 * The terminating '\0' is not stored: s->len alone marks the end.
 */
void StrAssign(SString* s, char* tval) {
	/* capacity taken from the array itself so it always matches the struct */
	int cap = (int)(sizeof s->ch / sizeof s->ch[0]);
	int n = 0;

	while (n < cap && tval[n] != '\0') {
		s->ch[n] = tval[n];
		n++;
	}
	s->len = n;
}

/*
 * Brute-force pattern matching (with backtracking).
 *
 * Searches the main string s for the first occurrence of the pattern t and
 * prints the outcome to stdout (the printed position is 1-based).
 *
 * Returns the 0-based index in s at which the match begins, 0 when t is
 * empty, or -1 when t does not occur in s. Every path returns a value, so
 * callers may safely use the result.
 */
int StrIndex(SString* s, SString* t) {
	int i, j, start = 0;

	if (t->len == 0) {
		/* an empty pattern matches any string */
		printf("模式串为空!\n");
		return 0;
	}
	i = start, j = 0;
	while (i < s->len && j < t->len) {
		if (s->ch[i] == t->ch[j]) {
			/* characters agree: advance in both strings */
			i++;
			j++;
		}
		else {
			start++;					/* mismatch: backtrack */
			i = start;					/* main string resumes at the next start */
			j = 0;						/* pattern restarts from its first character */
		}
	}
	if (j >= t->len) {
		/* match found; report it 1-based, return it 0-based */
		printf("匹配成功!起始位置为:%d\n", start + 1);
		return start;
	}
	printf("匹配失败!\n");				/* no match */
	return -1;
}

/*
 * Demo driver: searches for the pattern "abcac" in the main string
 * "ababcabcacbab" with the brute-force matcher, which prints the outcome.
 */
int main() {
	/* same bytes as the element-wise initialisers, trailing '\0' included */
	char tval_s[14] = "ababcabcacbab";
	char tval_t[6] = "abcac";
	SString s, t;

	StrAssign(&s, tval_s);
	StrAssign(&t, tval_t);
	StrIndex(&s, &t);
	return 0;
}

1.3 Running Results

Simple pattern matching algorithm running results

2. KMP algorithm

For a detailed explanation of the KMP algorithm, see @Hatton's light blog: https://blog.csdn.net/weixin_46007276/article/details/104372119?spm=1001.2014.3001.5502

2.1 Complete implementation code

# include<stdio.h>
# define MAXLEN 40

/* Storage layout of a sequential (fixed-capacity, array-backed) string. */
typedef struct {
	char ch[MAXLEN];	/* character storage, capacity MAXLEN */
	int len;			/* number of characters currently stored in ch */
} SString;

/*
 * Initialise a sequential string to the empty string.
 * Only the length is reset; the contents of ch are left untouched.
 */
void StrInit(SString* s) {
	s->len = 0;
}

/*
 * Assign the NUL-terminated C string tval to the sequential string s.
 *
 * At most as many characters as s->ch can hold are copied; a longer input
 * is truncated instead of being written past the end of the array.
 * The terminating '\0' is not stored: s->len alone marks the end.
 */
void StrAssign(SString* s, char* tval) {
	/* capacity taken from the array itself so it always matches the struct */
	int cap = (int)(sizeof s->ch / sizeof s->ch[0]);
	int n = 0;

	while (n < cap && tval[n] != '\0') {
		s->ch[n] = tval[n];
		n++;
	}
	s->len = n;
}

/* File-scope failure table: main passes it to GetNext to be filled, and KMPIndex reads it. */
int next[MAXLEN];

/*
 * Build the KMP next[] (failure) table for the pattern t.
 *
 * next[j] is the length of the longest proper prefix of the characters
 * before t->ch[j] that is also a suffix of them; next[0] is the sentinel -1
 * because no characters precede the first one.
 *
 * next[0] is always written, so the caller's array must hold at least one
 * entry, and t->len entries when t->len > 0.
 */
void GetNext(SString* t, int* next) {
	int pos = 0;		/* last index whose next[] entry is already known */
	int border = -1;	/* candidate prefix length being extended; -1 = none */

	next[0] = -1;
	while (pos < t->len - 1) {
		if (border != -1 && t->ch[pos] != t->ch[border]) {
			/* cannot extend: fall back to the next shorter candidate */
			border = next[border];
			continue;
		}
		/* either restarting from scratch or the candidate grew by one */
		pos++;
		border++;
		next[pos] = border;
	}
}

/*
 * KMP pattern matching.
 *
 * Searches the main string s for the first occurrence of the pattern t,
 * using the file-scope next[] table. That table must already have been
 * filled for this same pattern (e.g. GetNext(t, next)) before the call;
 * with a table that was never filled, the search may not terminate.
 *
 * Returns the 1-based position in s of the first matched character,
 * or -1 when t does not occur in s.
 */
int KMPIndex(SString* s, SString* t){
	int si = 0;			/* current index into the main string */
	int ti = 0;			/* current index into the pattern; -1 = restart */

	while (si < s->len && ti < t->len){
		if (ti != -1 && s->ch[si] != t->ch[ti]) {
			/* mismatch: shift the pattern, si never moves backwards */
			ti = next[ti];
			continue;
		}
		/* match, or pattern fully reset: advance in both strings */
		si++;
		ti++;
	}

	if (ti < t->len)
		return -1;					/* main string exhausted without a match */
	return si - t->len + 1;			/* index of first matched character, plus 1 */
}

/*
 * Demo driver: builds the next[] table for the pattern "abcac" and prints
 * the position KMPIndex reports for it in "ababcabcacbab".
 */
int main() {
	/* same bytes as the element-wise initialisers, trailing '\0' included */
	char tval_s[14] = "ababcabcacbab";
	char tval_t[6] = "abcac";
	SString s, t;
	int pos;

	StrAssign(&s, tval_s);
	StrAssign(&t, tval_t);
	GetNext(&t, next);			/* the table must be ready before matching */
	pos = KMPIndex(&s, &t);
	printf("%d\n", pos);
	return 0;
}

2.2 Running results

KMP algorithm running results
For more data structure content, follow my "Data Structure" column: https://blog.csdn.net/weixin_51450101/category_11514538.html?spm=1001.2014.3001.5482

Guess you like

Origin blog.csdn.net/weixin_51450101/article/details/122684649
Recommended