"Data Structure" Experimental Report 4: String Pattern Matching (BF Algorithm, KMP Algorithm)

1. Purpose of the experiment

1. Understand the basic concept of strings .

2. Master the implementation of pattern matching algorithm for strings .

2. Experiment preview

Explain the following concepts

1. Pattern matching:

        String pattern matching is the positioning operation of substrings .

        There are two strings S and T, S is the main string (text string) , and T is the substring (pattern string) . Find the substring matching the pattern string T in the main string S, if the match is successful, determine the position of the first character in the matching substring in the main string S.

2. BF algorithm:

        Brute Force is one of the pattern matching algorithms.

        Algorithm idea: start to compare the first character of the main string and the pattern string, and compare the next character if the match is successful ; Characters backtrack to the first character of the pattern string , and character comparisons continue.

3. KMP algorithm:

        The KMP algorithm is also a kind of pattern matching algorithm, which is an improved matching algorithm.

        The improvement of the KMP algorithm is that when there is a character mismatch, the characters of the main string comparison do not need to be backtracked , while the pattern string uses the part of the characters that have been successfully matched to determine the backtracking position of the pattern string (no need to go back to the first character) .

        The KMP algorithm reduces the number of matches between the pattern string and the main string to achieve the purpose of fast matching .

3. Experimental content and requirements

 ! Note !

        The character array data in this experiment stores strings starting from the array component whose subscript is 0. In some textbooks, for the convenience of explaining the problem, the string is stored from the array component with the subscript 1, and the component with the subscript 0 is left unused, so the code is slightly different.

1. Read and run the following program, and write the running result according to the input.

#include<stdio.h>
#include<string.h>
#define MAXSIZE 100  //串的最大长度

typedef struct{
	char data[MAXSIZE];  //存储串的一维数组(从下标为0的数组分量开始存储字符串)
	int length;          //串的当前长度
}SqString;

int strCompare(SqString *s1,SqString *s2);  /*串的比较*/
void show_strCompare();
void strSub(SqString *s,int start,int sublen,SqString *sub);  /*求子串*/
void show_subString();

int strCompare(SqString *s1,SqString *s2){
	int i;
	for(i=0;i<s1->length && i<s2->length;i++)
		if(s1->data[i] != s2->data[i])
			return s1->data[i] - s2->data[i];
    //字符比较完成,还需比较字符串长度
	return s1->length - s2->length;
	/* s1=s2,返回值=0
	   s1>s2,返回值>0
       s1<s2,返回值<0 */
}

void show_strCompare(){
    SqString s1,s2;
    int k;
    printf("\n***show Compare***\n");
    printf("input string s1:");
    gets(s1.data);
    s1.length=strlen(s1.data);
    printf("input string s2:");
    gets(s2.data);
    s2.length=strlen(s2.data);
    if((k=strCompare(&s1,&s2))==0)
        printf("s1=s2\n");
    else if(k<0)
        printf("s1<s2\n");
    else
        printf("s1>s2\n");
    printf("\n***show over***\n");
}

void strSub(SqString *s,int start,int sublen,SqString *sub){
	int i;
	if(start<1 || start>s->length || sublen>s->length-start+1)
		sub->length = 0;
    else
    {
        for(i=0;i<sublen;i++)
		sub->data[i]=s->data[start+i-1];
	    sub->length=sublen;
    }
}

void show_subString(){
    SqString s,sub;
    int start,sublen,i;
    printf("\n***show subString***\n");
    printf("input string s:");
    gets(s.data);
    s.length=strlen(s.data);
    printf("input start:");
    scanf("%d",&start);
    printf("input sublen:");
    scanf("%d",&sublen);
    strSub(&s,start,sublen,&sub);
    if(sub.length==0)
        printf("ERROR!\n");
    else{
        printf("subString is:");
        for(i=0;i<sublen;i++)
            printf("%c",sub.data[i]);
    }
    printf("\n***show over***\n");
}

int main(){
    int n;
    do{
        printf("\n---String---\n");
        printf("1. strCompare\n");
        printf("2. subString\n");
        printf("0. EXIT\n");
        printf("\ninput choice:");
        scanf("%d",&n);
        getchar();
        switch(n){
            case 1:
                show_strCompare();
                break;
            case 2:
                show_subString();
                break;
            default:
                n=0;
                break;
        }
    }while(n);
    return 0;
}

enter:

1

student

students

2

Computer Data Stuctures

10

4

operation result:

2. Implement the pattern matching algorithm for strings. Supplement the following program to realize the BF and KMP algorithms of strings.

#include<stdio.h>
#include<string.h>

#define MAXSIZE 100  //串的最大长度

typedef struct{
	char data[MAXSIZE];  //存储串的一维数组(从下标为0的数组分量开始存储字符串)
	int length;          //串的当前长度
}SqString;

int index_bf(SqString *s,SqString *t,int start);              /*模式匹配-BF算法*/
void getNext(SqString *t,int next[]);                         /*计算目标串的next数组*/
int index_kmp(SqString *s,SqString *t,int start,int next[]);  /*模式匹配-KMP算法*/
void show_index();

int index_bf(SqString *s,SqString *t,int start){
    //补充代码 
}

void getNext(SqString *t,int next[]){  /*计算模式串t的next数组*/
	int i=0;
	int j=-1;
	next[0]=-1;
	while(i < t->length){
		if(-1==j || (t->data[i]==t->data[j]))
		{
			i++;
			j++;
			next[i]=j;
		}
		else
            j=next[j];
	}
}

int index_kmp(SqString *s,SqString *t,int start,int next[]){
    //补充代码
}

void show_index(){
	SqString s,t;
	int k;
	int i;
	int next[MAXSIZE]={0};
	int nextval[MAXSIZE]={0};
	printf("\n***show index***\n");
	printf("input string s:");
	gets(s.data);
	s.length=strlen(s.data);
	printf("input string t:");
	gets(t.data);
	t.length=strlen(t.data);
	printf("input start position(1≤start position≤%d):",s.length);
	scanf("%d",&k);
	printf("BF:\nthe result of BF is %d\n",index_bf(&s,&t,k));
	getNext(&t,next);
	printf("KMP:\n");
	printf("next[]:");
	for(i=0;i<t.length;i++)
	    printf("%3d",next[i]);
	printf("\n");
	printf("the result of KMP is %d\n",index_kmp(&s,&t,k,next));
	printf("\n***show over***\n");
}

int main(){
	show_index();
	return 0;
}

BF algorithm code:

int index_bf(SqString *s,SqString *t,int start){  /*返回模式串t在主串s中第start个字符开始第一次出现的位置;若不存在,返回0*/
    //模式串t长度为0 或 起始查找位置超出主串范围:直接返回0
    if(0 == t->length || start < 1 || start > s->length)
        return (0);
    int pos=start-1;  //pos记录主串s开始匹配的字符下标
    int i=pos;        //i指向主串s当前正待比较的字符下标
    int j=0;          //j指向模式串t当前正待比较的字符下标
    while( i < s->length && j < t->length )
    {
        if(s->data[i] == t->data[j])  //匹配成功:i,j都++,继续比较后续字符
        {
            i++;
            j++;
        }
        else  //匹配失败
        {
            pos++;  //主串开始匹配的位置+1
            i=pos;  //i回溯到主串初始位置下一字符
            j=0;    //j回溯到模式串第一个字符
        }
    }
    if(j >= t->length)
        return (pos-start+2);  //返回模式串t在主串s中第start个字符开始第一次出现的位置
    else
        return (-1);  //查找失败,返回-1
}/*index_bf*/

KMP algorithm code:

int index_kmp(SqString *s,SqString *t,int start,int next[]){  /*返回模式串t在主串s中第start个字符开始第一次出现的位置;若不存在,返回0*/
    //模式串t长度为0 或 起始位置超出主串范围:直接返回0
    if(0 == t->length || start < 1 || start > s->length)
        return (0);
    int i=start-1;  //i指向主串s当前正待比较的字符下标
    int j=0;        //j指向模式串t当前正待比较的字符下标
    while(i<s->length && j<t->length)
    {
        if(-1 == j || (s->data[i] == t->data[j]))
        {
            i++;
            j++;
        }
        else  //匹配失败
        {
            j=next[j];  //i不回溯,j回溯到next[j]
        }
    }
    if(j>=t->length)
        return (i-j-start+2);  //返回模式串t在主串s中第start个字符开始第一次出现的位置
    else
        return (-1);  //查找失败,返回-1
}/*index_kmp*/

enter:

abaabb abaa abaa

abaaba

1

operation result:

3. On the basis of the second question, improve the next array, realize the algorithm of calculating the nextval array of the target string, and verify it.

Nextval array algorithm code:

void getNextVAL(SqString *t,int nextval[]){  /*计算模式串t的nextval数组*/
	int i=0;
	int j=-1;
	nextval[0]=-1;
	while(i < t->length){
		if(-1 == j || (t->data[i]==t->data[j]))
		{
			i++;
			j++;
			if(t->data[i] != t->data[j])
                nextval[i]=j;
            else
                nextval[i]=nextval[j];
		}
		else
            j=nextval[j];
	}
}/*getNextVAL*/

show_index function supplementary code:

void show_index(){
	SqString s,t;
	int k;
	int i;
	int next[MAXSIZE]={0};
	int nextval[MAXSIZE]={0};
	printf("\n***show index***\n");
	printf("input string s:");
	gets(s.data);
	s.length=strlen(s.data);
	printf("input string t:");
	gets(t.data);
	t.length=strlen(t.data);
	printf("input start position(1≤start position≤%d):",s.length);
	scanf("%d",&k);
	printf("BF:\nthe result of BF is %d\n",index_bf(&s,&t,k));
	getNext(&t,next);
    getNextVAL(&t,nextval);
	printf("KMP:\n");
	printf("   next[]:");
	for(i=0;i<t.length;i++)
	    printf("%3d",next[i]);
	printf("\n");
    printf("nextval[]:");
	for(i=0;i<t.length;i++)
	    printf("%3d",nextval[i]);
	printf("\n");
	printf("the result of KMP is %d\n",index_kmp(&s,&t,k,nextval));
	printf("\n***show over***\n");
}

enter:

abaabb abaa abaa

abaaba

1

operation result:

Guess you like

Origin blog.csdn.net/Amentos/article/details/127277450
Recommended