算法总结—字符串匹配

字符串匹配的算法很多,我只会两种特别典型的:KMP和AC自动机。KMP用于单模式匹配,可以看作AC自动机的退化版;在单模式匹配时它比AC自动机更快一点,实现也更简单。

AC自动机是多模式匹配,又快又牛逼。

KMP模板如下:

#include<iostream>
#include<string>
#include<vector>
using namespace std;
// Pattern and text of the test case currently being processed (read in main).
std::string pat,text;

// Failure table of the pattern most recently passed to getNext().
// Next[i] is the index of the last character of the longest proper prefix of
// pat[0..i] that is also a suffix of pat[0..i], or -1 when no such prefix
// exists. It is sized to the pattern on every getNext() call, so patterns of
// any length are supported (a fixed-size array would overflow on long ones).
std::vector<int> Next;

// Builds the failure table for pat and stores it in the global Next.
//
// pat: the pattern to preprocess; it is only read.
// After the call Next.size() == pat.size(); an empty pattern leaves Next empty.
void getNext(std::string &pat) {
	Next.assign(pat.size(),-1);
	int k=-1;  // end index of the border carried over from the previous position
	for(std::size_t i=1;i<pat.size();i++) {
		// Fall back to shorter borders until one can be extended by pat[i].
		while(k>-1&&pat[k+1]!=pat[i]) k=Next[k];
		if(pat[k+1]==pat[i]) k++;
		Next[i]=k;
	}
}

// Finds the first occurrence of pat in text using the failure table stored in
// the global Next.
//
// Precondition: getNext(pat) has been called for this same pattern, so Next
// holds its failure table.
//
// Returns the 0-based start index of the first match, or -1 when pat does not
// occur in text. An empty pattern is reported as "not found" (-1).
int kmp(std::string &pat,std::string &text) {
	// Use signed lengths: k starts at -1, and comparing it with the unsigned
	// expression pat.size()-1 wraps around when the pattern is empty.
	const int m=static_cast<int>(pat.size());
	const int n=static_cast<int>(text.size());
	if(m==0) return -1;

	int k=-1;  // index in pat of the last matched character, -1 if none
	for(int i=0;i<n;i++) {
		// On a mismatch fall back along the failure links.
		while(k>-1&&pat[k+1]!=text[i]) k=Next[k];
		if(pat[k+1]==text[i]) k++;
		if(k==m-1) {
			return i-(m-1);  // i is the last index of the match
		}
	}
	return -1;
}

// Prints the 0-based start index of every occurrence of pat in text
// (overlapping occurrences included), each followed by a space, then a
// newline.
//
// Precondition: getNext(pat) has been called for this same pattern, so the
// global Next holds its failure table.
//
// An empty pattern produces no indices, only the trailing newline.
void kmp2(std::string &pat,std::string &text) {
	// Signed lengths avoid the unsigned wrap-around of pat.size()-1 when the
	// pattern is empty.
	const int m=static_cast<int>(pat.size());
	const int n=static_cast<int>(text.size());

	if(m>0) {
		int k=-1;  // index in pat of the last matched character, -1 if none
		for(int i=0;i<n;i++) {
			while(k>-1&&pat[k+1]!=text[i]) k=Next[k];
			if(pat[k+1]==text[i]) k++;
			if(k==m-1) {
				std::cout<<i-(m-1)<<" ";
				// Continue from the longest border of the whole pattern rather
				// than rewinding i: the same matches are found, but every text
				// character is visited only once.
				k=Next[k];
			}
		}
	}
	std::cout<<std::endl;
}

// Driver: reads whitespace-separated (pattern, text) pairs until input ends.
// For each pair it prints the first match index on one line, then all match
// indices on the next line.
int main() {
	for(;;) {
		if(!(std::cin>>pat>>text)) break;
		getNext(pat);  // the failure table must be built before searching
		const int firstMatch=kmp(pat,text);
		std::cout<<firstMatch<<std::endl;
		kmp2(pat,text);
	}
}
AC自动机模板(只处理了小写字母,如需支持其他字符,请自行修改代码,例如调整KIND和下标的计算方式):

#include<iostream>
#include<vector>
#include<cstdio>
#include<queue>
#include<algorithm>
using namespace std;
// Alphabet size: one outgoing edge per lowercase letter 'a'..'z'.
const int KIND=26;

// One state of the trie / Aho-Corasick automaton.
struct node {
	node* next[KIND];  // child reached by each letter, NULL when absent
	int count;         // number of inserted patterns that end at this node
	node* fail;        // failure link, NULL until buildFail() assigns it
	string word;       // the pattern ending here; kept only to make printing easy

	// Creates a node with no children, no failure link and no pattern.
	node():count(0),fail(NULL) {
		std::fill(next,next+KIND,static_cast<node*>(NULL));
	}
};
int N;        // number of patterns announced for the current test case
string pat;   // pattern currently being read and inserted into the trie
string text;  // text scanned by the automaton

// Inserts the global pattern `pat` into the trie rooted at `loop`.
//
// loop: root of the trie; missing nodes along the pattern's path are created.
//
// Only the KIND lowercase letters starting at 'a' are supported. A pattern
// containing any other character is ignored as a whole, because using such a
// character as an index would access memory outside node::next.
void addTrie(node* loop) {
	// Validate before touching the trie so that a rejected pattern does not
	// leave a partially built path behind.
	for(std::size_t i=0;i<pat.size();i++) {
		const int index=pat[i]-'a';
		if(index<0||index>=KIND) return;
	}

	for(std::size_t i=0;i<pat.size();i++) {
		const int index=pat[i]-'a';
		if(loop->next[index]==NULL) {
			loop->next[index]=new node();
		}
		loop=loop->next[index];
	}
	loop->count++;   // one more pattern ends at this node
	loop->word=pat;  // remembered only for printing matches
}

// Assigns the failure link of every node in the trie rooted at head, visiting
// the nodes breadth-first so that a parent's link is set before its children
// are processed.
//
// head: root of the trie; its own fail pointer is left untouched.
void buildFail(node* head) {
	std::queue<node*> pending;
	pending.push(head);
	while(!pending.empty()) {
		node* parent=pending.front();
		pending.pop();
		for(int c=0;c<KIND;c++) {
			node* child=parent->next[c];
			if(child==NULL) continue;

			// Children of the root, and nodes for which no shorter suffix
			// continues with letter c, fall back to the root.
			node* target=head;
			if(parent!=head) {
				// Follow the parent's failure chain until a node with an
				// outgoing edge on c is found or the chain runs out.
				node* cand=parent->fail;
				while(cand!=NULL&&cand->next[c]==NULL) {
					cand=cand->fail;
				}
				if(cand!=NULL) {
					target=cand->next[c];
				}
			}
			child->fail=target;
			pending.push(child);
		}
	}
}

// Scans the global `text` with the automaton rooted at head and prints one
// line "<end index> - <pattern>" for every pattern occurrence, where
// <end index> is the 0-based position of the occurrence's last character.
//
// head: root of a trie whose failure links were set by buildFail().
//
// Characters outside the supported alphabet (the KIND letters starting at
// 'a') cannot be part of any stored pattern, so they reset the automaton to
// the root instead of being used as an out-of-range index into node::next.
void ACauto(node* head) {
	const int n=static_cast<int>(text.size());
	node *p=head;
	for(int i=0;i<n;i++) {
		const int index=text[i]-'a';
		if(index<0||index>=KIND) {
			p=head;  // no match can span this character
			continue;
		}

		// Follow failure links until a state with an edge on this letter is
		// found; running off the chain means restarting at the root.
		while(p!=NULL&&p->next[index]==NULL) p=p->fail;
		if(p==NULL) {
			p=head;
		}else {
			p=p->next[index];
		}

		// Every node on the failure chain of the current state is a suffix of
		// the text read so far; report those that end a pattern.
		node* loop=p;
		while(loop!=head) {
			if(loop->count>0) {
				std::cout<<i<<" - "<<loop->word<<std::endl;
			}
			loop=loop->fail;
		}
	}
}

int main() {	
    while(cin>>text>>N) {
    	node* head=new node();
    	for(int i=1;i<=N;i++) {
    		cin>>pat;
    		addTrie(head);
		}
		buildFail(head);
		ACauto(head);
	}
}



猜你喜欢

转载自blog.csdn.net/ufo___/article/details/80165057