AC automata and KMP template

AC automata

 
The Aho-Corasick automaton, developed at Bell Labs in 1975, is a well-known multi-pattern matching algorithm.
To learn the AC automaton, you must first know what a trie is. A trie, also known as a dictionary tree, prefix tree, or key tree, is a tree data structure and a variation of the hash tree. A typical application is counting and sorting a large number of strings (though it is not limited to strings), so search engine systems often use it for text word-frequency statistics.
Application
A common example: given n words and a passage containing m characters, find out how many of the words appear in the passage.
To understand the AC automaton, you first need basic knowledge of the trie (dictionary tree) and of the KMP pattern-matching algorithm. The AC automaton algorithm is divided into three steps: constructing the trie, constructing the fail pointers, and running the pattern-matching process.

// AC automaton template
#include <bits/stdc++.h>
using namespace std;
// Size of the `partner` buffer that holds the text to be searched.
const int maxn = 1e7 + 5;
// Capacity of the node-pointer queue `q`.
const int MAX = 10000000;

// One node of the 26-way (lowercase letter) trie that backs the AC automaton.
struct node
{
	node *next[26];   // child per letter, indexed by (letter - 'a'); null when absent
	node *fail;       // fail pointer, assigned by build_fail_pointer()
	int sum;          // number of inserted patterns ending at this node;
	                  // ac_automation() overwrites it with -1 once it has been counted
};

node *root;            // root of the trie; allocated in main() for every test case
node *q[MAX];          // array used as the queue in build_fail_pointer()
char partner[maxn];    // text to be searched, read in main()
char s[105];           // buffer for one pattern word, read in main()
int head, tail;        // read index / write index into q
int cnt;               // number of patterns found, accumulated by ac_automation()

// Build the trie: insert one pattern into the trie rooted at `root`.
//
// s: NUL-terminated pattern. The trie has one child slot per lowercase
//    letter, so a pattern containing any character outside 'a'..'z' cannot be
//    stored; such a pattern is skipped and the trie is left untouched.
//
// Nodes missing along the path are allocated with malloc(); the node reached
// at the end of the pattern has its `sum` incremented, so inserting the same
// pattern twice counts it twice. The process exits if an allocation fails.
void Insert(char *s)
{
	// Validate first so that a rejected pattern creates no nodes at all and
	// no out-of-range index is ever used on next[].
	for(int i=0; s[i]; i++)
	{
		if(s[i] < 'a' || s[i] > 'z')
			return;
	}

	node *p = root;
	for(int i=0; s[i]; i++)
	{
		int x = s[i] - 'a';
		if(p->next[x]==NULL)
		{
			node *temp = (node *)malloc(sizeof(node));
			if(temp == NULL)
			{
				fprintf(stderr, "Insert: out of memory\n");
				exit(EXIT_FAILURE);
			}
			for(int j=0; j<26; j++)
				temp->next[j] = 0;
			temp->sum = 0;
			temp->fail = 0;
			p->next[x] = temp;
		}
		p = p->next[x];
	}
	p->sum++;
}

//用队列实现fail
void build_fail_pointer()
{
	head = 0;
	tail = 1;
	q[head] = root;
	while(head < tail)
	{
		node *temp = q[head++];
		for(int i=0; i<26; i++)
		{
			if(temp->next[i])
			{
				if(temp == root)
				{
					temp->next[i]->fail = root;
				}
				else
				{
					node *r = temp->fail;
					while(r)
					{
						if(r->next[i])
						{
							temp->next[i]->fail = r->next[i];
							break;
						}
						r = r->fail;
					}
					if(r==NULL)
						temp->next[i]->fail = root;
				}
				q[tail++] = temp->next[i];
			}
		}
	}
}

// Match the words: scan the text and add to the global `cnt` the `sum` of
// every pattern node that occurs in it.
//
// partner: NUL-terminated text to search.
//
// Requires build_fail_pointer() to have been called on the current trie.
// Each node is counted at most once: after its `sum` has been added it is set
// to -1, so the trie cannot be reused for a second text afterwards.
void ac_automation(char *partner)
{
	node *p = root;
	for(int i=0; partner[i]; i++)
	{
		int x = partner[i]-'a';
		if(x < 0 || x >= 26)
		{
			// A character outside 'a'..'z' has no slot in next[] and cannot be
			// part of any stored pattern, so matching restarts from the root.
			p = root;
			continue;
		}

		// Follow fail pointers until some state has a transition on x.
		while(!p->next[x] && p!=root)
			p = p->fail;
		p = p->next[x];
		if(!p)
			p = root;

		// Collect every pattern ending at this position by walking the fail
		// chain. A node already marked -1 was visited together with the rest
		// of its chain, so the walk can stop there.
		for(node *temp = p; temp != root && temp->sum >= 0; temp = temp->fail)
		{
			cnt += temp->sum;
			temp->sum = -1;
		}
	}
}

int main()
{
	int T;
	scanf("%d", &T);
	while(T--)
	{
		root = (node *)malloc(sizeof(node));
		for(int i=0; i<26; i++)
			root->next[i] = 0;
		root->sum = 0;
		root->fail = 0;
		int n;
		scanf("%d", &n);
		getchar();
		for(int i=1; i<=n; i++)
		{
			gets(s);
			Insert(s);
		}
		gets(partner);
		cnt = 0;
		build_fail_pointer();
		ac_automation(partner);
		printf("%d\n", cnt);
	}
	return 0;
}



// KMP template

// Failure table shared by getNext() and KMP_Index(). It is deliberately not
// called `next`: under `using namespace std` that name is ambiguous with
// std::next. Holds strlen(T) + 1 entries after getNext(T).
static std::vector<int> kmp_next;

// Build the failure table for pattern T.
// T is the pattern string (NUL-terminated).
// After the call kmp_next[0] == -1 and, for j >= 1, kmp_next[j] is the length
// of the longest proper prefix of T[0..j-1] that is also a suffix of it.
void getNext(char *T)
{
    int tlen = static_cast<int>(std::strlen(T));
    kmp_next.assign(tlen + 1, 0);   // the loop below writes index tlen as well
    int j = 0, k = -1;
    kmp_next[0] = -1;
    while(j < tlen)
        if(k == -1 || T[j] == T[k])
            kmp_next[++j] = ++k;
        else
            k = kmp_next[k];
}

// Return the position of the first occurrence of pattern T in text S.
// The returned position is 0-based; -1 is returned when T does not occur.
// An empty pattern matches at position 0.
int KMP_Index(char *T, char *S)
{
    int i = 0, j = 0;
    getNext(T);
    int slen = static_cast<int>(std::strlen(S));
    int tlen = static_cast<int>(std::strlen(T));
    while(i < slen && j < tlen)
    {
        if(j == -1 || S[i] == T[j])
        {
            i++; j++;
        }
        else
            j = kmp_next[j];   // fall back in the pattern; i never moves backwards
    }
    if(j == tlen)
        return i - tlen;
    else
        return -1;
}


AC automaton template problem: HDU 2222

Problem link: http://acm.hdu.edu.cn/showproblem.php?pid=2222


Guess you like

Origin blog.csdn.net/qq_31281327/article/details/76465976