-
AC automata
The Aho-Corasick automaton, invented at Bell Labs in 1975, is a well-known multi-pattern string matching algorithm.
To learn AC automata, we must first know what a Trie is: a dictionary tree.
A Trie tree, also known as a prefix tree or key tree, is a tree structure and a variation of the hash tree. A typical application is counting and sorting a large number of strings (though it is not limited to strings), so search engine systems often use it for word-frequency statistics over text.
application
A common example: given n words and then a paragraph of m characters, find out how many of the words appear in the paragraph.
To understand AC automata, you must first have basic knowledge of the pattern tree (dictionary tree, i.e. the Trie) and of the KMP pattern matching algorithm. The AC automaton algorithm consists of three steps: constructing the Trie tree, constructing the fail pointers, and the pattern matching process.
// AC automaton template
#include <bits/stdc++.h>
using namespace std;
// Size of the text buffer `partner`: ten million characters plus a little slack.
const int maxn = 10000005;
// Number of slots in the array-backed queue `q`.
const int MAX = 10000000;
// One node of the trie on which the Aho-Corasick automaton is built.
struct node
{
    node *next[26]; // child for each letter, indexed by (letter - 'a'); 0 when absent
    node *fail;     // fail link: the node to fall back to when no child matches
    int sum;        // number of inserted words ending at this node; -1 once counted
};
node *root;         // root of the trie; main() allocates a new one for every test case
node *q[MAX];       // array-backed queue used by the breadth-first fail-pointer build
char partner[maxn]; // the text that is searched for dictionary words
char s[105];        // buffer holding one dictionary word while it is read
int head;           // index of the next queue element to take out
int tail;           // index one past the last element put into the queue
int cnt;            // running total of dictionary words found in the text
//建立字典树
void Insert(char *s)
{
node *p = root;
for(int i=0; s[i]; i++)
{
int x = s[i] - 'a';
if(p->next[x]==NULL)
{
node *temp = (node *)malloc(sizeof(node));
for(int j=0; j<26; j++)
temp->next[j] = 0;
temp->sum = 0;
temp->fail = 0;
p->next[x] = temp;
}
p = p->next[x];
}
p->sum++;
}
//用队列实现fail
void build_fail_pointer()
{
head = 0;
tail = 1;
q[head] = root;
while(head < tail)
{
node *temp = q[head++];
for(int i=0; i<26; i++)
{
if(temp->next[i])
{
if(temp == root)
{
temp->next[i]->fail = root;
}
else
{
node *r = temp->fail;
while(r)
{
if(r->next[i])
{
temp->next[i]->fail = r->next[i];
break;
}
r = r->fail;
}
if(r==NULL)
temp->next[i]->fail = root;
}
q[tail++] = temp->next[i];
}
}
}
}
// Match words.
//
// Scans the NUL-terminated text `partner` (this parameter hides the global
// buffer of the same name) through the automaton rooted at `root` and adds to
// the global `cnt` the `sum` of every node reached, either directly or through
// fail links. A node's `sum` is set to -1 once it has been added, so each node
// contributes at most once; this modifies the trie.
//
// Precondition: build_fail_pointer() has been run on the current trie.
//
// The trie only has transitions for 'a'..'z'. Any other character cannot be
// part of a stored word, so it sends the automaton back to the root instead of
// being used as an (out-of-bounds) index into next[]. A NULL `partner` or a
// NULL `root` makes the call a no-op.
void ac_automation(char *partner)
{
    if (partner == NULL || root == NULL)
        return;

    node *p = root;
    for (size_t i = 0; partner[i] != '\0'; i++)
    {
        if (partner[i] < 'a' || partner[i] > 'z')
        {
            p = root;
            continue;
        }
        int x = partner[i] - 'a';

        // Fall back along fail links until a transition on x exists.
        while (!p->next[x] && p != root)
            p = p->fail;
        p = p->next[x];
        if (!p)
            p = root;

        // Collect every word ending here; stop at the first node already counted.
        for (node *temp = p; temp != root && temp->sum >= 0; temp = temp->fail)
        {
            cnt += temp->sum;
            temp->sum = -1;
        }
    }
}
int main()
{
int T;
scanf("%d", &T);
while(T--)
{
root = (node *)malloc(sizeof(node));
for(int i=0; i<26; i++)
root->next[i] = 0;
root->sum = 0;
root->fail = 0;
int n;
scanf("%d", &n);
getchar();
for(int i=1; i<=n; i++)
{
gets(s);
Insert(s);
}
gets(partner);
cnt = 0;
build_fail_pointer();
ac_automation(partner);
printf("%d\n", cnt);
}
return 0;
}
// KMP template

// Failure table shared by getNext() and KMP_Index(). After getNext(T) it has
// strlen(T) + 1 entries; entry 0 is -1 and entry j is the length of the longest
// proper prefix of T[0..j) that is also a suffix of it.
// It is deliberately not called `next`: with `using namespace std` in effect
// that name is ambiguous with std::next.
static std::vector<int> kmp_next;

// Builds the failure table for the pattern string T.
// A NULL pattern leaves the table empty.
void getNext(const char *T)
{
    if (T == NULL)
    {
        kmp_next.clear();
        return;
    }
    const int tlen = static_cast<int>(strlen(T));
    kmp_next.assign(tlen + 1, -1); // index tlen is written too, hence +1

    int j = 0;
    int k = -1;
    while (j < tlen)
    {
        if (k == -1 || T[j] == T[k])
        {
            ++j;
            ++k;
            kmp_next[j] = k;
        }
        else
        {
            k = kmp_next[k];
        }
    }
}

// Returns the position of the first occurrence of the pattern string T in the
// main string S. Positions start at 0. Returns -1 when T does not occur in S
// or when either argument is NULL. An empty pattern is found at position 0.
int KMP_Index(const char *T, const char *S)
{
    if (T == NULL || S == NULL)
        return -1;

    getNext(T);
    const int slen = static_cast<int>(strlen(S));
    const int tlen = static_cast<int>(strlen(T));

    int i = 0;
    int j = 0;
    while (i < slen && j < tlen)
    {
        if (j == -1 || S[i] == T[j])
        {
            i++;
            j++;
        }
        else
        {
            j = kmp_next[j]; // shift the pattern, keep the text position
        }
    }
    if (j == tlen)
        return i - tlen;
    return -1;
}
AC automaton template problem: HDU 2222
Problem link: http://acm.hdu.edu.cn/showproblem.php?pid=2222