HDU - 2222 Keywords Search (AC自动机)

                                      Keywords Search

Problem Description

In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image has a long description. When users type some keywords to find an image, the system matches the keywords against the image descriptions and shows the image that matches the most keywords.
To simplify the problem, you are given the description of one image and some keywords, and you should tell me how many keywords are matched.

Input

The first line contains one integer, the number of test cases that follow.
Each case starts with one integer N, the number of keywords, followed by N keywords. (N <= 10000)
Each keyword contains only the characters 'a'-'z', and its length is no longer than 50.
The last line of each case is the description, and its length is no longer than 1000000.

Output

Print how many keywords are contained in the description.

Sample Input

1

5

she

he

say

shr

her

yasherhs

Sample Output

3

题意描述:

给出n个单词,和一个文本,求文本中出现了几个单词。

解题思路:

这是AC自动机的模板题,本质上与全文检索相同。由于文本长度可达一百万,若对每个单词分别做暴力匹配,很可能会超时,因此直接使用AC自动机。

关于AC自动机:https://www.cnblogs.com/wenzhixin/p/9448045.html

#include<stdio.h>
#include<string.h>
#include<algorithm>
#include<queue>
using namespace std;
// Capacity limits. At most 10000 keywords of length <= 50 are inserted, so the
// trie never needs more than 500000 nodes plus the root; the text is at most
// 1000000 characters plus the terminating NUL.
const int kMaxNodes=500010;
const int kAlphabet=26;
const int kMaxText=1000010;

// ch[u][c]: node reached from node u by letter c ('a'+c). main() fills the
// table with -1 ("no edge"); getfail() later replaces every -1 with a
// fallback transition.
int ch[kMaxNodes][kAlphabet];
// v[u]: number of inserted keywords that end exactly at node u.
int v[kMaxNodes];
// fail[u]: failure link of node u, filled in by getfail().
int fail[kMaxNodes];
// Buffer holding the description text that is searched.
char txt[kMaxText];
// Number of trie nodes currently in use; node 0 is the root.
int sz;
void getch(char s[],int len)//单词存入字典树 
{
	int u,c,i;
	u=0;
	for(i=0;i<len;i++)
	{
		c=s[i]-'a';
		if(ch[u][c]==-1)//此节点行未出现过此字母,为其赋上节点号 
			ch[u][c]=sz++;
		u=ch[u][c];//更新节点行 
	}
	v[u]++;//单词结束,在单词节点结尾处,此单词数加一 
}
void getfail()//存fail[]数组,相当于链表发的fail指针,原理都是一样的 
{
	//给ch[]数组更新是为了补边,是为了方便查找,当查找失配时,可以直接从相应的节点行开始查找了。 
	queue<int> q;
	int u,tem,i;
	u=0;
	fail[0]=0;//此处相当于根节点的fail指针指向自己,节点编号是从1开始的,0可以表示为根节点 
	for(i=0;i<26;i++)//第0行的节点,也就是根节点的下一级 
	{
		tem=ch[u][i];
		if(tem==-1)
			ch[u][i]=0;//若未出现此字母,直接继续从根节点开始查找 
		else
		{
			fail[tem]=0;//根节点的下一级都是指向根节点 
			q.push(tem);//将找到的字母的节点编号入队 
		}
	}
	while(!q.empty())
	{
		u=q.front();//获得队首 
		q.pop();//出队 
		for(i=0;i<26;i++)
		{
			tem=ch[u][i];
			if(tem==-1)//此节点行未出现的字母 
				ch[u][i]=ch[fail[u]][i];//ch[]数组值都更新为其节点行的fail指针指向的节点行对应字母的值 
			else
			{
				fail[tem]=ch[fail[u]][i];//出现的字母,存储其节点的fail指针为节点行的fail指针指向的节点行对应字母的值
				q.push(tem);//出现字母的节点编号入队 
			}
				
		}
	}
}
/*
 * Scan the text with the automaton and count the keywords that occur in it.
 *
 * s - NUL-terminated text to search
 *
 * Returns the sum of v[] over every node reached while scanning, i.e. the
 * number of inserted keywords that appear in s (a keyword inserted k times
 * contributes k).
 *
 * Must be called after getfail(). The call is destructive: the counter v[]
 * of every visited node is reset to 0 so that a keyword occurring several
 * times in the text is counted only once.
 */
int put(char s[])
{
	int node=0;                              // current automaton state, root at start
	int total=0;
	for(const char *p=s;*p!='\0';++p)
	{
		const int c=*p-'a';
		if(c<0||c>=26)
		{
			// A character outside 'a'-'z' cannot be part of any keyword.
			// Restart from the root instead of indexing ch[][] out of bounds.
			node=0;
			continue;
		}
		node=ch[node][c];                    // follow the (possibly patched) transition
		// Walk the failure chain: every node on it is a suffix of the text
		// read so far and may mark the end of a keyword.
		for(int hit=node;hit!=0;hit=fail[hit])
		{
			total+=v[hit];
			v[hit]=0;                        // already counted, never count it again
		}
	}
	return total;
}
/*
 * Driver: reads the number of test cases, then for each case the keyword
 * count, the keywords and the description, and prints how many keywords
 * occur in the description.
 *
 * Every scanf result is checked so that malformed or truncated input ends
 * the program instead of looping forever or using uninitialised values, and
 * every %s has a field width so a too-long token cannot overflow its buffer.
 */
int main()
{
	int t,n,i;
	char s[110];
	// "==1" rather than "!=EOF": a non-numeric token makes scanf return 0
	// without consuming it, which would otherwise spin forever.
	while(scanf("%d",&t)==1)
	{
		while(t-->0)
		{
			// Reset the automaton: only the root exists, no edges, no keywords.
			sz=1;
			memset(ch,-1,sizeof(ch));
			memset(v,0,sizeof(v));
			if(scanf("%d",&n)!=1)
				return 0;
			for(i=1;i<=n;i++)
			{
				// Width 109 = sizeof(s)-1, leaves room for the NUL.
				if(scanf("%109s",s)!=1)
					return 0;
				getch(s,static_cast<int>(strlen(s)));
			}
			getfail();
			// Width 1000009 = size of txt minus 1; keep in sync with txt.
			if(scanf("%1000009s",txt)!=1)
				return 0;
			printf("%d\n",put(txt));
		}
	}
	return 0;
}

猜你喜欢

转载自blog.csdn.net/kongsanjin/article/details/81743868