HDU - 6096 :String (AC自动机,已知前后缀,匹配单词,弱数据)

Bob has a dictionary with N words in it.
Now there is a list of words in which the middle part of the word has continuous letters disappeared. The middle part does not include the first and last character.
We only know the prefix and suffix of each word, and the number of characters missing is uncertain, it could be 0. But the prefix and suffix of each word can not overlap.
For each word in the list, Bob wants to determine which word is in the dictionary by prefix and suffix.
There are probably many answers. You just have to figure out how many words may be the answer.

Input
The first line of the input gives the number of test cases T; T test cases follow.
Each test case contains two integers N and Q: the number of words in the dictionary and the number of words in the list.
The next N lines each contain a string $W_i$, the i-th word in the dictionary ($0 < |W_i| \le 100000$).
The next Q lines each contain two strings $P_i$ and $S_i$, the prefix and suffix of the i-th word in the list ($0 < |P_i|, |S_i| \le 100000$, $0 < |P_i| + |S_i| \le 100000$).
All of the above characters are lowercase letters.
The dictionary does not contain the same words.

Limits
$T \le 5$
$0 < N, Q \le 100000$
$\sum (|S_i| + |P_i|) \le 500000$
$\sum |W_i| \le 500000$
Output
For each test case, output Q lines, one integer per line: the answer for each word in the list.
Sample Input

1
4 4
aba
cde
acdefa
cdef
a a
cd ef
ac a
ce f

Sample Output

2
1
1
0

题意:已知N个单词,Q次询问,每次询问给出pre和suf,统计有多少个单词的前缀为pre,后缀为suf,而且要满足二者不相交。

思路:我们把询问建立AC自动机,单词用来跑AC自动机,跑到了就累计。

        合理建立AC自动机的方式为:每个询问转化为 suf+'{'+pre;

        跑AC自动机的方式为: 每个单词转化为 S+'{'+S;

跑的时候如果fail可以走到某个询问,说明这个询问是这里的前后缀。(AC了但是不严谨的代码)

#include<bits/stdc++.h>
#define rep(i,a,b) for(int i=a;i<=b;i++)
using namespace std;
// Capacity for character buffers and per-word / per-query tables.
const int maxn=500010;
// Upper bound on the number of queries (one '{' separator node may be created per query).
const int maxq=100010;
// Trie capacity: every query contributes its letters PLUS one separator edge, so the
// node count can exceed the total letter count. Sizing node arrays with maxn alone
// lets node ids run past the end of the arrays.
const int maxnode=maxn+maxq;

// c   : all dictionary words stored back to back (1-based)
// s   : scratch buffer for the word being read
// pre / suf : prefix and suffix of the query being read (1-based)
char c[maxn],s[maxn],pre[maxn],suf[maxn];

// tot : number of characters stored in c
// F[i], L[i] : offset of word i inside c and its length
// pos[k] : trie node at which query k ends
// cnt : number of trie nodes allocated so far (node 0 is the root)
int tot,F[maxn],L[maxn],pos[maxn],cnt;

// Per-node tables. ch[v][0..25] are letter transitions, ch[v][26] is the separator.
// dep[v] : combined prefix+suffix length of the query ending at v
// sum[v] : hit counter of v
// fail[v]: fail link of v
// q      : BFS queue used while building fail links
int ch[maxnode][27],dep[maxnode],sum[maxnode],fail[maxnode],q[maxnode];

int N,Q,head,tail;
void insert(int opt){
    int Now=0,len1=strlen(suf+1),len2=strlen(pre+1);
    rep(i,1,len1){
        if(!ch[Now][suf[i]-'a']) ch[Now][suf[i]-'a']=++cnt,sum[cnt]=0;
        Now=ch[Now][suf[i]-'a'];
    }
    if(!ch[Now][26]) ch[Now][26]=++cnt,sum[cnt]=0; Now=ch[Now][26];
    rep(i,1,len2){
        if(!ch[Now][pre[i]-'a']) ch[Now][pre[i]-'a']=++cnt,sum[cnt]=0;
        Now=ch[Now][pre[i]-'a'];
    }
    pos[opt]=Now; dep[Now]=len1+len2;
}
// Builds the fail links with a breadth-first pass over the trie, using q[] as the
// queue (head = last written slot, tail = last consumed slot).
// Missing transitions are overwritten with the transition of the fail state, so
// later walks can follow ch[][] directly without climbing fail links.
// The fail entries of the root's children are not written here; they must already
// be 0 when this function is called.
void buildfail()
{
    head=0;
    tail=0;

    // seed the queue with the existing children of the root
    for(int e=0;e<27;++e){
        const int child=ch[0][e];
        if(child!=0) q[++head]=child;
    }

    while(tail<head){
        const int cur=q[++tail];
        const int back=fail[cur];
        for(int e=0;e<27;++e){
            int &next=ch[cur][e];
            if(next==0){
                // no real child: borrow the transition of the fail state
                next=ch[back][e];
                continue;
            }
            fail[next]=ch[back][e];
            q[++head]=next;
        }
    }
}
void solve(int B,int len)
{
    int Now=0;
    rep(i,B+1,B+len) Now=ch[Now][c[i]-'a'];
    Now=ch[Now][26];
    rep(i,B+1,B+len){
         Now=ch[Now][c[i]-'a']; int tmp=Now;
         while(dep[tmp]>len)  tmp=fail[tmp]; sum[tmp]++;
    }
}
int main()
{
    int T; scanf("%d",&T);
    while(T--){
        tot=cnt=0;
        memset(fail,0,sizeof(fail));
        memset(ch,0,sizeof(ch));
        scanf("%d%d",&N,&Q);
        rep(i,1,N){
            scanf("%s",s+1);
            L[i]=strlen(s+1); F[i]=tot;
            rep(j,1,L[i]) c[++tot]=s[j]; //保存单词
        }
        rep(i,1,Q){
            scanf("%s%s",pre+1,suf+1);
            insert(i);
        }
        buildfail();
        rep(i,1,N) solve(F[i],L[i]);
        for(int i=cnt;i>=1;i--) sum[fail[q[i]]]+=sum[q[i]]; //累加前缀和
        rep(i,1,Q) printf("%d\n",sum[pos[i]]);
    }
    return 0;
}

 虽然上面的代码AC了,但是我感觉是可以hack掉,应该是数据比较水。 因为一个单词对一个询问最多有一个贡献,而这样跑下来有的单词的贡献可能大于1,所以我们加一个时间戳,保证每个单词的贡献最多为1。

#include<bits/stdc++.h>
#define rep(i,a,b) for(int i=a;i<=b;i++)
using namespace std;
// Capacity for character buffers and per-word / per-query tables.
const int maxn=500010;
// Upper bound on the number of queries (one '{' separator node may be created per query).
const int maxq=100010;
// Trie capacity: every query contributes its letters PLUS one separator edge, so the
// node count can exceed the total letter count. Sizing node arrays with maxn alone
// lets node ids run past the end of the arrays.
const int maxnode=maxn+maxq;

// c   : all dictionary words stored back to back (1-based)
// s   : scratch buffer for the word being read
// pre / suf : prefix and suffix of the query being read (1-based)
char c[maxn],s[maxn],pre[maxn],suf[maxn];

// tot : number of characters stored in c
// F[i], L[i] : offset of word i inside c and its length
// pos[k] : trie node at which query k ends
// cnt : number of trie nodes allocated so far (node 0 is the root)
int tot,F[maxn],L[maxn],pos[maxn],cnt;

// Per-node tables. ch[v][0..25] are letter transitions, ch[v][26] is the separator.
// dep[v] : combined prefix+suffix length of the query ending at v
// sum[v] : hit counter of v
// fail[v]: fail link of v
// q      : BFS queue used while building fail links
// Laxt[v]: index of the last word that visited v (per-word timestamp)
int ch[maxnode][27],dep[maxnode],sum[maxnode],fail[maxnode],q[maxnode],Laxt[maxnode];

int N,Q,head,tail;
void insert(int opt){
    int Now=0,len1=strlen(suf+1),len2=strlen(pre+1);
    rep(i,1,len1){
        if(!ch[Now][suf[i]-'a']) ch[Now][suf[i]-'a']=++cnt,sum[cnt]=0;
        Now=ch[Now][suf[i]-'a'];
    }
    if(!ch[Now][26]) ch[Now][26]=++cnt,sum[cnt]=0; Now=ch[Now][26];
    rep(i,1,len2){
        if(!ch[Now][pre[i]-'a']) ch[Now][pre[i]-'a']=++cnt,sum[cnt]=0;
        Now=ch[Now][pre[i]-'a'];
    }
    pos[opt]=Now; dep[Now]=len1+len2;
}
// Builds the fail links with a breadth-first pass over the trie, using q[] as the
// queue (head = last written slot, tail = last consumed slot).
// Missing transitions are overwritten with the transition of the fail state, so
// later walks can follow ch[][] directly without climbing fail links.
// The fail entries of the root's children are not written here; they must already
// be 0 when this function is called.
void buildfail()
{
    head=0;
    tail=0;

    // seed the queue with the existing children of the root
    for(int e=0;e<27;++e){
        const int child=ch[0][e];
        if(child!=0) q[++head]=child;
    }

    while(tail<head){
        const int cur=q[++tail];
        const int back=fail[cur];
        for(int e=0;e<27;++e){
            int &next=ch[cur][e];
            if(next==0){
                // no real child: borrow the transition of the fail state
                next=ch[back][e];
                continue;
            }
            fail[next]=ch[back][e];
            q[++head]=next;
        }
    }
}
void solve(int time,int B,int len)
{
    int Now=0;
    rep(i,B+1,B+len) Now=ch[Now][c[i]-'a'];
    Now=ch[Now][26];
    rep(i,B+1,B+len){
         Now=ch[Now][c[i]-'a']; int tmp=Now;
         while(tmp) {
            if(Laxt[tmp]==time) break;
            Laxt[tmp]=time;//加一个时间戳,保证每个单词的贡献最多为1
            if(dep[tmp]<=len) sum[tmp]++;
            tmp=fail[tmp];
         }
    }
}
int main()
{
    int T; scanf("%d",&T);
    while(T--){
        tot=cnt=0;
        memset(fail,0,sizeof(fail));
        memset(ch,0,sizeof(ch));
        memset(Laxt,0,sizeof(Laxt));
        scanf("%d%d",&N,&Q);
        rep(i,1,N){
            scanf("%s",s+1);
            L[i]=strlen(s+1); F[i]=tot;
            rep(j,1,L[i]) c[++tot]=s[j]; //保存单词
        }
        rep(i,1,Q){
            scanf("%s%s",pre+1,suf+1);
            insert(i);
        }
        buildfail();
        rep(i,1,N) solve(i,F[i],L[i]);
        rep(i,1,Q) printf("%d\n",sum[pos[i]]);
    }
    return 0;
}

猜你喜欢

转载自www.cnblogs.com/hua-dong/p/9807370.html
今日推荐