[NOI2011][bzoj2434] Ali's typewriter [AC automaton + DFS order + fail tree + binary indexed tree]

topic

portal

text

Brute force

The most brute-force method: for every query, run KMP on the two strings named by the query and output the number of matches

A small optimization: read all queries first, group the ones that share the same pattern string, compute its next array once, and then do the matching

But the two methods are essentially the same: both are brute force

A less brute-force approach

We build an AC automaton over all strings, sort the queries by $y$, and then run each $y$ through the AC automaton, updating the answer every time we follow a fail pointer

This can get 70 points, but the time complexity is still about $O\left(n^2\right)$

clever optimization

In this problem, every pattern string and every text string is already stored in the AC automaton

So, what does the problem actually ask for?

It asks how many prefixes of string y have string x as a suffix

So, is there a structure in the AC automaton itself that satisfies such a search?

Yes, that is the fail pointer

The fail pointer of a prefix on the trie points to the node that is its longest suffix present in the trie; at the same time, if we start from a prefix and follow fail pointers all the way to the root node, the prefixes represented by all nodes visited on the way are suffixes of this prefix

That is to say, we regard the fail pointer as a tree edge, and extract this "fail tree" (not to be confused with the next tree of kmp), then we can change the query of the question into this:

Mark the nodes representing all prefixes of string y; then the number of marked nodes in the fail-tree subtree of the node representing string x is the answer to this query

Maintaining these subtree counts can be done with the DFS order of the fail tree together with a binary indexed tree

correct answer

There is redundant work in the above process: for each new y we need to clear the binary indexed tree and then re-insert the prefix nodes of the new y string - even if we sort the queries by y, it will still TLE

The problem with this process is that some nodes enter and leave many times, which is inefficient. We need to find a way to make each node of the AC automaton enter and leave the binary indexed tree only once.

So who can meet this requirement?

DFS order again - this time the DFS order on the original trie

We sort the input queries according to the dfs order of the y string on the trie tree, and add and delete them in turn

Because traversing in DFS order makes each node enter once and leave once, the total time complexity of this method is only $O\left(n\log n\right)$

So this problem is over

Code

There are many mappings in this question, and it is very complicated. There are a lot of repetitive meanings. Be careful when debugging.

The variable name is a bit messy, please forgive me

#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<vector>
#define rank deep_dark_fantasy
using namespace std;
// One node of the trie that getfail() later turns into an AC automaton.
struct node{
    int fail;              // fail pointer (index into a[]), set by getfail()
    int fa;                // parent node in the trie; the root (index 0) is its own parent
    int son[26];           // child for each letter 'a'..'z'; 0 means no child yet
    std::vector<int> num;  // ids of the printed strings that end at this node
    node():fail(0),fa(0),num(){
        for(int c=0;c<26;++c) son[c]=0;
    }
}a[100010];
int cnt;  // index of the most recently created node (the root is 0)
int tot;  // number of strings printed so far
int dfn[100010];   // dfn[k]: id of the k-th printed string met by the DFS of the trie
int clk;           // how many strings getdfn() has numbered so far
int end[100010];   // end[id]: trie node at which printed string id ends
int tmplca;        // helper used by getdfn() while it fills pre[]
int pre[100010];   // pre[k]: node to climb back to when moving from the (k-1)-th to the k-th string in DFS order (their LCA)
int rank[100010];  // rank[id]: DFS position of string id, i.e. the inverse mapping of dfn
// Linked adjacency lists; getfail() stores the fail tree in them.
struct edge{
    int to;    // node this edge points to
    int next;  // index of the next edge with the same source (getfail() uses -1 as the end mark)
};
edge e[100010];     // edge pool, filled from index 1
int cnte;           // number of edges stored so far
int first[100010];  // first[u]: index of the most recently added edge leaving u
// Puts the directed edge u -> v at the front of u's adjacency list.
inline void addedge(int u,int v){
    ++cnte;
    e[cnte].to=v;
    e[cnte].next=first[u];  // chain to the previous head of the list
    first[u]=cnte;
}
// Replays the keystroke string s on the trie, starting at the root:
//   'P'          - the text typed so far is printed: it becomes string ++tot
//   'B'          - backspace: step back to the parent node
//   other chars  - treated as a letter 'a'..'z': follow the child, creating it if absent
inline void add(char s[]){
    int at=0;  // node reached by the text currently typed
    for(const char *p=s;*p!='\0';++p){
        const char key=*p;
        if(key=='P'){
            ++tot;
            a[at].num.push_back(tot);
        }else if(key=='B'){
            at=a[at].fa;
        }else{
            const int c=key-'a';
            if(a[at].son[c]==0) a[at].son[c]=++cnt;
            const int child=a[at].son[c];
            a[child].fa=at;
            at=child;
        }
    }
}
// Depth-first walk of the trie below node u that numbers the printed strings
// in the order they are met.
//
// For every string id ending at u it records
//   dfn[k]   = id   (k is the running counter clk),
//   rank[id] = k,
//   end[id]  = u,
//   pre[k]   = lowest common ancestor of u and the node of the previously
//              numbered string (the root for the very first string).
//
// Must run before getfail(): getfail() overwrites empty son[] slots with fail
// transitions, after which son[] no longer describes a tree.
//
// Invariant: while the walk is at node u, tmplca is the LCA of u and the node
// of the last numbered string (or the root if none has been numbered yet).
// Note: recursion depth equals the depth of the trie.
void getdfn(int u){
    int i,v,len=a[u].num.size();
    for(i=0;i<len;i++){
        int id=a[u].num[i];
        dfn[++clk]=id;
        rank[id]=clk;
        end[id]=u;
        pre[clk]=tmplca;tmplca=u;
    }
    for(i=0;i<26;i++){
        v=a[u].son[i];if(!v) continue;
        getdfn(v);
        // Lift tmplca to u only when the last numbered string lies inside v's
        // subtree (exactly the case tmplca==v). Lifting unconditionally is
        // wrong: after returning from a branch that contains no printed
        // string, tmplca would become a node that is not an ancestor of the
        // previous string, and a climb from that string towards pre[] would
        // never reach it.
        if(tmplca==v) tmplca=u;
    }
}
int q[100010];  // BFS queue used by getfail()
// Computes the fail pointers breadth-first, completes the transition table
// (an empty son[] slot is replaced by the transition of the fail node), and
// finally stores the fail tree - one edge fail(i) -> i for every non-root
// node i - in the adjacency lists.
void getfail(){
    int qh=0,qt=0;
    // Children of the root fail to the root itself.
    for(int c=0;c<26;++c){
        const int child=a[0].son[c];
        if(child==0) continue;
        a[child].fail=0;
        q[qt++]=child;
    }
    for(;qh<qt;++qh){
        const int u=q[qh];
        const int f=a[u].fail;
        for(int c=0;c<26;++c){
            const int v=a[u].son[c];
            if(v!=0){
                a[v].fail=a[f].son[c];
                q[qt++]=v;
            }else{
                a[u].son[c]=a[f].son[c];
            }
        }
    }
    memset(first,-1,sizeof(first));  // -1 marks the end of every adjacency list
    for(int i=1;i<=cnt;++i) addedge(a[i].fail,i);
}
char s[100010];  // the keystroke string read by main()
int Q;           // number of queries
// One query: how many times string x occurs in string y.
struct query{
    int x,y;  // ids of the two printed strings
    int num;  // position of the query in the input, used to restore output order
    int ans;  // computed answer
};
query qq[100010];  // queries, stored from index 1
// Orders queries by the trie-DFS position of their y string.
bool cmp(query l,query r){
    const int lpos=rank[l.y];
    const int rpos=rank[r.y];
    return lpos<rpos;
}
// Orders queries by their position in the input.
bool cmp2(query l,query r){
    return r.num>l.num;
}
int now=0;   // trie node whose path from the root is currently marked in T (used by main)
int tmpnow;  // cursor main() uses while climbing from a string's end node
struct tree{// binary indexed (Fenwick) tree: point update, prefix sum
    int x[100010];
    tree(){
        for(int i=0;i<100010;++i) x[i]=0;
    }
    // Lowest set bit of pos.
    int lowbit(int pos){return pos&-pos;}
    // Adds type to position pos (pos must be >= 1). Positions run up to
    // cnt+1 because the fail tree has cnt+1 nodes (root included).
    void change(int pos,int type){
        int i=pos;
        while(i<=cnt+1){
            x[i]+=type;
            i+=lowbit(i);
        }
    }
    // Returns the sum of positions 1..pos (0 when pos <= 0).
    int ask(int pos){
        int sum=0;
        while(pos>0){
            sum+=x[pos];
            pos-=lowbit(pos);
        }
        return sum;
    }
}T;
int faildfn[100010];  // faildfn[u]: DFS order of node u in the fail tree
int failclk=0;        // DFS clock for the fail tree
int le[100010];       // le[u]: left end of u's subtree interval in the binary indexed tree
int ri[100010];       // ri[u]: right end of u's subtree interval in the binary indexed tree
void get_fail_dfn(int u){
    int i,v;faildfn[u]=++failclk;le[u]=failclk;
    for(i=first[u];~i;i=e[i].next){
        v=e[i].to;
        get_fail_dfn(v);
    }
    ri[u]=failclk;
}
// Reads the keystroke string and the queries, answers every query (x, y) with
// the number of occurrences of string x in string y, and prints the answers
// in input order.
//
// Method: the strings are visited in trie-DFS order. While string y is the
// current one, exactly the nodes on the trie path from the root (exclusive)
// to end[y] are marked in the binary indexed tree T at their fail-tree DFS
// positions. The answer for x is then the number of marks inside the
// fail-tree subtree of end[x], i.e. the range [le, ri] of that node.
int main(){
    scanf("%s",s);int i,j,y,xx;
    add(s);getdfn(0);          // getdfn needs the plain trie, so it runs before getfail
    getfail();get_fail_dfn(0);

    scanf("%d",&Q);
    for(i=1;i<=Q;i++) scanf("%d%d",&qq[i].x,&qq[i].y),qq[i].num=i;
    std::sort(qq+1,qq+Q+1,cmp);// group the queries by y, in trie-DFS order

    j=1;
    for(i=1;i<=tot;i++){
        y=dfn[i];tmpnow=end[y];
        // Unmark the old path up to pre[i]. The root is its own parent and is
        // never marked, so the climb also stops there: this keeps the loop
        // finite even if pre[i] is not an ancestor of now, and the second loop
        // then simply re-marks the whole path of y.
        while(now!=pre[i]&&now!=0){
            T.change(faildfn[now],-1);now=a[now].fa;
        }
        // Mark the part of the new path that lies below the node reached above.
        while(tmpnow!=now){
            T.change(faildfn[tmpnow],1);tmpnow=a[tmpnow].fa;
        }
        now=end[y];
        // Answer all queries whose text string is y; bounded by Q so that no
        // entry past the last query is inspected.
        while(j<=Q&&qq[j].y==y){
            xx=end[qq[j].x];
            qq[j].ans=T.ask(ri[xx])-T.ask(le[xx]-1);
            j++;
        }
    }

    std::sort(qq+1,qq+Q+1,cmp2);// back to input order
    for(i=1;i<=Q;i++) printf("%d\n",qq[i].ans);
    return 0;
}

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324647584&siteId=291194637