【算法】后缀自动机小记

  后缀自动机:可以识别一个字符串的所有子串。把出现位置的右端点集合(即 \(right\) 集合)相同的子串归为一类,记为一个状态,则状态数最多为 \(O(n)\) 个。

  在后缀自动机上,一个状态 \(p\) 代表了长度在 \((l[fa[p]],\ l[p]]\) 范围内的若干个子串(共 \(l[p] - l[fa[p]]\) 个)。

  区分SAM的转移 & parent 树的边:

    在SAM上状态的转移 = 在保证所代表的字符串左端点不右移的情况下,是否存在可以添加 c 在末尾的串

    在 parent 树上,父亲与儿子的关系为:儿子的右端点为父亲右端点的子集,而儿子的左端点在保证右端点不变的情况下尽量向右延伸

  (SAM上跳parent, AC自动机 & KMP 跳 fail 都是基于贪心匹配的原则)

  *打标记,记录贡献时要记得上传&下放


  一点点个人有关广义SAM的心得与体会:

    1.对多个串建立SAM :在每插入一个新的串的时候把 \(last\) 更新为根

    2.对trie树建立SAM:每个节点的 \(last\) 为父亲所建立的 \(np\) 节点

    考虑插入一个新的节点的过程,\(last\) 为上一次插入后长度最长的、具有 \(right = \{n\}\) 的状态,不断跳转 \(fa[p]\) 直到找到可以转移的地方以插入新的子串。由于 \(\{n + 1\}\) 这个右端点从未出现过,所以必然会新建一个状态。

    但是在广义SAM上呢?这时候所找到的点 \(p\) 和 \(q\),完全有可能是由其它串所建立的、已经包含了 \(\{n + 1\}\) 这个端点的一个状态。那么这个时候我们所新建的节点 \(np\) 等于是重复加入了一个本来就存在的状态,又因为此时 \(l[np] = l[fa[np]]\)(即它不代表任何子串,贡献 \(l[np] - l[fa[np]] = 0\)),所以这个节点等效于没有出现过。其他的与后缀自动机基本没有区别。

  1.[COCI2011-2012#5] POPLOCAVANJE

#include <bits/stdc++.h>
using namespace std;
// Array capacities: maxn bounds the text buffer and the position-indexed scratch array t,
// maxm bounds every per-state array of the automaton.
#define maxn 300100
#define maxm 600100
// n: text length read in main; m: number of patterns read in main; ans: value printed by main.
// cnt: index of the most recently created state (state 1 is the start state).
// last: the state that the next Ins call extends.
int n, m, ans, cnt = 1, last = 1;
// t: bucket counters for Rsort, later cleared and reused by Cal as a difference array over
//    text positions.
// id: states in the order produced by Rsort; l: per-state length assigned in Ins.
// rec: per-state longest matched pattern length, written by Get and propagated along fa in Cal.
int t[maxn], id[maxm], l[maxm], rec[maxm];
// ch: transitions indexed by letter - 'a'; pos: text position passed to Ins for the state
// created by that call (never written for cloned states, so it stays 0 there);
// fa: parent pointer of each state.
int ch[maxm][26], pos[maxm], fa[maxm]; 
// s: 1-indexed input buffer, filled with the text first and then reused for every pattern.
char s[maxn];

// Reads the next decimal integer from stdin and returns it.
//
// Every character that is not a digit is skipped before the number starts; if a '-'
// occurs anywhere in that skipped prefix the result is negated. The first character
// after the digit run is consumed as well.
//
// Returns 0 (or -0) when the input ends before any digit is found. The character is
// kept in an int so that EOF can be told apart from real input; storing it in a char
// made the skip loop spin forever once the stream was exhausted.
int read()
{
    int value = 0, sign = 1;
    int c = getchar();
    while(c != EOF && (c < '0' || c > '9'))
    {
        if(c == '-') sign = -1;
        c = getchar();
    }
    while(c >= '0' && c <= '9')
    {
        value = value * 10 + (c - '0');
        c = getchar();
    }
    return value * sign;
}

// Extends the suffix automaton by one character.
//   c  - letter index used as the column of ch
//   id - text position recorded in pos[] for the newly created state
// Updates last/cnt and the l, ch, fa, pos arrays. A cloned state keeps pos == 0.
void Ins(int c, int id)
{
    const int cur = ++ cnt;
    int p = last;
    pos[cur] = id;
    l[cur] = l[p] + 1;
    last = cur;

    // Walk up the parent chain, adding the missing c-transitions to the new state.
    while(p && !ch[p][c])
    {
        ch[p][c] = cur;
        p = fa[p];
    }
    if(!p)
    {
        // Fell off the chain: the new state hangs directly under the start state.
        fa[cur] = 1;
        return;
    }

    const int q = ch[p][c];
    if(l[q] == l[p] + 1)
    {
        fa[cur] = q;
        return;
    }

    // q is longer than p + c: split off a clone of length l[p] + 1 that shares q's transitions.
    const int clone = ++ cnt;
    l[clone] = l[p] + 1;
    memcpy(ch[clone], ch[q], sizeof(ch[q]));
    fa[clone] = fa[q];
    fa[q] = clone;
    fa[cur] = clone;
    // Redirect every transition on the chain that still points at q.
    while(ch[p][c] == q)
    {
        ch[p][c] = clone;
        p = fa[p];
    }
}

void Rsort()
{
    for(int i = 1; i <= cnt; i ++) t[l[i]] ++;
    for(int i = 1; i <= n; i ++) t[i] += t[i - 1];
    for(int i = 1; i <= cnt; i ++) id[t[l[i]] --] = i;
}

void Cal()
{
    Rsort(); memset(t, 0, sizeof(t));
    for(int i = 1; i <= cnt; i ++) 
        rec[id[i]] = max(rec[id[i]], rec[fa[id[i]]]);
    for(int i = 1; i <= cnt; i ++)
        if(pos[id[i]])
        {
            int x = id[i];
            t[pos[x] + 1] --, t[pos[x] - rec[x] + 1] ++;
        }
    for(int i = 1; i <= n; i ++) t[i] += t[i - 1];
    for(int i = 1; i <= n; i ++) if(!t[i]) ans ++;
}

// Runs the pattern currently stored in s[1..] through the automaton from the start state.
// If the whole pattern can be followed, records its length in rec[] of the state reached
// (keeping the maximum); if some transition is missing, nothing is recorded.
void Get()
{
    const int len = strlen(s + 1);
    int state = 1;
    for(int i = 1; i <= len; ++ i)
    {
        const int nxt = ch[state][s[i] - 'a'];
        if(!nxt) return;
        state = nxt;
    }
    if(len > rec[state]) rec[state] = len;
}

// Driver: reads the text length and the text, builds the automaton, feeds every pattern
// through Get, then prints the count computed by Cal.
int main()
{
    n = read();
    scanf("%s", s + 1);
    for(int i = 1; i <= n; ++ i)
        Ins(s[i] - 'a', i);

    m = read();
    for(int k = 0; k < m; ++ k)
    {
        // The text buffer is reused for each pattern; the automaton is already built.
        scanf("%s", s + 1);
        Get();
    }

    Cal();
    printf("%d\n", ans);
    return 0;
}

  2.[SDOI2016]生成魔咒

#include <bits/stdc++.h>
using namespace std;
// Capacity of the per-state arrays.
#define maxn 400000
#define LL long long
// n: number of values read in main; tot: index of the newest state (state 1 is the start state);
// last: state extended by the next Ins call; fa: parent pointer of each state.
int n, tot = 1, last = 1, fa[maxn];
// ans: running total maintained by Ins, kept equal to the sum of l[v] - l[fa[v]] over the
// states created so far; l: per-state length.
LL ans, l[maxn];
// ch[v]: transitions of state v keyed by the integer symbol.
map <int, int> ch[maxn];

// Reads the next decimal integer from stdin and returns it.
//
// Every character that is not a digit is skipped before the number starts; if a '-'
// occurs anywhere in that skipped prefix the result is negated. The first character
// after the digit run is consumed as well.
//
// Returns 0 (or -0) when the input ends before any digit is found. The character is
// kept in an int so that EOF can be told apart from real input; storing it in a char
// made the skip loop spin forever once the stream was exhausted.
int read()
{
    int value = 0, sign = 1;
    int c = getchar();
    while(c != EOF && (c < '0' || c > '9'))
    {
        if(c == '-') sign = -1;
        c = getchar();
    }
    while(c >= '0' && c <= '9')
    {
        value = value * 10 + (c - '0');
        c = getchar();
    }
    return value * sign;
}

// Extends the automaton by the symbol c and keeps ans equal to the sum of
// l[v] - l[fa[v]] over all states, adjusting it for every parent link that changes.
void Ins(int c)
{
    const int cur = ++ tot;
    int p = last;
    last = cur;
    l[cur] = l[p] + 1;

    // Add the missing c-transitions along the parent chain.
    while(p && !ch[p][c])
    {
        ch[p][c] = cur;
        p = fa[p];
    }
    if(!p)
    {
        fa[cur] = 1;
        ans += l[cur] - l[1];
        return;
    }

    const int q = ch[p][c];
    if(l[q] == l[p] + 1)
    {
        fa[cur] = q;
        ans += l[cur] - l[q];
        return;
    }

    // Split q: the clone takes over q's transitions and q's old parent.
    const int clone = ++ tot;
    l[clone] = l[p] + 1;
    ch[clone] = ch[q];
    ans -= l[q] - l[fa[q]];                  // q's contribution under its old parent
    fa[clone] = fa[q];
    fa[q] = clone;
    fa[cur] = clone;
    // Contributions of clone, of q under its new parent, and of the new state.
    ans += (l[clone] - l[fa[clone]]) + (l[q] - l[clone]) + (l[cur] - l[clone]);
    while(ch[p][c] == q)
    {
        ch[p][c] = clone;
        p = fa[p];
    }
}

// Driver: reads n symbols one at a time, appends each to the automaton and prints the
// running total ans after every insertion.
int main()
{
    n = read();
    for(int step = 0; step < n; ++ step)
    {
        const int symbol = read();
        Ins(symbol);
        printf("%lld\n", ans);
    }
    return 0;
}

  3.[ZJOI2015]诸神眷顾的幻想乡

#include <bits/stdc++.h>
using namespace std;
// Capacity of the per-state arrays and of the adjacency arrays.
#define maxn 2000000
// n: number of tree nodes; C: second header value read in main (not referenced again in the
// visible code); last: state the next Ins call extends; cnt: index of the newest state
// (state 1 is the start state); fa: parent pointer of each state.
int n, C, last = 1, cnt = 1, fa[maxn];
// rec[x]: state created by the most recent Ins call made for tree node x (rec[0] is set to 1
// in main); s[x]: symbol stored at tree node x; deg[x]: number of edges incident to node x.
int rec[maxn], s[maxn], deg[maxn];
// ch: transitions, 15 slots per state; l: per-state length.
int ch[maxn][15], l[maxn];
// ans: sum of l[v] - l[fa[v]] over all states, accumulated and printed in main.
long long ans;

// Reads the next decimal integer from stdin and returns it.
//
// Every character that is not a digit is skipped before the number starts; if a '-'
// occurs anywhere in that skipped prefix the result is negated. The first character
// after the digit run is consumed as well.
//
// Returns 0 (or -0) when the input ends before any digit is found. The character is
// kept in an int so that EOF can be told apart from real input; storing it in a char
// made the skip loop spin forever once the stream was exhausted.
int read()
{
    int value = 0, sign = 1;
    int c = getchar();
    while(c != EOF && (c < '0' || c > '9'))
    {
        if(c == '-') sign = -1;
        c = getchar();
    }
    while(c >= '0' && c <= '9')
    {
        value = value * 10 + (c - '0');
        c = getchar();
    }
    return value * sign;
}

// Undirected graph stored as linked adjacency lists.
//   head[u] - index of the most recently added arc leaving u (0 = none)
//   last[e] - index of the previous arc leaving the same vertex (0 = end of list)
//   to[e]   - target vertex of arc e
// Arc indices start at 2. The arrays are not cleared by the constructor, so the lists are
// only well-formed when the object lives in zero-initialised storage, as the file-scope
// instance E1 does.
struct edge
{
    int cnp, to[maxn], last[maxn], head[maxn];

    edge() : cnp(2) {}

    // Adds the undirected edge {u, v} as the two arcs u -> v and v -> u.
    void add(int u, int v)
    {
        to[cnp] = v;
        last[cnp] = head[u];
        head[u] = cnp;
        ++ cnp;

        to[cnp] = u;
        last[cnp] = head[v];
        head[v] = cnp;
        ++ cnp;
    }
}E1;

void Ins(int c, int x)
{
    int np = ++ cnt, p = last; rec[x] = cnt; l[np] = l[p] + 1;
    while(p && !ch[p][c]) ch[p][c] = np, p = fa[p];
    if(!p) fa[np] = 1;
    else
    {
        int q = ch[p][c];
        if(l[q] == l[p] + 1) fa[np] = q;
        else 
        {
            int nq = ++ cnt; l[nq] = l[p] + 1;
            memcpy(ch[nq], ch[q], sizeof(ch[q]));
            fa[nq] = fa[q], fa[q] = fa[np] = nq;
            while(ch[p][c] == q) ch[p][c] = nq, p = fa[p];
        }
    }
}

// Depth-first walk over the tree that inserts every visited node into the automaton.
//   u  - current tree node
//   fa - tree node we arrived from (0 for the starting node); note that inside this
//        function the name hides the global fa[] array
// The insertion for u continues from rec[fa], the state created for the node we came from.
void dfs(int u, int fa)
{
    last = rec[fa];
    Ins(s[u], u);

    int arc = E1.head[u];
    while(arc)
    {
        const int v = E1.to[arc];
        if(v != fa) dfs(v, u);
        arc = E1.last[arc];
    }
}

// Driver: reads the tree and the symbol on every node, runs one DFS insertion pass from
// each endpoint of the tree, then prints the sum of l[v] - l[fa[v]] over all states.
int main()
{
    n = read(), C = read();
    for(int i = 1; i <= n; i ++) s[i] = read();
    for(int i = 1; i < n; i ++)
    {
        int u = read(), v = read();
        E1.add(u, v); deg[u] ++, deg[v] ++;
    }
    // rec[0] is the start state, so a DFS root (whose "parent" is 0) extends from state 1.
    rec[0] = 1;
    // Start a pass from every node of degree at most 1. In a tree with two or more nodes
    // this is exactly the degree-1 nodes; the `<=` additionally covers the single-node
    // tree, whose only node has degree 0 and would otherwise never be inserted
    // (the program then printed 0 instead of 1).
    for(int i = 1; i <= n; i ++)
        if(deg[i] <= 1) dfs(i, 0);
    for(int i = 1; i <= cnt; i ++) ans += l[i] - l[fa[i]];
    printf("%lld\n", ans);
    return 0;
}

  4.[CTSC2012]熟悉的文章

#include <bits/stdc++.h>
using namespace std;
// Capacity of every array below.
#define maxn 4000000
#define db double
// n, m: the two header values read in main; main inserts m strings into the automaton and
// then answers n query strings.
// last: state the next Ins call extends (main resets it to 1 before each inserted string);
// tot: index of the newest state (state 1 is the start state);
// ch: transitions on the symbols 0 and 1; l: per-state length; fa: parent pointer per state.
int n, m, last, tot = 1, ch[maxn][2], l[maxn], fa[maxn];
// head, tail: bounds of the queue stored in q (used by Push_in and DP);
// g[i]: start index written by Get_pos for position i; f[i]: value computed by DP for prefix i.
int head, tail, g[maxn], f[maxn], q[maxn];
// s: 1-indexed input buffer shared by the inserted strings and the query strings.
char s[maxn];

// Reads the next decimal integer from stdin and returns it.
//
// Every character that is not a digit is skipped before the number starts; if a '-'
// occurs anywhere in that skipped prefix the result is negated. The first character
// after the digit run is consumed as well.
//
// Returns 0 (or -0) when the input ends before any digit is found. The character is
// kept in an int so that EOF can be told apart from real input; storing it in a char
// made the skip loop spin forever once the stream was exhausted.
int read()
{
    int value = 0, sign = 1;
    int c = getchar();
    while(c != EOF && (c < '0' || c > '9'))
    {
        if(c == '-') sign = -1;
        c = getchar();
    }
    while(c >= '0' && c <= '9')
    {
        value = value * 10 + (c - '0');
        c = getchar();
    }
    return value * sign;
}

// Extends the automaton from state `last` by the binary symbol x (0 or 1) and makes the
// newly created state the new `last`.
void Ins(int x)
{
    const int cur = ++ tot;
    int p = last;
    l[cur] = l[p] + 1;
    last = cur;

    // Add the missing x-transitions along the parent chain.
    while(p && !ch[p][x])
    {
        ch[p][x] = cur;
        p = fa[p];
    }
    if(!p)
    {
        fa[cur] = 1;
        return;
    }

    const int nxt = ch[p][x];
    if(l[p] + 1 == l[nxt])
    {
        fa[cur] = nxt;
        return;
    }

    // Split nxt into a clone of length l[p] + 1 and the remainder.
    const int clone = ++ tot;
    l[clone] = l[p] + 1;
    memcpy(ch[clone], ch[nxt], sizeof(ch[nxt]));
    fa[clone] = fa[nxt];
    fa[nxt] = clone;
    fa[cur] = clone;
    while(ch[p][x] == nxt)
    {
        ch[p][x] = clone;
        p = fa[p];
    }
}

// Runs s[1..n] through the automaton and, for every position i, stores in g[i] the start
// index of the longest match ending at i that the walk maintains (g[i] = i - len + 1, so
// g[i] == i + 1 when nothing matches at i).
//   n - length of the query string held in s (hides the global n)
void Get_pos(int n)
{
    int p = 1, len = 0;
    for(int i = 1; i <= n; i ++)
    {
        const int x = s[i] - '0';
        // Drop to shorter matches until the symbol can be appended or the chain runs out.
        while(p && !ch[p][x]) p = fa[p], len = l[p];
        if(ch[p][x]) p = ch[p][x], len ++;
        else p = 1, len = 0;   // no match at all: restart from the start state
        // Executed for every i; it is not part of the else branch above.
        g[i] = i - len + 1;
    }
}

// Appends index x to the queue q[head..tail], first discarding from the back every entry
// whose key f[j] - j is not larger than the key of x, so keys stay strictly decreasing.
void Push_in(int x)
{
    const int key = f[x] - x;
    for(; head <= tail; -- tail)
        if(f[q[tail]] - q[tail] > key) break;
    q[++ tail] = x;
}

// Decides whether the query of length n can be covered to at least 90% using matched
// pieces of length >= L.
//
// f[i] is the best covered length within the prefix 1..i:
//   f[i] = max( f[i - 1],  max over j with g[i] - 1 <= j <= i - L of f[j] + (i - j) ).
// The inner maximum is served by the queue maintained through Push_in, which keeps the
// candidate j with the largest f[j] - j at q[head].
//
//   n - length of the query string (hides the global n)
//   L - minimum piece length being tested
// Returns true when f[n] >= 0.9 * n. The comparison is done in integers
// (10 * f[n] >= 9 * n), which is the same condition as f[n] >= ceil(0.9 * n) without
// going through floating point.
bool DP(int n, int L)
{
    f[0] = 0;
    head = 1, tail = 0;
    // `nxt` is the next index not yet offered to the queue (named so it does not hide
    // the global `last`).
    for(int i = 1, nxt = 0; i <= n; i ++)
    {
        while(nxt <= i - L) Push_in(nxt), nxt ++;
        // Candidates left of g[i] - 1 would need a piece longer than the match ending at i.
        while(head <= tail && q[head] < g[i] - 1) head ++;
        f[i] = max(head <= tail ? f[q[head]] - q[head] + i : 0, f[i - 1]);
    }
    return 10LL * f[n] >= 9LL * n;
}

// Binary-searches the largest L in [1, n] for which DP(n, L) succeeds.
// Returns 0 when DP fails for every tested L.
int Binary(int n)
{
    int lo = 1, hi = n, best = 0;
    while(lo <= hi)
    {
        const int mid = (lo + hi) >> 1;
        if(DP(n, mid))
        {
            best = mid;
            lo = mid + 1;
        }
        else
        {
            hi = mid - 1;
        }
    }
    return best;
}

// Driver: inserts m binary strings into one shared automaton (restarting from the start
// state for each), then for each of the n query strings computes g via Get_pos and prints
// the value found by Binary.
int main()
{
    n = read(), m = read();

    for(int k = 0; k < m; ++ k)
    {
        scanf("%s", s + 1);
        last = 1;
        const int len = strlen(s + 1);
        for(int j = 1; j <= len; ++ j) Ins(s[j] - '0');
    }

    for(int k = 0; k < n; ++ k)
    {
        scanf("%s", s + 1);
        const int len = strlen(s + 1);
        Get_pos(len);
        printf("%d\n", Binary(len));
    }
    return 0;
}

猜你喜欢

转载自www.cnblogs.com/twilight-sx/p/10056255.html