后缀数组应用总结

后缀数组应用总结

解决字符串问题的首选方案

(时间复杂度可以达到O(NLlogL)。其中N为字符串个数,L为每个串的长度)

1. 求两个字符串的最长公共子串
2. 求多个字符串的最长公共子串
3. 给定两个字符串A和B,求长度不小于k的公共子串的个数(可以相同)
4. 给定n个字符串,求出现在不小于k个字符串中的最长子串。
5. 给定n个字符串,求在每个字符串中至少出现两次且不重叠的最长子串。
6. 给定n个字符串,求出现或反转后出现在每个字符串中的最长子串。
7. 要求所有正向或者反向出现在超过k/2个串中的子串
8. 长度最少为5的最长重复子串
9. 找出出现k次的可重叠的最长子串的长度
10. 求不同子串的个数
11. 求最长回文子串
12. 求字符串最多的循环次数
13. 求重复次数最多的连续重复子串,并且要求字典序最小的
14. 求字符串中所有出现至少2次的子串个数

//sa:字典序中排第i位的起始位置在str中第sa[i] sa[1~n]为有效值
//rank:就是str第i个位置的后缀是在字典序排第几 rank[0~n-1]为有效值
//height:字典序排i和i-1的后缀的最长公共前缀 height[2~n]为有效值,第二个到最后一个
//height 两个连续后缀的最长公共前缀,即公共子串长度

公共子串
题意:求两个字符串的最长公共子串
https://blog.csdn.net/libin56842/article/details/46128353

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define W(a) while(a)
#define UP(i,x,y) for(i=x;i<=y;i++)
#define N 200005
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N];
char str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]  sa[1~n]为有效值
//rank:就是str第i个位置的后缀是在字典序排第几 rank[0~n-1]为有效值
//height:字典序排i和i-1的后缀的最长公共前缀  height[2~n]为有效值,第二个到最后一个
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
void getheight(int *r,int n)//n不保存最后的0
{
    int i,j,k=0;
    for(i=1; i<=n; i++)  rank[sa[i]]=i;
    for(i=0; i<n; i++)
    {
        if(k)
            k--;
        else
            k=0;
        j=sa[rank[i]-1];
        while(r[i+k]==r[j+k])
            k++;
        height[rank[i]]=k;
    }
}

int main()
{
    int i,j,k,len,n;
    W(~scanf("%s%s",str1,str2))
    {
        len = strlen(str1);
        n = 0;
        UP(i,0,len-1)
        s[n++] = str1[i]-'a'+1;
        s[n++] = 30;
        len = strlen(str2);
        UP(i,0,len-1)
        s[n++] = str2[i]-'a'+1;
        s[n] = 0;
        getsa(s,sa,n+1,31);
        getheight(s,n);
        len = strlen(str1);
        int ans = 0;
        UP(i,2,n-1)
        {
            if(height[i]>ans)
            {
                if(sa[i-1]>=0 && sa[i-1]<len && sa[i]>=len)
                ans = max(ans,height[i]);
                if(sa[i]>=0 && sa[i]<len && sa[i-1]>=len)
                ans = max(ans,height[i]);
            }
        }
        printf("%d\n",ans);
    }

    return 0;
}

题意:求多个字符串的最长公共子串
https://blog.csdn.net/libin56842/article/details/46430867

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define UP(i,x,y) for(i=x;i<=y;i++)
#define MEM(a,x) memset(a,x,sizeof(a))
#define N 1000005
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
void getheight(int *r,int n)//n不保存最后的0
{
    int i,j,k=0;
    for(i=1; i<=n; i++)  rank[sa[i]]=i;
    for(i=0; i<n; i++)
    {
        if(k)
            k--;
        else
            k=0;
        j=sa[rank[i]-1];
        while(r[i+k]==r[j+k])
            k++;
        height[rank[i]]=k;
    }
}

char str[N],ans[N];
int id[N],vis[4005];

bool check(int mid,int n,int k)
{
    int i,j,cnt = 0;
    MEM(vis,0);
    for(i = 2; i<=n; i++)
    {
        if(height[i]<mid)
        {
            MEM(vis,0);
            cnt = 0;
            continue;
        }
        if(!vis[id[sa[i-1]]])
        {
            cnt++;
            vis[id[sa[i-1]]] = 1;
        }
        if(!vis[id[sa[i]]])
        {
            cnt++;
            vis[id[sa[i]]] = 1;
        }
        if(cnt == k)
        {
            for(j = 0; j<mid; j++)
                ans[j] = s[sa[i]+j];
            ans[mid] = '\0';
            return 1;
        }
    }
    return 0;
}

// For each data set (terminated by k == 0) reads k strings and prints the longest
// substring shared by all of them, or "IDENTITY LOST" when there is none.
int main()
{
    int n,i,j,k,len;
    // Stop on EOF as well as on k == 0. The old `~scanf(..),k` condition only
    // looked at k, so it never ended when the input ran out.
    while(scanf("%d",&k)==1 && k)
    {
        n = 0;
        len = 0;
        for(i = 0; i<k; i++)
        {
            scanf("%s",str);
            len = strlen(str);
            for(j = 0; j<len; j++)
            {
                s[n] = str[j];
                id[n] = i;
                n++;
            }
            // Separator symbols start at 256, so they are distinct from each other
            // and from any 8-bit character. The old `'#'+i` values ran into the
            // letter range and, used as ids, could index past vis[4005].
            s[n] = 256+i;
            id[n] = i;
            n++;
        }
        s[n] = 0;
        getsa(s,sa,n+1,256+k);   // symbols are 0 .. 255+k
        getheight(s,n);
        // No common substring can be longer than any one string; the last string
        // read gives the upper bound.
        int l = 1,r = len,mid,flag = 0;
        while(l<=r)
        {
            mid = (l+r)/2;
            if(check(mid,n,k))
            {
                flag = 1;
                l=mid+1;
            }
            else
                r=mid-1;
        }
        if(flag)
            printf("%s\n",ans);
        else
            printf("IDENTITY LOST\n");
    }

    return 0;
}

题意: 给定两个字符串A和B,求长度不小于k的公共子串的个数(可以相同)
https://blog.csdn.net/libin56842/article/details/46404323

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define UP(i,x,y) for(i=x;i<=y;i++)
#define MEM(a,x) memset(a,x,sizeof(a))
#define LL long long
#define N (2*100000+10)

int wa[N],wb[N],wm[N],wv[N],sa[N];
int *rank,height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀

// True when positions a and b carry the same key pair (r[x], r[x+l]).
// The second key is only read when the first keys already match.
bool cmp(int *r,int a,int b,int l)
{
    if(r[a] != r[b])
        return false;
    return r[a+l] == r[b+l];
}

void getsa(int *r,int *sa,int n,int m)
{
    int *x=wa,*y=wb,*t;
    for(int i=0; i<m; ++i)wm[i]=0;
    for(int i=0; i<n; ++i)wm[x[i]=r[i]]++;
    for(int i=1; i<m; ++i)wm[i]+=wm[i-1];
    for(int i=n-1; i>=0; --i)sa[--wm[x[i]]]=i;
    for(int i=0,j=1,p=0; p<n; j=j*2,m=p)
    {
        for(p=0,i=n-j; i<n; ++i)y[p++]=i;
        for(i=0; i<n; ++i)if(sa[i]>=j)y[p++]=sa[i]-j;
        for(i=0; i<m; ++i)wm[i]=0;
        for(i=0; i<n; ++i)wm[x[y[i]]]++;
        for(i=1; i<m; ++i)wm[i]+=wm[i-1];
        for(i=n-1; i>=0; --i)sa[--wm[x[y[i]]]]=y[i];
        for(t=x,x=y,y=t,i=p=1,x[sa[0]]=0; i<n; ++i)
        {
            x[sa[i]]=cmp(y,sa[i],sa[i-1],j)?p-1:p++;
        }
    }
    rank=x;
}

void getheight(int *r,int *sa,int n)
{
    for(int i=0,j=0,k=0; i<n; height[rank[i++]]=k)
    {
        for(k?--k:0,j=sa[rank[i]-1]; r[i+k] == r[j+k]; ++k);
    }
}
int k;
char s1[N];
int len1;

// Sums, over pairs of suffixes starting on opposite sides of position len, the
// quantity (common prefix length - k + 1) whenever that prefix is at least k long.
// Walks the ranks in order and keeps a stack of per-suffix contributions; num[1]
// and num[2] hold the running totals for suffixes before / after position len.
// Reuses the scratch arrays wa and wb as the stack storage.
LL solve(int n,int len,int k)
{
    int *owner=wa;    // owner[d]: 1 or 2, the side of the stacked suffix
    int *weight=wb;   // weight[d]: contribution currently credited to that suffix
    int depth=0;
    LL total=0;
    LL num[3]= {0};
    for(int idx=1; idx<=n; ++idx)
    {
        if(height[idx]<k)
        {
            // Shared prefix too short: nothing before this rank can pair with
            // anything after it.
            depth=0;
            num[1]=0;
            num[2]=0;
            continue;
        }
        const int gain=height[idx]-k+1;
        // Cap earlier contributions at the new, smaller value.
        for(int lvl=depth; lvl&&weight[lvl]>gain; --lvl)
        {
            num[owner[lvl]] += gain-weight[lvl];
            weight[lvl]=gain;
        }
        ++depth;
        weight[depth]=gain;
        if(sa[idx-1]<len)
            owner[depth]=1;
        else if(sa[idx-1]>len)
            owner[depth]=2;
        num[owner[depth]]+=gain;
        if(sa[idx]<len)
            total+=num[2];
        else if(sa[idx]>len)
            total+=num[1];
    }
    return total;
}

// For each data set (terminated by k == 0) reads two strings and prints the value
// returned by solve() for the concatenation  A + '#' + B.
int main()
{
    // Stop on EOF as well as on k == 0. The old `~scanf(..),k` condition only
    // looked at k, so it never ended when the input ran out.
    while(scanf("%d",&k)==1 && k)
    {
        scanf("%s",s1);
        int n = 0;
        for(n = 0;s1[n]!='\0';n++)
            s[n] = s1[n];
        s[len1=n] = '#';          // separator between the two strings
        scanf("%s",s1+n+1);       // second string goes right after the separator slot
        n++;
        for(;s1[n]!='\0';n++)
            s[n] = s1[n];
        s[n] = 0;                 // sentinel
        getsa(s,sa,n+1,201);
        getheight(s,sa,n);
        printf("%lld\n",solve(n,len1,k));
    }
    return 0;
}

题意:给定n个字符串,求出现在不小于k个字符串中的最长子串。
https://blog.csdn.net/libin56842/article/details/46409447

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1000005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
void getheight(int *r,int n)//n不保存最后的0
{
    int i,j,k=0;
    for(i=1; i<=n; i++)  rank[sa[i]]=i;
    for(i=0; i<n; i++)
    {
        if(k)
            k--;
        else
            k=0;
        j=sa[rank[i]-1];
        while(r[i+k]==r[j+k])
            k++;
        height[rank[i]]=k;
    }
}

char str[N];
int len[105],size,ans[N];
bool vis[105];

// Tests whether some substring of length mid occurs in more than k/2 of the k
// input strings.
//   mid : candidate length
//   n   : total number of symbols handed to getsa(), sentinel included, so
//         valid ranks are 0..n-1 and valid heights are 1..n-1
//   k   : number of input strings; len[j] is the separator position of string j
// On success the start of one occurrence per qualifying run is stored in
// ans[1..], the count in ans[0], and 1 is returned. Otherwise ans is left alone
// and 0 is returned.
//
// Fixes relative to the earlier version:
//  * string 1 now owns position 0 as well. The old test `sa > len[0]` with
//    len[0] == 0 excluded it, so a match starting at the very first character
//    was never attributed to string 1;
//  * the scan stops at rank n-1. The old loop also read height[n] / sa[n],
//    which are never computed for this call and may be stale from a larger,
//    earlier test case.
int check(int mid,int n,int k)
{
    int i,j;
    int found = 0,cnt = 0;
    MEM(vis,false);
    for(i = 1; i<n; i++)
    {
        if(height[i]>=mid)
        {
            for(j = 1; j<=k; j++)
            {
                // Mark the strings that own sa[i] and sa[i-1]; each string is
                // counted once per run.
                const bool curInside  = sa[i]<len[j]   && (j==1 || sa[i]>len[j-1]);
                const bool prevInside = sa[i-1]<len[j] && (j==1 || sa[i-1]>len[j-1]);
                if(curInside && !vis[j])
                {
                    vis[j] = true;
                    cnt++;
                }
                if(prevInside && !vis[j])
                {
                    vis[j] = true;
                    cnt++;
                }
            }
        }
        else
        {
            // The run ended at rank i-1.
            if(cnt>k/2) ans[++found] = sa[i-1];
            cnt = 0;
            MEM(vis,false);
        }
    }
    // A run that reaches the last rank is closed here.
    if(cnt>k/2) ans[++found] = sa[n-1];
    if(found)
    {
        ans[0] = found;
        return 1;
    }
    return 0;
}

// For each data set (terminated by k == 0) reads k strings and prints every
// longest substring that occurs in more than half of them, or "?" when there is
// none. Data sets are separated by an empty line.
int main()
{
    int n,k,i,j,flag = 0;
    // Stop on EOF as well as on k == 0. The old `~scanf(..),k` condition only
    // looked at k, so it never ended when the input ran out.
    while(scanf("%d",&k)==1 && k)
    {
        n = 0;
        size = 0;
        for(i = 1; i<=k; i++)
        {
            scanf("%s",str+n);
            for(; str[n]!='\0'; n++)
                s[n] = str[n];
            // Separators start just above 'z'. The old `'#'+i` values reached the
            // letter range from i = 62 on, so a separator could equal a letter.
            // Assumes no input symbol exceeds 'z' and that 'z'+k stays below the
            // radix 255 passed to getsa.
            s[n] = 'z'+i;
            len[++size] = n;
            n++;
        }
        s[n-1] = 0;               // the last separator doubles as the sentinel
        getsa(s,sa,n,255);
        getheight(s,n-1);
        int l=1,r=n,mid;
        while(l<=r)
        {
            mid = (l+r)/2;
            if(check(mid,n,k)) l = mid+1;
            else r = mid-1;
        }
        if(flag)
            puts("");
        flag = 1;
        if(l==1)
            puts("?");
        else
        {
            // l-1 is the longest feasible length; ans[] still holds the starts
            // recorded by the last successful check().
            for(i = 1; i<=ans[0]; i++)
            {
                for(j = ans[i]; j<ans[i]+l-1; j++)
                    printf("%c",s[j]);
                puts("");
            }
        }
    }

    return 0;
}

题意:给定n个字符串,求在每个字符串中至少出现两次且不重叠的最长子串。
https://blog.csdn.net/libin56842/article/details/46410431

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define N 100005
#define INF 0x3f3f3f3f
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
void getheight(int *r,int n)//n不保存最后的0
{
    int i,j,k=0;
    for(i=1; i<=n; i++)  rank[sa[i]]=i;
    for(i=0; i<n; i++)
    {
        if(k)
            k--;
        else
            k=0;
        j=sa[rank[i]-1];
        while(r[i+k]==r[j+k])
            k++;
        height[rank[i]]=k;
    }
}

char str[N];
int id[N],maxn[N],minn[N];

// Tests whether some substring of length mid occurs at least twice, without
// overlap, in every one of the k strings.
//   mid : candidate length
//   n   : total number of symbols handed to getsa(), sentinel included, so
//         valid ranks are 0..n-1 and valid heights are 1..n-1
//   k   : number of strings; id[p] is the string that owns position p
// For each run of ranks with height >= mid, minn[j] / maxn[j] track the leftmost
// and rightmost start inside string j.
//
// Fix: the scan stops at rank n-1. The earlier loop ran to i == n and read
// height[n] / sa[n], which are never computed for this call and may be stale
// from a larger, earlier test case.
bool check(int mid,int n,int k)
{
    int i,j;
    for(i = 0; i<=k; i++)
    {
        maxn[i] = 0;
        minn[i] = INF;
    }
    for(i = 1; i<n; i++)
    {
        if(height[i]<mid)
        {
            // A new run starts with the current suffix alone.
            for(j = 0; j<=k; j++)
            {
                maxn[j] = 0;
                minn[j] = INF;
            }
            maxn[id[sa[i]]] = sa[i];
            minn[id[sa[i]]] = sa[i];
        }
        else
        {
            // Extend the run with both neighbours, then require every string to
            // have two starts at least mid apart.
            maxn[id[sa[i]]] = max(maxn[id[sa[i]]],sa[i]);
            minn[id[sa[i]]] = min(minn[id[sa[i]]],sa[i]);
            maxn[id[sa[i-1]]] = max(maxn[id[sa[i-1]]],sa[i-1]);
            minn[id[sa[i-1]]] = min(minn[id[sa[i-1]]],sa[i-1]);
            for(j = 0; j<k; j++)
            {
                if(maxn[j]-minn[j]<mid)
                    break;
            }
            if(j==k) return true;
        }
    }
    return false;
}

int main()
{
    int t,n,i,j,k;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%d",&k);
        n = 0;
        for(i = 0; i<k; i++)
        {
            scanf("%s",str+n);
            for(; str[n]!='\0'; n++)
            {
                s[n] = str[n];
                id[n] = i;
            }
            s[n++] = '#'+i;
        }
        s[n-1] = 0;
        getsa(s,sa,n,255);
        getheight(s,n-1);
        int l = 0,r = 10000,mid,ans = 0;
        while(l<=r)
        {
            mid = (l+r)/2;
            if(check(mid,n,k))
            {
                ans = mid;
                l = mid+1;
            }
            else r = mid-1;
        }
        printf("%d\n",ans);
    }

    return 0;
}

题意:给定n个字符串,求出现或反转后出现在每个字符串中的最长子串。
https://blog.csdn.net/libin56842/article/details/46412209

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;


#define MEM(a,x) memset(a,x,sizeof(a))

#define N 1000005


int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
void getheight(int *r,int n)//n不保存最后的0
{
    int i,j,k=0;
    for(i=1; i<=n; i++)  rank[sa[i]]=i;
    for(i=0; i<n; i++)
    {
        if(k)
            k--;
        else
            k=0;
        j=sa[rank[i]-1];
        while(r[i+k]==r[j+k])
            k++;
        height[rank[i]]=k;
    }
}

char str[N];
int len[105],size,ans[N],id[N];
bool vis[105];

// Tests whether some substring of length mid is shared by all k strings, where
// id[p] names the string (0..k-1) that owns position p of the concatenation.
//   mid : candidate length
//   n   : total number of symbols handed to getsa(), sentinel included, so
//         valid ranks are 0..n-1 and valid heights are 1..n-1
//   k   : number of strings
//
// Changes relative to the earlier version:
//  * the scan stops at rank n-1. The old loop ran to i == n and read height[n] /
//    sa[n], which are never computed for this call and may be stale from a
//    larger, earlier test case;
//  * the owner is looked up directly instead of being compared against every
//    j in 0..k-1. Ids outside that range are still ignored, as before.
bool check(int mid,int n,int k)
{
    int cnt = 0;
    MEM(vis,false);
    for(int i = 1; i<n; i++)
    {
        if(height[i]>=mid)
        {
            const int cur = id[sa[i]];
            const int prev = id[sa[i-1]];
            if(cur>=0 && cur<k && !vis[cur])
            {
                vis[cur] = true;
                cnt++;
            }
            if(prev>=0 && prev<k && !vis[prev])
            {
                vis[prev] = true;
                cnt++;
            }
        }
        else
        {
            // The run ended: accept if it touched every string.
            if(cnt>=k) return true;
            cnt = 0;
            MEM(vis,false);
        }
    }
    // A run that reaches the last rank is judged here.
    return cnt>=k;
}

// Reads the number of test cases; for each case reads k strings, appends every
// string and its reverse to one buffer, and prints the largest length accepted
// by check().
int main()
{
    int n,k,i,j,t;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%d",&k);
        n = 0;
        size = 0;
        int p = 1;
        for(i = 0; i<k; i++)
        {
            scanf("%s",str);
            int ll = strlen(str);
            for(j = 0; j<ll; j++)
            {
                id[n] = i;
                s[n++] = str[j];
            }
            // Separators start at 129, above the 7-bit character range. The old
            // `'#'+p` values ran through the letter range, so a separator could
            // equal a letter of the input. The separator's id is set too, so no
            // stale id from an earlier test case survives.
            id[n] = i;
            s[n++] = 128+(p++);
            for(j = ll-1; j>=0; j--)
            {
                id[n] = i;
                s[n++] = str[j];
            }
            id[n] = i;
            s[n++] = 128+(p++);
        }
        s[n-1] = 0;                     // the last separator doubles as the sentinel
        getsa(s,sa,n,128+2*k+1);        // symbols are 0 .. 128+2k
        getheight(s,n-1);
        int l=1,r=n,mid,ans = 0;
        while(l<=r)
        {
            mid = (l+r)/2;
            if(check(mid,n,k))
            {
                ans = mid;
                l = mid+1;
            }
            else r = mid-1;
        }
        printf("%d\n",ans);
    }

    return 0;
}

题意:要求所有正向或者反向出现在超过k/2个串中的子串
https://blog.csdn.net/libin56842/article/details/46439069

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;



#define MEM(a,x) memset(a,x,sizeof(a))


#define N 1000005


int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank1[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
// Fills rank1[] and height[] from the finished suffix array.
//   r : the text, with the 0 sentinel stored at r[n]
//   n : text length NOT counting the sentinel
// height[i] is the longest common prefix of the suffixes ranked i and i-1.
void getheight(int *r,int n)
{
    // Invert the suffix array.
    for(int pos=1; pos<=n; ++pos)
        rank1[sa[pos]]=pos;

    int lcp=0;
    for(int start=0; start<n; ++start)
    {
        if(lcp>0)
            --lcp;                          // resume from the previous match length minus one
        const int prev=sa[rank1[start]-1];  // suffix ranked just before this one
        while(r[start+lcp]==r[prev+lcp])
            ++lcp;
        height[rank1[start]]=lcp;
    }
}

char str[N];
int id[N];
map<string,int> mat,ans;
map<string,int>::iterator it;

// Counts how many of bits 1..10 are set in x (one bit per string index).
// Bit 0 and bits above 10 are ignored.
int check(int x)
{
    int members = 0;
    int bit = 10;
    while(bit>=1)
    {
        if((x&(1<<bit))!=0)
            ++members;
        --bit;
    }
    return members;
}

// For each data set reads k strings, appends every string and its reverse to one
// buffer, and prints all longest substrings that occur (forward or reversed) in
// more than k/2 of the strings, or "NONE".
//
// Fixes relative to the earlier version:
//  * a 0 sentinel is appended and getsa() receives n+1 symbols. getheight(s,n)
//    walks ranks 1..n and reads sa[rank-1], so without the sentinel it touched
//    sa[n] (never computed) and sa[-1];
//  * the outer loop requires a successful read (`== 1`) instead of `~scanf`.
int main()
{
    int n,i,j,k,len;
    while(scanf("%d",&k)==1)
    {
        MEM(id,0);
        n = 0;
        int p = 200;    // separator symbols start above every input character
        for(i = 1; i<=k; i++)
        {
            scanf("%s",str);
            len = strlen(str);
            for(j = 0; j<len; j++)
            {
                id[n] = i;              // forward copy belongs to string i
                s[n++] = str[j];
            }
            s[n++] = p++;
            for(j = len-1; j>=0; j--)   // reversed copy keeps id 0
                s[n++] = str[j];
            s[n++] = p++;
        }
        if(k == 1)
        {
            printf("%s\n",str);
            continue;
        }
        s[n] = 0;
        getsa(s,sa,n+1,p);
        getheight(s,n);
        int l = 1,r = 1000;
        ans.clear();
        while(l<=r)
        {
            int mid = (l+r)/2;
            i = 0;
            mat.clear();
            while(i<n)
            {
                if(height[i]>=mid)
                {
                    int tem = 1<<id[sa[i-1]];
                    len = 2000;
                    while(height[i]>=mid && i<n)    // bitmask of the strings in this run
                    {
                        tem |= (1<<id[sa[i]]);
                        len = min(len,height[i]);
                        i++;
                    }
                    if(tem!=1)
                    {
                        // s2 is the shared prefix, s1 its reverse; a run whose
                        // reverse is already recorded is merged into that entry.
                        char s1[1005],s2[1005];
                        for(j = len-1; j>=0; j--)
                        {
                            s1[len-1-j] = s[sa[i-1]+j];
                            s2[j] = s[sa[i-1]+j];
                        }
                        s1[len] = s2[len] = '\0';
                        if(mat.find(string(s1)) != mat.end())
                            mat[string(s1)] |= tem;
                        else
                            mat[string(s2)] = tem;
                    }
                }
                i++;
            }
            int flag = 0;
            for(it = mat.begin(); it!=mat.end(); it++)
            {
                if(check(it->second) >= k/2+1)
                {
                    if(flag==0)
                    {
                        ans.clear();
                        flag = 1;
                    }
                    ans.insert(*it);
                }
            }
            if(flag==0) r = mid-1;
            else l = mid+1;
        }
        if(ans.size()==0)
            printf("NONE\n");
        else
        {
            for(it = ans.begin(); it!=ans.end(); it++)
            {
                printf("%s\n",it->first.c_str());
            }
        }
    }

    return 0;
}

最长重复子串
题意:长度最少为5的重复不重叠子串,先差分
https://blog.csdn.net/libin56842/article/details/46233687

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define UP(i,x,y) for(i=x;i<=y;i++)

#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)

#define N 20005

int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N],n;
char str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
void getheight(int *r,int n)//n不保存最后的0
{
    int i,j,k=0;
    for(i=1; i<=n; i++)  rank[sa[i]]=i;
    for(i=0; i<n; i++)
    {
        if(k)
            k--;
        else
            k=0;
        j=sa[rank[i]-1];
        while(r[i+k]==r[j+k])
            k++;
        height[rank[i]]=k;
    }
}
int ans;
int fun(int k)
{
    int i,maxn,minn;
    maxn = minn = sa[1];
    UP(i,2,n)
    {
        if(height[i]>=k && i<n)
        {
            minn = min(minn,sa[i]);
            maxn = max(maxn,sa[i]);
            continue;
        }
        if(maxn-minn>=k) return 1;
        maxn = minn = sa[i];
    }
    return 0;
}


int main()
{
    int i,j,k;
    W((~scanf("%d",&n),n))
    {
        UP(i,0,n-1)
        {
            scanf("%d",&s[i]);
        }
        UP(i,0,n-2)
        {
            s[i] = s[i+1]-s[i]+100;
        }
        s[--n] = 0;
        getsa(s,sa,n+1,200);
        getheight(s,n);
        int l = 4,r = n;
        W(l<=r)
        {
            int mid = (l+r)/2;
            if(fun(mid))
            {
                ans = mid;
                l=mid+1;
            }
            else r = mid-1;
        }
        ans++;
        printf("%d\n",ans<5?0:ans);
    }

    return 0;
}

至少出现k次的重复子串(可重叠)
题意:找出出现k次的可重叠的最长子串的长度
https://blog.csdn.net/libin56842/article/details/46236377

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 20005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
char str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
void getheight(int *r,int n)//n不保存最后的0
{
    int i,j,k=0;
    for(i=1; i<=n; i++)  rank[sa[i]]=i;
    for(i=0; i<n; i++)
    {
        if(k)
            k--;
        else
            k=0;
        j=sa[rank[i]-1];
        while(r[i+k]==r[j+k])
            k++;
        height[rank[i]]=k;
    }
}
int ans,n,m;
int fun(int k)
{
    int i,maxn,minn=sa[1],cnt = 1;
    UP(i,2,n)
    {
        if(height[i]>=k)//首先最长公共前缀肯定要大于现在枚举的长度
        {
            cnt++;//看连续的到底有几个
            minn = min(minn,sa[i]);//这一组中,长度最小的子串是多长
        }
        else
        {
            cnt = 1;//如果不行,那么重新分组
            minn = sa[i];
        }
        if(cnt>=m)//次数超过了,那么这个k长度下是可行的
            return 1;
    }
    return 0;
}


int main()
{
    int i,j,k,maxn;
    W((~scanf("%d%d",&n,&m)))
    {
        ans = maxn = 0;
        UP(i,0,n-1)
        {
            scanf("%d",&s[i]);
            maxn = max(maxn,s[i]);
        }
        s[n] = 0;
        getsa(s,sa,n+1,maxn+1);
        getheight(s,n);
        int l = 1,r = n;
        W(l<=r)
        {
            int mid = (l+r)/2;
            if(fun(mid))
            {
                ans = mid;
                l=mid+1;
            }
            else r = mid-1;
        }
        printf("%d\n",ans);
    }

    return 0;
}

不相同子串的个数
题意:求不同子串的个数
https://blog.csdn.net/libin56842/article/details/46236781

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
char str[N],str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same key pair (r[x], r[x+k]), else 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
void getsa(int *r,int *sa,int n,int m)//n要包含末尾添加的0
{
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    for(; p<n; j*=2,m=p)
    {
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
void getheight(int *r,int n)//n不保存最后的0
{
    int i,j,k=0;
    for(i=1; i<=n; i++)  rank[sa[i]]=i;
    for(i=0; i<n; i++)
    {
        if(k)
            k--;
        else
            k=0;
        j=sa[rank[i]-1];
        while(r[i+k]==r[j+k])
            k++;
        height[rank[i]]=k;
    }
}
int t,ans,n,m;

int main()
{
    int i,j,k,len;
    scanf("%d",&t);
    W(t--)
    {
        scanf("%s",str);
        len = strlen(str);
        UP(i,0,len-1)
        s[i]=str[i];
        s[len] = 0;
        getsa(s,sa,len+1,300);
        getheight(s,len);
        ans = (1+len)*len/2;
        UP(i,2,len)
        ans-=height[i];
        printf("%d\n",ans);
    }
}

最长回文子串
题意:求最长回文子串
https://blog.csdn.net/libin56842/article/details/46281255

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 2222
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
char str[N],str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+k]) == (r[b], r[b+k]), otherwise 0.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling, using a counting
// sort for every pass.
//   r  : input symbols; each value is used as a bucket index, so it must lie in [0, m)
//   sa : output; sa[i] receives the start index of the suffix with rank i
//   n  : number of symbols, including the terminating 0 appended by the caller
//   m  : exclusive upper bound on the symbol values (initial bucket count)
// Uses the file-level arrays wa, wb, wsf and wv as scratch space.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
    int i,j,p,*x=wa,*y=wb,*t;
    // Initial pass: counting sort of all suffixes by their first symbol.
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    // Each round doubles the compared length j; it stops once p (the number of
    // distinct keys assigned in the round) reaches n.
    for(; p<n; j*=2,m=p)
    {
        // y = suffixes ordered by second key: the last j suffixes have no
        // second half and come first, the rest follow in sa order shifted by j.
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        // Counting sort by first key x[], visiting suffixes in second-key order.
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        // Swap the key buffers: y now holds the old keys, x receives the new ones.
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        // Neighbours in sa with equal (first, second) old keys share a new key.
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
// Fills rank[] (inverse of sa[1..n]) and height[], where height[rank[i]] is the
// length of the common prefix of suffix i and the suffix ranked just before it.
// n is the text length without the trailing 0.
void getheight(int *r,int n)
{
    for(int pos=1; pos<=n; ++pos)
        rank[sa[pos]]=pos;

    int common=0;
    for(int start=0; start<n; ++start)
    {
        // The previous match length minus one is a lower bound for this suffix.
        if(common>0)
            --common;
        const int prev=sa[rank[start]-1];
        while(r[start+common]==r[prev+common])
            ++common;
        height[rank[start]]=common;
    }
}

int main()
{
    int len,n=0,i,j,k;
    W(~scanf("%s",str))
    {
        n = 0;
        len = strlen(str);
        UP(i,0,len-1)
        s[n++] = str[i];
        s[n++] = 200;
        DOWN(i,len-1,0)
        s[n++] = str[i];
        s[n] = 0;
        getsa(s,sa,n+1,300);
        getheight(s,n);
        int ans = 1,x=0;
        UP(i,1,n-1)
        {
            int minn = min(sa[i],sa[i-1]);
            int maxn = max(sa[i],sa[i-1]);
            if(minn>=len || maxn<len) continue;//要分别在两个串中
            if(minn+height[i]!=n-maxn) continue;//minn+height[i]是两者公共前缀的最后一个,而这个对应的位置是在后一串的n-(n-sa[k]),也就是n-maxn位置
            if(height[i]>ans)
            {
                ans = height[i];
                x = minn;
            }
            else if(height[i]==ans)
            {
                x = min(minn,x);
            }
        }
        for(i=x; ans--; i++)
            printf("%c",str[i]);
        puts("");
    }
}

连续重复子串
题意:求字符串最多的循环次数
https://blog.csdn.net/libin56842/article/details/46310425

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1000005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8

#define F(x) ((x)/3+((x)%3==1?0:tb))
#define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)
// dc3() recurses on r+n and sa+n, so the text buffer (s) and the suffix array
// (sa) need room for about 3*n entries; every other array is indexed by at most n+2.
int wsf[N],wa[N],wb[N],wv[N],sa[N*3],rank[N],height[N],f[N];
int s[N*3],a[N];
char str[N],str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when the three symbols starting at a equal the three starting at b.
int c0(int *r,int a,int b)
{
    for(int d=0; d<3; ++d)
        if(r[a+d]!=r[b+d])
            return 0;
    return 1;
}
// Ordering test used by the dc3 merge step: returns 1 when the suffix at a
// sorts before the suffix at b. Ties on the first symbol are broken by one
// more symbol (k==2) and finally by the ranks stored in wv[].
int c12(int k,int *r,int a,int b)
{
    if(r[a]!=r[b])
        return r[a]<r[b];
    if(k==2)
        return c12(1,r,a+1,b+1);
    return wv[a+1]<wv[b+1];
}
// Stable counting sort: writes the n indices of a[] into b[], ordered by the
// key r[a[i]]; keys must lie in [0, m). Uses wv and wsf as scratch.
void sort(int *r,int *a,int *b,int n,int m)
{
    int idx;
    for(idx=0; idx<m; ++idx)
        wsf[idx]=0;
    for(idx=0; idx<n; ++idx)
    {
        wv[idx]=r[a[idx]];
        ++wsf[wv[idx]];
    }
    for(idx=1; idx<m; ++idx)
        wsf[idx]+=wsf[idx-1];
    idx=n;
    while(idx-->0)
        b[--wsf[wv[idx]]]=a[idx];
}
// Recursive suffix-array construction over r[0..n-1] with symbols in [0, m).
// The positions with i%3 != 0 are sorted first (recursively if needed), then
// the i%3 == 0 positions are sorted and the two lists are merged into sa.
// The recursion works in the memory directly behind r and sa (r+n and sa+n),
// so both buffers must extend beyond n entries.
void dc3(int *r,int *sa,int n,int m)
{
    int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p;
    // Two padding symbols so that triples can be read past the end.
    r[n]=r[n+1]=0;
    // Collect the positions with i%3 != 0 and sort them by their symbol
    // triple (three counting-sort passes, least significant symbol first).
    for(i=0; i<n; i++) if(i%3!=0) wa[tbc++]=i;
    sort(r+2,wa,wb,tbc,m);
    sort(r+1,wb,wa,tbc,m);
    sort(r,wa,wb,tbc,m);
    // Name the triples; equal triples receive the same name. F() maps a text
    // position to its slot in the reduced string rn.
    for(p=1,rn[F(wb[0])]=0,i=1; i<tbc; i++)
        rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++;
    // Names not unique yet: recurse on the reduced string; otherwise the
    // names already are the ranks.
    if(p<tbc) dc3(rn,san,tbc,p);
    else for(i=0; i<tbc; i++) san[rn[i]]=i;
    // Collect the i%3 == 0 positions in the order implied by their successor
    // (i+1), then counting-sort them by first symbol.
    for(i=0; i<tbc; i++) if(san[i]<tb) wb[ta++]=san[i]*3;
    if(n%3==1) wb[ta++]=n-1;
    sort(r,wb,wa,ta,m);
    // Translate reduced indices back to text positions (G) and record their
    // ranks in wv for the comparisons done by c12.
    for(i=0; i<tbc; i++) wv[wb[i]=G(san[i])]=i;
    // Merge the two sorted lists.
    for(i=0,j=0,p=0; i<ta && j<tbc; p++)
        sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
    for(; i<ta; p++) sa[p]=wa[i++];
    for(; j<tbc; p++) sa[p]=wb[j++];
    return;
}
// Fills rank[] (inverse of sa[1..n]) and height[], where height[rank[i]] is the
// length of the common prefix of suffix i and the suffix ranked just before it.
// n is the text length without the trailing 0.
void getheight(int *r,int n)
{
    for(int pos=1; pos<=n; ++pos)
        rank[sa[pos]]=pos;

    int common=0;
    for(int start=0; start<n; ++start)
    {
        // The previous match length minus one is a lower bound for this suffix.
        if(common>0)
            --common;
        const int prev=sa[rank[start]-1];
        while(r[start+common]==r[prev+common])
            ++common;
        height[rank[start]]=common;
    }
}

int rm[N];

void RMQ(int n)
{
    int k = rank[0];
    rm[k] = N;
    int i;
    DOWN(i,k-1,0)
    {
        if(height[i+1]<rm[i+1]) rm[i]=height[i+1];
        else rm[i]=rm[i+1];
    }
    UP(i,k+1,n)
    {
        if(height[i]<rm[i-1]) rm[i]=height[i];
        else rm[i]=rm[i-1];
    }
}

int solve(int n)
{
    int i;
    UP(i,1,n/2)
    {
        if(n%i) continue;
        if(rm[rank[i]]==n-i) return n/i;
    }
    return 1;
}

int main()
{
    int n,len,i,j,k;
    W(~scanf("%s",str))
    {
        if(str[0]=='.')
            break;
        len = strlen(str);
        UP(i,0,len-1)
        s[i]=str[i];
        s[len] = 0;
        dc3(s,sa,len+1,300);
        getheight(s,len);
        RMQ(len);
        printf("%d\n",solve(len));
    }
}

重复次数最多的连续重复子串
题意:求重复次数最多的连续重复子串,并且要求字典序最小的
https://blog.csdn.net/libin56842/article/details/46317153

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1000005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
char str[N],str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+k]) == (r[b], r[b+k]), otherwise 0.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling, using a counting
// sort for every pass.
//   r  : input symbols; each value is used as a bucket index, so it must lie in [0, m)
//   sa : output; sa[i] receives the start index of the suffix with rank i
//   n  : number of symbols, including the terminating 0 appended by the caller
//   m  : exclusive upper bound on the symbol values (initial bucket count)
// Uses the file-level arrays wa, wb, wsf and wv as scratch space.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
    int i,j,p,*x=wa,*y=wb,*t;
    // Initial pass: counting sort of all suffixes by their first symbol.
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    // Each round doubles the compared length j; it stops once p (the number of
    // distinct keys assigned in the round) reaches n.
    for(; p<n; j*=2,m=p)
    {
        // y = suffixes ordered by second key: the last j suffixes have no
        // second half and come first, the rest follow in sa order shifted by j.
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        // Counting sort by first key x[], visiting suffixes in second-key order.
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        // Swap the key buffers: y now holds the old keys, x receives the new ones.
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        // Neighbours in sa with equal (first, second) old keys share a new key.
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
// Fills rank[] (inverse of sa[1..n]) and height[], where height[rank[i]] is the
// length of the common prefix of suffix i and the suffix ranked just before it.
// n is the text length without the trailing 0.
void getheight(int *r,int n)
{
    for(int pos=1; pos<=n; ++pos)
        rank[sa[pos]]=pos;

    int common=0;
    for(int start=0; start<n; ++start)
    {
        // The previous match length minus one is a lower bound for this suffix.
        if(common>0)
            --common;
        const int prev=sa[rank[start]-1];
        while(r[start+common]==r[prev+common])
            ++common;
        height[rank[start]]=common;
    }
}

int Log[N];
int best[20][N];

// Precomputes Log[v] = floor(log2(v)) for v in 1..n; Log[0] is set to -1.
void setLog(int n)
{
    Log[0] = -1;
    for(int v=1; v<=n; ++v)
    {
        // The value grows by one exactly when v is a power of two.
        const bool powerOfTwo = (v&(v-1))==0;
        Log[v] = Log[v-1] + (powerOfTwo ? 1 : 0);
    }
}
// Builds the sparse table over height[1..n]:
// best[lvl][j] = min(height[j .. j + 2^lvl - 1]).
void RMQ(int n) {
    for(int j = 1; j <= n; ++j)
        best[0][j] = height[j];
    for(int lvl = 1; lvl <= Log[n]; ++lvl) {
        const int half = 1 << (lvl - 1);
        const int last = n - 2 * half + 1;      // last start with a full window
        for(int j = 1; j <= last; ++j)
            best[lvl][j] = min(best[lvl-1][j] , best[lvl-1][j+half]);
    }
}
// Longest common prefix of the suffixes starting at text positions a and b:
// the minimum of height[] over the rank interval (min rank, max rank].
int lcp(int a,int b) {
    int lo = rank[a];
    int hi = rank[b];
    if(lo > hi) swap(lo,hi);
    ++lo;
    const int lvl = Log[hi - lo + 1];
    const int left = best[lvl][lo];
    const int right = best[lvl][hi - (1<<lvl) + 1];
    return right < left ? right : left;
}
int ans[N],len,cas = 1;
int main()
{
    int i,j,k,n,l;
    setLog(N-1);
    W(~scanf("%s",str))
    {
        if(str[0]=='#') break;
        n = strlen(str);
        UP(i,0,n-1)
        s[i] = str[i];
        s[n] = 0;
        getsa(s,sa,n+1,300);
        getheight(s,n);
        RMQ(n);
        int maxn = -1;
        len = 0;
        UP(l,1,n-1)
        {
            for(i=0;i+l<n;i+=l)
            {
                k = lcp(i,i+l);
                int m = k/l+1;
                int t = l-k%l;
                t = i-t;
                if(t>=0 && k%l)
                {
                    if(lcp(t,t+l)>=k) m++;
                }
                if(m>maxn)
                {
                    len = 0;
                    ans[len++] = l;
                    maxn = m;
                }
                else if(m == maxn)
                {
                    ans[len++] = l;
                }
            }
        }
        int start,flag = 0;
        UP(i,1,n)
        {
            if(flag)
            break;
            UP(j,0,len-1)
            {
                int tem = ans[j];
                if(lcp(sa[i],sa[i]+tem)>=(maxn-1)*tem)
                {
                    start = sa[i];
                    l = tem*maxn;
                    flag = 1;
                    break;
                }
            }
        }
        printf("Case %d: ",cas++);
        UP(i,0,l-1)
        printf("%c",str[start+i]);
        printf("\n");
    }
}

题意:求字符串中所有出现至少2次的子串个数
https://blog.csdn.net/libin56842/article/details/46431531

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;

#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 100005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank1[N],height[N],s[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+k]) == (r[b], r[b+k]), otherwise 0.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling, using a counting
// sort for every pass.
//   r  : input symbols; each value is used as a bucket index, so it must lie in [0, m)
//   sa : output; sa[i] receives the start index of the suffix with rank i
//   n  : number of symbols, including the terminating 0 appended by the caller
//   m  : exclusive upper bound on the symbol values (initial bucket count)
// Uses the file-level arrays wa, wb, wsf and wv as scratch space.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
    int i,j,p,*x=wa,*y=wb,*t;
    // Initial pass: counting sort of all suffixes by their first symbol.
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    // Each round doubles the compared length j; it stops once p (the number of
    // distinct keys assigned in the round) reaches n.
    for(; p<n; j*=2,m=p)
    {
        // y = suffixes ordered by second key: the last j suffixes have no
        // second half and come first, the rest follow in sa order shifted by j.
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        // Counting sort by first key x[], visiting suffixes in second-key order.
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        // Swap the key buffers: y now holds the old keys, x receives the new ones.
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        // Neighbours in sa with equal (first, second) old keys share a new key.
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
// Fills rank1[] (inverse of sa[1..n]) and height[], where height[rank1[i]] is
// the length of the common prefix of suffix i and the suffix ranked just
// before it. n is the text length without the trailing 0.
void getheight(int *r,int n)
{
    for(int pos=1; pos<=n; ++pos)
        rank1[sa[pos]]=pos;

    int common=0;
    for(int start=0; start<n; ++start)
    {
        // The previous match length minus one is a lower bound for this suffix.
        if(common>0)
            --common;
        const int prev=sa[rank1[start]-1];
        while(r[start+common]==r[prev+common])
            ++common;
        height[rank1[start]]=common;
    }
}

char str[N];

// Reads t strings; for each prints the sum of all rises of the height array,
// accumulated run by run: every increasing run contributes (peak - base).
int main()
{
    int t;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%s",str);
        const int len = strlen(str);
        for(int p = 0; p<len; p++)
            s[p] = str[p];
        s[len] = 0;
        getsa(s,sa,len+1,200);
        getheight(s,len);

        int cnt = 0,base = 0,peak = 0;
        for(int r = 1; r<=len; r++)
        {
            const int h = height[r];
            if(h<=peak)
            {
                // The increasing run ended: bank it and restart from h.
                cnt += peak-base;
                base = h;
            }
            peak = h;
        }
        cnt += peak-base;
        printf("%d\n",cnt);
    }

    return 0;
}

计蒜客总结后缀数组:
后缀数组模板:求出 sa 和 height。字符串下标从 1 开始:输入时用 scanf("%s", ch + 1) 读入,字符串长度为 strlen(ch + 1)。

// Builds the suffix array and the height array of the 1-indexed sequence
// ch[1..n] by prefix doubling with counting sorts. Works on names declared
// outside this block: reads n and ch; writes sa, _rank and height; uses cntA,
// cntB, A, B and tsa as scratch.
void da()
{
	// Counting sort of the positions by their first symbol. Only buckets
	// 0..129 are cleared and accumulated, so symbols are assumed to be < 130.
	for (int i = 0; i < 130; i ++) cntA[i] = 0;
	for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
	for (int i = 1; i < 130; i ++) cntA[i] += cntA[i - 1];
	for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
	// Initial ranks: positions with the same first symbol share a rank.
	_rank[sa[1]] = 1;
	for (int i = 2; i <= n; i ++)
	{
		_rank[sa[i]] = _rank[sa[i - 1]];
		if (ch[sa[i]] != ch[sa[i - 1]]) _rank[sa[i]] ++;
	}
	// Doubling: key of position i is (_rank[i], _rank[i + l]), with 0 as the
	// second key past the end. Stops once the largest rank reaches n.
	for (int l = 1; _rank[sa[n]] < n; l <<= 1)
	{
		for (int i = 0; i <= n; i ++) cntA[i] = 0;
		for (int i = 0; i <= n; i ++) cntB[i] = 0;
		for (int i = 1; i <= n; i ++)
		{
			cntA[A[i] = _rank[i]] ++;
			cntB[B[i] = (i + l <= n) ? _rank[i + l] : 0] ++;
		}
		// Sort by second key into tsa, then by first key into sa.
		for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
		for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
		for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
		for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
		// Re-rank: equal key pairs keep the same rank.
		_rank[sa[1]] = 1;
		for (int i = 2; i <= n; i ++)
		{
			_rank[sa[i]] = _rank[sa[i - 1]];
			if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) _rank[sa[i]] ++;
		}
	}
	// height[_rank[i]] = common prefix length of suffix i and the suffix ranked
	// just before it; j is carried over (minus one) from the previous position.
	for (int i = 1, j = 0; i <= n; i ++)
	{
		if (j) j --;
		while (ch[i + j] == ch[sa[_rank[i] - 1] + j]) j ++;
		height[_rank[i]] = j;
	}
}

1. 最长重复不重叠子串长度(差分) https://www.jisuanke.com/course/1177/62045 输出最长重复不重叠子串的长度

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;

const int maxn = 40005;
int arr[maxn], ch[maxn], A[maxn], B[maxn]; int n, l, r, mid, res;
int cntA[maxn], cntB[maxn], _rank[maxn], sa[maxn], tsa[maxn];
int height[maxn];

// Builds the suffix array and the height array of the 1-indexed sequence
// ch[1..n] by prefix doubling with counting sorts. Works on file-level state:
// reads n and ch; writes sa, _rank and height; uses cntA, cntB, A, B and tsa
// as scratch.
void da()
{
	// Counting sort of the positions by their first symbol. The symbols are
	// differences shifted by +90, which can exceed 129, so 256 buckets are
	// cleared and accumulated here.
	for (int i = 0; i < 256; i ++) cntA[i] = 0;
	for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
	for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
	for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
	// Initial ranks: positions with the same first symbol share a rank.
	_rank[sa[1]] = 1;
	for (int i = 2; i <= n; i ++)
	{
		_rank[sa[i]] = _rank[sa[i - 1]];
		if (ch[sa[i]] != ch[sa[i - 1]]) _rank[sa[i]] ++;
	}
	// Doubling: key of position i is (_rank[i], _rank[i + l]), with 0 as the
	// second key past the end. Stops once the largest rank reaches n.
	for (int l = 1; _rank[sa[n]] < n; l <<= 1)
	{
		for (int i = 0; i <= n; i ++) cntA[i] = 0;
		for (int i = 0; i <= n; i ++) cntB[i] = 0;
		for (int i = 1; i <= n; i ++)
		{
			cntA[A[i] = _rank[i]] ++;
			cntB[B[i] = (i + l <= n) ? _rank[i + l] : 0] ++;
		}
		// Sort by second key into tsa, then by first key into sa.
		for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
		for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
		for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
		for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
		// Re-rank: equal key pairs keep the same rank.
		_rank[sa[1]] = 1;
		for (int i = 2; i <= n; i ++)
		{
			_rank[sa[i]] = _rank[sa[i - 1]];
			if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) _rank[sa[i]] ++;
		}
	}
	// height[_rank[i]] = common prefix length of suffix i and the suffix ranked
	// just before it; j is carried over (minus one) from the previous position.
	for (int i = 1, j = 0; i <= n; i ++)
	{
		if (j) j --;
		while (ch[i + j] == ch[sa[_rank[i] - 1] + j]) j ++;
		height[_rank[i]] = j;
	}
}

// Returns true when some run of consecutive ranks with height >= v contains
// two suffix start positions that are at least v apart.
bool check(int v)
{
	int lo = sa[1];
	int hi = sa[1];
	for (int r = 2; r <= n; ++r)
	{
		const int p = sa[r];
		if (height[r] < v)
		{
			// Run broken: start a new group at this suffix.
			lo = p;
			hi = p;
		}
		else
		{
			if (p < lo) lo = p;
			if (p > hi) hi = p;
		}
		if (hi - lo >= v) return true;
	}
	return false;
}

int main()
{
	while (scanf("%d", &n) != EOF && n > 0)
	{
		for (int i = 1; i <= n; i++) scanf("%d", &arr[i]);
		for (int i = 1; i < n; i++) ch[i] = arr[i + 1] - arr[i] + 90;
		da();
		l = 1, r = n, res = 0;
		while (l <= r)
		{
			mid = (l + r) >> 1;
			if (check(mid))
			{
				res = mid;
				l = mid + 1;
			}
			else
				r = mid - 1;
		}
		res++;
		printf("%d\n", res);
	}
	return 0;
}

2. k 次最长重复子串 https://www.jisuanke.com/course/1177/62046 输出至少出现 k 次的最长重复子串的长度

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;

const int maxn = 40005;
int n, k, l, r, mid, res; 
int ch[maxn], height[maxn];
int cntA[maxn], cntB[maxn];
int A[maxn], B[maxn];
int sa[maxn], tsa[maxn], rk[maxn];

// Builds the suffix array and the height array of the 1-indexed sequence
// ch[1..n] by prefix doubling with counting sorts. Works on file-level state:
// reads n and ch; writes sa, rk and height; uses cntA, cntB, A, B and tsa as
// scratch.
void da()
{
	// Counting sort of the positions by their first symbol. Only buckets
	// 0..255 are cleared and accumulated, so symbols are assumed to be < 256.
	for (int i = 0; i < 256; i ++) cntA[i] = 0;
	for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
	for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
	for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
	// Initial ranks: positions with the same first symbol share a rank.
	rk[sa[1]] = 1;
	for (int i = 2; i <= n; i ++)
	{
		rk[sa[i]] = rk[sa[i - 1]];
		if (ch[sa[i]] != ch[sa[i - 1]]) rk[sa[i]] ++;
	}
	// Doubling: key of position i is (rk[i], rk[i + l]), with 0 as the second
	// key past the end. Stops once the largest rank reaches n.
	for (int l = 1; rk[sa[n]] < n; l <<= 1)
	{
		for (int i = 0; i <= n; i ++) cntA[i] = 0;
		for (int i = 0; i <= n; i ++) cntB[i] = 0;
		for (int i = 1; i <= n; i ++)
		{
			cntA[A[i] = rk[i]] ++;
			cntB[B[i] = (i + l <= n) ? rk[i + l] : 0] ++;
		}
		// Sort by second key into tsa, then by first key into sa.
		for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
		for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
		for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
		for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
		// Re-rank: equal key pairs keep the same rank.
		rk[sa[1]] = 1;
		for (int i = 2; i <= n; i ++)
		{
			rk[sa[i]] = rk[sa[i - 1]];
			if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
		}
	}
	// height[rk[i]] = common prefix length of suffix i and the suffix ranked
	// just before it; j is carried over (minus one) from the previous position.
	for (int i = 1, j = 0; i <= n; i ++)
	{
		if (j) j --;
		while (ch[i + j] == ch[sa[rk[i] - 1] + j]) j ++;
		height[rk[i]] = j;
	}
}

// Returns true when v <= 1, or when some run of consecutive ranks with
// height >= v spans at least k suffixes (counting the one that opens the run).
bool check(int v)
{
	if (v <= 1) return true;
	int groupStart = 1;
	for (int r = 2; r <= n; ++r)
	{
		if (height[r] < v)
		{
			groupStart = r;
			continue;
		}
		if (r - groupStart + 1 >= k) return true;
	}
	return false;
}

// Reads n, k and the n symbols, builds the suffix array, then binary-searches
// the largest length accepted by check() and prints it.
int main()
{
	scanf("%d%d", &n, &k);
	for (int i = 1; i <= n; ++i)
		scanf("%d", &ch[i]);
	da();

	res = 0;
	for (l = 1, r = n; l <= r; )
	{
		const int probe = (l + r) >> 1;
		if (!check(probe))
		{
			r = probe - 1;
			continue;
		}
		res = probe;
		l = probe + 1;
	}
	printf("%d\n", res);
	return 0;
}

3. 子串排名 https://www.jisuanke.com/course/1177/62048 输出字典序第 k 小的子串的首位置和末位置(下标从 1 开始)

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;
typedef long long ll;

const int maxn = 200005;
char ch[maxn]; int n, q, l, r, mid, res;
int A[maxn], B[maxn], cntA[maxn], cntB[maxn];
int sa[maxn], tsa[maxn], rk[maxn], height[maxn];
ll sum[maxn], k;

// Builds the suffix array and the height array of the 1-indexed string
// ch[1..n] by prefix doubling with counting sorts. Works on file-level state:
// reads n and ch; writes sa, rk and height; uses cntA, cntB, A, B and tsa as
// scratch.
void da()
{
	// Counting sort of the positions by their first character. Only buckets
	// 0..255 are cleared and accumulated.
	for (int i = 0; i < 256; i ++) cntA[i] = 0;
	for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
	for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
	for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
	// Initial ranks: positions with the same first character share a rank.
	rk[sa[1]] = 1;
	for (int i = 2; i <= n; i ++)
	{
		rk[sa[i]] = rk[sa[i - 1]];
		if (ch[sa[i]] != ch[sa[i - 1]]) rk[sa[i]] ++;
	}
	// Doubling: key of position i is (rk[i], rk[i + l]), with 0 as the second
	// key past the end. Stops once the largest rank reaches n.
	for (int l = 1; rk[sa[n]] < n; l <<= 1)
	{
		for (int i = 0; i <= n; i ++) cntA[i] = 0;
		for (int i = 0; i <= n; i ++) cntB[i] = 0;
		for (int i = 1; i <= n; i ++)
		{
			cntA[A[i] = rk[i]] ++;
			cntB[B[i] = (i + l <= n) ? rk[i + l] : 0] ++;
		}
		// Sort by second key into tsa, then by first key into sa.
		for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
		for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
		for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
		for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
		// Re-rank: equal key pairs keep the same rank.
		rk[sa[1]] = 1;
		for (int i = 2; i <= n; i ++)
		{
			rk[sa[i]] = rk[sa[i - 1]];
			if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
		}
	}
	// height[rk[i]] = common prefix length of suffix i and the suffix ranked
	// just before it; j is carried over (minus one) from the previous position.
	for (int i = 1, j = 0; i <= n; i ++)
	{
		if (j) j --;
		while (ch[i + j] == ch[sa[rk[i] - 1] + j]) j ++;
		height[rk[i]] = j;
	}
}

// For every input string: builds the suffix array, then answers q queries.
// Each query asks for the k-th substring counted through the prefix sums in
// sum[], and prints the 1-based first and last position of an occurrence
// (or "0 0" when k exceeds sum[n]).
int main()
{
	while (scanf("%s", ch + 1) != EOF)
	{
		n = strlen(ch + 1);
		da();
		// sum[i] accumulates (suffix length - height) over the first i ranks,
		// i.e. the substrings first seen as prefixes of the i-th ranked suffix.
		for (int i = 1; i <= n; i++)
			sum[i] = sum[i - 1] + (n - sa[i] + 1 - height[i]);
		scanf("%d", &q); l = r = 0;
		while (q--)
		{
			// The query is decoded with the previous answer: k = (input ^ l ^ r) + 1.
			scanf("%lld", &k); k ^= l; k ^= r; k++;
			if (k > sum[n]) l = r = 0;
			else
			{
				// Binary search for the first rank whose prefix sum reaches k.
				l = 1, r = n, res = 0;
				while (l <= r)
				{
					mid = (l + r) >> 1;
					if (sum[mid] >= k)
					{
						res = mid;
						r = mid - 1;
					}
					else
						l = mid + 1;
				}
				// Within that suffix the wanted prefix has length
				// height[res] + (k - sum[res - 1]).
				l = sa[res], r = sa[res] + height[res] + k - sum[res - 1] - 1;
				int len = r - l + 1;
				// Following ranks that still share len characters contain the
				// same substring; keep the occurrence with the smallest start.
				for (res++; res <= n && height[res] >= len; res++)
				if (sa[res] < l)
				l = sa[res], r = l + len - 1;
			}
			
			printf("%d %d\n", l, r);
		}
	}
	return 0;
}

4. 重复不重叠子串个数 https://www.jisuanke.com/course/1177/62051 输出重复不重叠子串个数

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;

const int maxn = 2005;
char ch[maxn]; int n, l, r, res;
int A[maxn], B[maxn], cntA[maxn], cntB[maxn];
int sa[maxn], tsa[maxn], rk[maxn], height[maxn];

// Builds the suffix array and the height array of the 1-indexed string
// ch[1..n] by prefix doubling with counting sorts. Works on file-level state:
// reads n and ch; writes sa, rk and height; uses cntA, cntB, A, B and tsa as
// scratch.
void da()
{
	// Counting sort of the positions by their first character. Only buckets
	// 0..255 are cleared and accumulated.
	for (int i = 0; i < 256; i ++) cntA[i] = 0;
	for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
	for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
	for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
	// Initial ranks: positions with the same first character share a rank.
	rk[sa[1]] = 1;
	for (int i = 2; i <= n; i ++)
	{
		rk[sa[i]] = rk[sa[i - 1]];
		if (ch[sa[i]] != ch[sa[i - 1]]) rk[sa[i]] ++;
	}
	// Doubling: key of position i is (rk[i], rk[i + l]), with 0 as the second
	// key past the end. Stops once the largest rank reaches n.
	for (int l = 1; rk[sa[n]] < n; l <<= 1)
	{
		for (int i = 0; i <= n; i ++) cntA[i] = 0;
		for (int i = 0; i <= n; i ++) cntB[i] = 0;
		for (int i = 1; i <= n; i ++)
		{
			cntA[A[i] = rk[i]] ++;
			cntB[B[i] = (i + l <= n) ? rk[i + l] : 0] ++;
		}
		// Sort by second key into tsa, then by first key into sa.
		for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
		for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
		for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
		for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
		// Re-rank: equal key pairs keep the same rank.
		rk[sa[1]] = 1;
		for (int i = 2; i <= n; i ++)
		{
			rk[sa[i]] = rk[sa[i - 1]];
			if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
		}
	}
	// height[rk[i]] = common prefix length of suffix i and the suffix ranked
	// just before it; j is carried over (minus one) from the previous position.
	for (int i = 1, j = 0; i <= n; i ++)
	{
		if (j) j --;
		while (ch[i + j] == ch[sa[rk[i] - 1] + j]) j ++;
		height[rk[i]] = j;
	}
}

// For every input string (until EOF or a string starting with '#'): for each
// length 1..n/2, counts the groups of consecutive ranks with height >= length
// whose smallest and largest start positions are at least that length apart.
// Prints the total.
int main()
{
	for (;;)
	{
		if (scanf("%s", ch + 1) == EOF || ch[1] == '#') break;
		n = strlen(ch + 1);
		res = 0;
		da();
		for (int len = 1; len <= n / 2; ++len)
		{
			l = r = sa[1];
			for (int rank = 2; rank <= n; ++rank)
			{
				const int p = sa[rank];
				if (height[rank] < len)
				{
					// Group closed: count it if two occurrences do not overlap.
					if (l + len <= r) ++res;
					l = r = p;
					continue;
				}
				l = min(p, l);
				r = max(p, r);
			}
			if (l + len <= r) ++res;
		}
		printf("%d\n", res);
	}
	return 0;
}

5.最长公共子串 https://www.jisuanke.com/course/1177/62055 输出最长公共子串长度

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;

const int maxn = 200005;
int l1, l2, n, res;
char ch[maxn];
int sa[maxn], height[maxn];
int tsa[maxn], rk[maxn], A[maxn], B[maxn];
int cntA[maxn], cntB[maxn];

// Builds the suffix array and the height array of the 1-indexed string
// ch[1..n] by prefix doubling with counting sorts. Works on file-level state:
// reads n and ch; writes sa, rk and height; uses cntA, cntB, A, B and tsa as
// scratch.
void da()
{
	// Counting sort of the positions by their first character. Only buckets
	// 0..255 are cleared and accumulated.
	for (int i = 0; i < 256; i ++) cntA[i] = 0;
	for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
	for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
	for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
	// Initial ranks: positions with the same first character share a rank.
	rk[sa[1]] = 1;
	for (int i = 2; i <= n; i ++)
	{
		rk[sa[i]] = rk[sa[i - 1]];
		if (ch[sa[i]] != ch[sa[i - 1]]) rk[sa[i]] ++;
	}
	// Doubling: key of position i is (rk[i], rk[i + l]), with 0 as the second
	// key past the end. Stops once the largest rank reaches n.
	for (int l = 1; rk[sa[n]] < n; l <<= 1)
	{
		for (int i = 0; i <= n; i ++) cntA[i] = 0;
		for (int i = 0; i <= n; i ++) cntB[i] = 0;
		for (int i = 1; i <= n; i ++)
		{
			cntA[A[i] = rk[i]] ++;
			cntB[B[i] = (i + l <= n) ? rk[i + l] : 0] ++;
		}
		// Sort by second key into tsa, then by first key into sa.
		for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
		for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
		for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
		for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
		// Re-rank: equal key pairs keep the same rank.
		rk[sa[1]] = 1;
		for (int i = 2; i <= n; i ++)
		{
			rk[sa[i]] = rk[sa[i - 1]];
			if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
		}
	}
	// height[rk[i]] = common prefix length of suffix i and the suffix ranked
	// just before it; j is carried over (minus one) from the previous position.
	for (int i = 1, j = 0; i <= n; i ++)
	{
		if (j) j --;
		while (ch[i + j] == ch[sa[rk[i] - 1] + j]) j ++;
		height[rk[i]] = j;
	}
}

// Side marker for a position of the concatenated text: -1 for positions up to
// l1, +1 for everything after it.
int belong(int i)
{
	if (i <= l1)
		return -1;
	return 1;
}

// Reads two strings, joins them as first + '$' + second (1-indexed in ch),
// builds the suffix array and prints the largest height between two adjacent
// ranks whose suffixes lie on different sides of the join.
int main()
{
	scanf("%s", ch + 1);
	l1 = strlen(ch + 1);
	ch[l1 + 1] = '$';
	char *second = ch + l1 + 2;
	scanf("%s", second);
	l2 = strlen(second);
	n = strlen(ch + 1);
	da();
	for (int rank = 2; rank <= n; ++rank)
	{
		const bool crosses = belong(sa[rank - 1]) * belong(sa[rank]) < 0;
		if (crosses && height[rank] > res)
			res = height[rank];
	}
	printf("%d\n", res);
	return 0;
}

6.求两个字符串长度大于等于k的公共子串个数
https://www.jisuanke.com/course/1177/62056

#include <stdio.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <string>
#include <algorithm>
#include <vector>
using namespace std;

typedef long long ll;

#define INF 0x7f7f7f7f
#define MAXN 200005

ll res;
int k, n, m;
char s[MAXN];
int rk[MAXN], sa[MAXN], tsa[MAXN], height[MAXN];
int A[MAXN], B[MAXN], cntA[MAXN], cntB[MAXN];

// Builds the suffix array and the height array of the 1-indexed string
// s[1..n] by prefix doubling with counting sorts. Works on file-level state:
// reads n and s; writes sa, rk and height; uses cntA, cntB, A, B and tsa as
// scratch.
void da()
{
	// Counting sort of the positions by their first character (buckets 0..255).
	for (int i = 0; i < 256; i ++) cntA[i] = 0;
	for (int i = 1; i <= n; i ++) cntA[s[i]] ++;
	for (int i = 0; i < 256; i ++) cntA[i + 1] += cntA[i];
	for (int i = n; i; i --) sa[cntA[s[i]] --] = i;
	// Initial ranks: positions with the same first character share a rank.
	rk[sa[1]] = 1;
	for (int i = 2; i <= n; i ++)
	{
		rk[sa[i]] = rk[sa[i - 1]];
		if (s[sa[i]] != s[sa[i - 1]]) rk[sa[i]] ++;
	}
	// Doubling: key of position i is (rk[i], rk[i + l]), with 0 as the second
	// key past the end. Stops once the largest rank reaches n or l reaches n.
	for (int l = 1; l < n && rk[sa[n]] < n; l <<= 1)
	{
		for (int i = 0; i <= n; i ++) cntA[i] = cntB[i] = 0;
		for (int i = 1; i <= n; i ++)
		{
			cntA[A[i] = rk[i]] ++;
			cntB[B[i] = i + l <= n ? rk[i + l] : 0] ++;
		}
		for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1], cntB[i] += cntB[i - 1];
		// Sort by second key into tsa, then by first key into sa.
		for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
		for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
		// Re-rank: equal key pairs keep the same rank.
		rk[sa[1]] = 1;
		for (int i = 2; i <= n; i ++)
		{
			rk[sa[i]] = rk[sa[i - 1]];
			if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
		} 
	}
	// height[rk[i]] = common prefix length of suffix i and the suffix ranked
	// just before it; j is carried over (minus one) from the previous position.
	for (int i = 1, j = 0; i <= n; i ++)
	{
		if (j) j --;
		while (s[i + j] == s[sa[rk[i] - 1] + j]) j ++;
		height[rk[i]] = j;
	}
}

int stackA[MAXN], topA; ll sA;
int stackB[MAXN], topB; ll sB;

// Accumulates into res the contribution height-k+1 over pairs of suffixes that
// lie on different sides of position m, using one monotonic stack per side.
// stackA/stackB hold the values (height - k + 1), cntA/cntB (reused from da()
// as plain scratch) hold how many suffixes share each value, and sA/sB are the
// running sums of value*count. Side A is fed by suffixes with start <= m, side
// B by the others.
void solve()
{
	res = sA = sB = topA = topB = 0;
	for (int i = 3; i <= n; i ++)
		if (height[i] < k)
		{
			// No common prefix of length k across this boundary: restart.
			sA = sB = topA = topB = 0;
		} else {
			if (sa[i - 1] <= m)
			{
				// The previous suffix is on side A: push it onto stack A with
				// count 1, merging entries whose value is not smaller.
				int s = height[i] - k + 1, c = 1;
				while (topA > 0 && s <= stackA[topA])
				{
					sA -= 1ll * stackA[topA] * cntA[topA];
					c += cntA[topA];
					topA --;
				}
				sA += 1ll * s * c;
				topA ++; stackA[topA] = s; cntA[topA] = c;
				
				// Stack B receives no new suffix but its entries are capped
				// at the new value.
				s = height[i] - k + 1, c = 0;
				while (topB > 0 && s <= stackB[topB])
				{
					sB -= 1ll * stackB[topB] * cntB[topB];
					c += cntB[topB];
					topB --;
				}
				if (c > 0)
				{
					sB += 1ll * s * c;
					topB ++; stackB[topB] = s; cntB[topB] = c;
				}
			} else {
				// Mirror case: the previous suffix is on side B.
				int s = height[i] - k + 1, c = 1;
				while (topB > 0 && s <= stackB[topB])
				{
					sB -= 1ll * stackB[topB] * cntB[topB];
					c += cntB[topB];
					topB --;
				}
				sB += 1ll * s * c;
				topB ++; stackB[topB] = s; cntB[topB] = c;
				
				s = height[i] - k + 1, c = 0;
				while (topA > 0 && s <= stackA[topA])
				{
					sA -= 1ll * stackA[topA] * cntA[topA];
					c += cntA[topA];
					topA --;
				}
				if (c > 0)
				{
					sA += 1ll * s * c;
					topA ++; stackA[topA] = s; cntA[topA] = c;
				}
			}
			// The current suffix pairs with everything stacked on the other side.
			if (sa[i] <= m)
				res += sB;
			else
				res += sA;
		}
}

// For every test case (k, then two strings; stops at EOF or k <= 0): joins the
// strings as first + '$' + second (1-indexed in s), builds the suffix array
// and prints the value computed by solve().
int main()
{
	for (;;)
	{
		if (scanf("%d", &k) == EOF || k <= 0) break;
		scanf("%s", s + 1);
		m = strlen(s + 1);
		s[m + 1] = '$';
		scanf("%s", s + m + 2);
		n = strlen(s + 1);
		da();
		solve();
		printf("%lld\n", res);
	}
	return 0;
}

7.求多个字符串的最长公共子串(可反转) poj1226 输出最长公共子串的长度

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;


#define MEM(a,x) memset(a,x,sizeof(a))

#define N 1000005

int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+k]) == (r[b], r[b+k]), otherwise 0.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return r[a+k]==r[b+k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling, using a counting
// sort for every pass.
//   r  : input symbols; each value is used as a bucket index, so it must lie in [0, m)
//   sa : output; sa[i] receives the start index of the suffix with rank i
//   n  : number of symbols, including the terminating 0 appended by the caller
//   m  : exclusive upper bound on the symbol values (initial bucket count)
// Uses the file-level arrays wa, wb, wsf and wv as scratch space.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
    int i,j,p,*x=wa,*y=wb,*t;
    // Initial pass: counting sort of all suffixes by their first symbol.
    for(i=0; i<m; i++)  wsf[i]=0;
    for(i=0; i<n; i++)  wsf[x[i]=r[i]]++;
    for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
    for(i=n-1; i>=0; i--)  sa[--wsf[x[i]]]=i;
    p=1;
    j=1;
    // Each round doubles the compared length j; it stops once p (the number of
    // distinct keys assigned in the round) reaches n.
    for(; p<n; j*=2,m=p)
    {
        // y = suffixes ordered by second key: the last j suffixes have no
        // second half and come first, the rest follow in sa order shifted by j.
        for(p=0,i=n-j; i<n; i++)  y[p++]=i;
        for(i=0; i<n; i++)  if(sa[i]>=j)  y[p++]=sa[i]-j;
        // Counting sort by first key x[], visiting suffixes in second-key order.
        for(i=0; i<n; i++)  wv[i]=x[y[i]];
        for(i=0; i<m; i++)  wsf[i]=0;
        for(i=0; i<n; i++)  wsf[wv[i]]++;
        for(i=1; i<m; i++)  wsf[i]+=wsf[i-1];
        for(i=n-1; i>=0; i--)  sa[--wsf[wv[i]]]=y[i];
        // Swap the key buffers: y now holds the old keys, x receives the new ones.
        t=x;
        x=y;
        y=t;
        x[sa[0]]=0;
        // Neighbours in sa with equal (first, second) old keys share a new key.
        for(p=1,i=1; i<n; i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
    }
}
// Fills rank[] (inverse of sa[1..n]) and height[], where height[rank[i]] is the
// length of the common prefix of suffix i and the suffix ranked just before it.
// n is the text length without the trailing 0.
void getheight(int *r,int n)
{
    for(int pos=1; pos<=n; ++pos)
        rank[sa[pos]]=pos;

    int common=0;
    for(int start=0; start<n; ++start)
    {
        // The previous match length minus one is a lower bound for this suffix.
        if(common>0)
            --common;
        const int prev=sa[rank[start]-1];
        while(r[start+common]==r[prev+common])
            ++common;
        height[rank[start]]=common;
    }
}

char str[N];
int len[105],size,ans[N],id[N];
bool vis[105];

// Returns true when some run of consecutive ranks 1..n with height >= mid
// touches suffixes owned (per id[]) by at least k different strings 0..k-1.
bool check(int mid,int n,int k)
{
    int covered = 0;
    memset(vis,false,sizeof(vis));
    for(int i = 1; i<=n; i++)
    {
        if(height[i]<mid)
        {
            // Run ended: evaluate it and start over.
            if(covered>=k) return true;
            covered = 0;
            memset(vis,false,sizeof(vis));
            continue;
        }
        // Mark the owners of both suffixes adjacent to this height entry.
        const int owners[2] = { id[sa[i]], id[sa[i-1]] };
        for(int w = 0; w<2; w++)
        {
            const int o = owners[w];
            if(o>=0 && o<k && !vis[o])
            {
                vis[o] = true;
                covered++;
            }
        }
    }
    return covered>=k;
}

// For each of t test cases: reads k strings, concatenates every string and its
// reverse (each followed by a unique separator) into s[], builds the suffix
// array and binary-searches the largest length for which check() finds a
// substring covering all k strings. Prints that length.
int main()
{
    // Separators start above every possible char value so they can never
    // collide with a character of the input.
    const int SEP_BASE = 256;
    int n,k,i,j,t;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%d",&k);
        if(k<=0)
        {
            // Nothing to concatenate; avoids writing s[-1] below.
            printf("0\n");
            continue;
        }
        n = 0;
        size = 0;
        int p = 0;
        for(i = 0; i<k; i++)
        {
            scanf("%s",str);
            int ll = strlen(str);
            for(j = 0; j<ll; j++)
            {
                id[n] = i;
                s[n++] = str[j];
            }
            id[n] = i;              // keep id[] defined on separator slots too
            s[n++] = SEP_BASE+(p++);
            for(j = ll-1; j>=0; j--)
            {
                id[n] = i;
                s[n++] = str[j];
            }
            id[n] = i;
            s[n++] = SEP_BASE+(p++);
        }
        s[n-1] = 0;                 // the last separator doubles as the terminator
        getsa(s,sa,n,SEP_BASE+p);
        getheight(s,n-1);
        // Valid ranks are 1..n-1; height[n] and sa[n] are never filled.
        int l=1,r=n-1,mid,ans = 0;
        while(l<=r)
        {
            mid = (l+r)/2;
            if(check(mid,n-1,k))
            {
                ans = mid;
                l = mid+1;
            }
            else r = mid-1;
        }
        printf("%d\n",ans);
    }

    return 0;
}

8.求大于k/2个字符串中含有的最长公共子串(修改次数,修改ans)输出该子串 poj3294

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <queue>
#include <algorithm>
#include <map>
#include <iomanip>
#define INF 99999999;
typedef long long LL;
using namespace std;

const int MAX=200000+10;
int *rank,r[MAX],sa[MAX],height[MAX],size;
int wa[MAX],wb[MAX],wm[MAX],len[110],pos[MAX];
bool mark[110];
char s[MAX];

// True when positions a and b carry the same pair of keys
// (r[a], r[a+l]) == (r[b], r[b+l]).
bool cmp(int *r,int a,int b,int l){
	if(r[a] != r[b])return false;
	return r[a+l] == r[b+l];
}

// Builds the suffix array of r[0..n-1] by prefix doubling with counting sorts.
//   r  : input symbols, used as bucket indices, so they must lie in [0, m)
//   sa : output; sa[i] receives the start index of the suffix with rank i
//   n  : number of symbols, including the terminating 0
//   m  : exclusive upper bound on the symbol values
// Uses wa, wb and wm as scratch; on return the global pointer rank refers to
// the buffer holding the final key of every position.
void makesa(int *r,int *sa,int n,int m){
	int *x=wa,*y=wb,*t;
	// Initial pass: counting sort of all suffixes by their first symbol.
	for(int i=0;i<m;++i)wm[i]=0;
	for(int i=0;i<n;++i)wm[x[i]=r[i]]++;
	for(int i=1;i<m;++i)wm[i]+=wm[i-1];
	for(int i=n-1;i>=0;--i)sa[--wm[x[i]]]=i;
	// Each round doubles j; stops once p (distinct keys assigned) reaches n.
	for(int i=0,j=1,p=0;p<n;j=j*2,m=p){
		// y = suffixes ordered by second key (the last j suffixes come first).
		for(p=0,i=n-j;i<n;++i)y[p++]=i;
		for(i=0;i<n;++i)if(sa[i]>=j)y[p++]=sa[i]-j;
		// Counting sort by first key, visiting suffixes in second-key order.
		for(i=0;i<m;++i)wm[i]=0;
		for(i=0;i<n;++i)wm[x[y[i]]]++;
		for(i=1;i<m;++i)wm[i]+=wm[i-1];
		for(i=n-1;i>=0;--i)sa[--wm[x[y[i]]]]=y[i];
		// Swap key buffers and assign new keys; equal key pairs share a key.
		for(t=x,x=y,y=t,i=p=1,x[sa[0]]=0;i<n;++i){
			x[sa[i]]=cmp(y,sa[i],sa[i-1],j)?p-1:p++;
		}
	}
	rank=x;
}

// Fills height[]: height[rank[i]] is the length of the common prefix of suffix
// i and the suffix ranked just before it, for i in 0..n-1.
void calheight(int *r,int *sa,int n){
	int common=0;
	for(int pos=0;pos<n;++pos){
		// The previous match length minus one is a lower bound here.
		if(common>0)--common;
		const int prev=sa[rank[pos]-1];
		while(r[pos+common] == r[prev+common])++common;
		height[rank[pos]]=common;
	}
}

// Scans ranks 1..n for runs with height >= mid. A run qualifies when its
// suffixes fall into more than k/2 different strings (string j occupies the
// open interval (len[j-1], len[j])). The start of the last suffix of every
// qualifying run is stored in pos[1..], their number in pos[0].
// Returns true when at least one run qualifies.
bool check(int mid,int n,int k){
	memset(mark,false,sizeof mark);
	int found=0,covered=0;
	for(int i=1;i<=n;++i){
		if(height[i]<mid){
			// Run ended: record it if it covers enough strings, then reset.
			if(covered>k/2)pos[++found]=sa[i-1];
			covered=0;
			memset(mark,false,sizeof mark);
			continue;
		}
		for(int j=1;j<=size;++j){
			const bool curInside=sa[i]>len[j-1] && sa[i]<len[j];
			const bool prevInside=sa[i-1]>len[j-1] && sa[i-1]<len[j];
			if((curInside || prevInside) && !mark[j]){
				mark[j]=true;
				++covered;
			}
		}
	}
	if(covered>k/2)pos[++found]=sa[n];
	if(found == 0)return false;
	pos[0]=found;
	return true;
}

// For every test case (k strings; input ends with k == 0 or end of stream):
// concatenates the strings with unique separators, builds the suffix array and
// binary-searches the greatest length accepted by check(). Prints every
// substring recorded by the last successful check, or "?" when there is none.
// Test cases are separated by a blank line.
int main(){
	int k,n,num=0;
	len[0]=-1;// len[0] must be negative so that string 1 starts at index 0
	// Stop on a failed read as well as on the terminating 0.
	while(cin>>k && k>0){
		pos[0]=size=n=0;// reset per test case
		for(int i=0;i<k;++i){
			scanf("%s",s+n);
			for(;s[n] != '\0';++n)r[n]=s[n];
			r[len[++size]=n++]=300+i;// 300+i keeps every separator distinct
		}
		r[n-1]=0;
		makesa(r,sa,n,400);
		calheight(r,sa,n-1);
		// Valid ranks are 1..n-1; height[n] and sa[n] are never filled, so
		// check() must not be asked to look at them.
		int L=1,R=n-1,mid;
		while(L<=R){
			mid=(L+R)>>1;
			if(check(mid,n-1,k))L=mid+1;
			else R=mid-1;
		}
		if(num++)printf("\n");
		if(L-1 == 0)printf("?\n");
		else{
			for(int i=1;i<=pos[0];++i){
				for(int j=pos[i];j<pos[i]+L-1;++j)printf("%c",s[j]);
				printf("\n");
			}
		}
		
	}
	return 0;
}

9.多个字符串求 出现2次以上且不重叠的子串个数 hdu3518 输出子串个数

#include <cstdio>
#include <cstring>
// Function-like macros; each argument may be evaluated twice, so avoid
// passing expressions with side effects.
#define max(a,b) ((a)>(b)?(a):(b))
#define min(a,b) ((a)<(b)?(a):(b))

// Capacity of every global array in this program.
const int N = int(1e4)+10;
// Large sentinel used to initialise running minima/maxima.
const int INF=0x3fffffff;

// Returns 1 when positions a and b have equal key pairs (r[p], r[p+l]),
// 0 otherwise. The second key is only inspected if the first keys agree.
int cmp(int *r,int a,int b,int l){
    if(r[a]!=r[b]) return 0;
    return r[a+l]==r[b+l] ? 1 : 0;
}
// Scratch space for DA(): wa/wb are the two rank buffers it swaps between
// rounds, ws is the counting-sort bucket array, wv caches the keys being sorted.
int wa[N],wb[N],ws[N],wv[N];
// rank[i]: index of suffix i inside sa; height[i]: common-prefix length of the
// suffixes sa[i-1] and sa[i]. Both are filled by calheight().
int rank[N],height[N];

void DA(int *r,int *sa,int n,int m){
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0;i<m;i++) ws[i]=0;
    for(i=0;i<n;i++) ws[x[i]=r[i]]++;
    for(i=1;i<m;i++) ws[i]+=ws[i-1];
    for(i=n-1;i>=0;i--) sa[--ws[x[i]]]=i;
    for(j=1,p=1;p<n;j*=2,m=p)
    {
        for(p=0,i=n-j;i<n;i++) y[p++]=i;
        for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        for(i=0;i<n;i++) wv[i]=x[y[i]];
        for(i=0;i<m;i++) ws[i]=0;
        for(i=0;i<n;i++) ws[wv[i]]++;
        for(i=1;i<m;i++) ws[i]+=ws[i-1];
        for(i=n-1;i>=0;i--) sa[--ws[wv[i]]]=y[i];
        for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        //printf("p = %d\n", p );
    }
}
// Derives rank[] from sa[1..n] and then fills height[]: height[rank[i]] is the
// length of the common prefix of suffix i and the suffix ranked just before it.
// Start positions are processed in text order so the previous match length
// (minus one) seeds the next comparison.
void calheight(int *r,int *sa,int n){
    for(int pos=1;pos<=n;++pos) rank[sa[pos]]=pos;

    int matched=0;
    for(int i=0;i<n;++i){
        if(matched) --matched;
        const int prev=sa[rank[i]-1];
        while(r[i+matched]==r[prev+matched]) ++matched;
        height[rank[i]]=matched;
    }
}

// data: current string as integer keys followed by a 0 terminator (set in main);
// sa: its suffix array; n: its length.
// temp, k and start are not referenced by the functions visible in this program
// (Deal() declares its own local temp).
int data[N],sa[N],temp[N],n,k,start;
// Input buffer for one word.
char str[N];

int Cal (int x)  
{  
	int maxn,minn,ans=0;
	for (int i=1;i<=n;i++)  
	{
		int L=i;  
		while (L<=n && height[L]<x) L++;  
		if (L>n) break;  
		int R=L;  
		while (R<=n && height[R]>=x) R++;  
		minn=INF,maxn=-INF;
		for (int j=L-1;j<=R-1;j++)
		{
			minn=min(minn,sa[j]);
			maxn=max(maxn,sa[j]);
		}
		if (maxn-minn>=x)
			ans++;
		i=R;  
	}  
	return ans;
} 

// Solves one test case: builds the suffix and height arrays for data[0..n],
// sums Cal(length) over lengths 1..n/2 and prints the total.
void Deal ()
{
	DA(data,sa,n+1,200);
	calheight(data,sa,n);

	int total=0;
	for (int length=1;length<=n/2;++length)
	{
		const int found=Cal(length);
		if (!found) // no qualifying substring of this length, so none longer either
			break;
		total+=found;
	}
	printf("%d\n",total);
}

// Reads words until a line starting with '#' (or end of input) and runs Deal()
// on each one.
int main ()
{
	// The scanf result must take part in the loop condition: when it is
	// discarded through the comma operator, hitting EOF without a '#' line
	// leaves str unchanged and the loop never terminates.
	while (scanf("%s",str)==1 && str[0]!='#')
	{
		n=strlen(str);
		for (int i=0;i<n;i++)
			data[i]=static_cast<int>(str[i]);
		data[n] = 0; // terminator, smaller than every real character
		Deal();
	}
	return 0;
}

10. 求在每个字符串中都至少出现2次且互不重叠的最长子串(spoj220),输出其长度

#include <cstdio>
#include <cstring>
#include <cmath>
#include <algorithm>
using namespace std;
// Function-like macros; each argument may be evaluated twice, so avoid
// passing expressions with side effects.
#define max(a,b) ((a)>(b)?(a):(b))
#define min(a,b) ((a)<(b)?(a):(b))

// Capacity of the global work arrays in this program.
const int N = int(2e5)+100;

// Doubling-step comparison: non-zero when positions a and b have the same
// first key r[p] and the same second key r[p+l]. Short-circuits, so the second
// keys are not read when the first keys differ.
int cmp(int *r,int a,int b,int l){
    const bool firstSame=(r[a]==r[b]);
    return firstSame && r[a+l]==r[b+l];
}
// Scratch space for DA(): wa/wb are the two rank buffers it swaps between
// rounds, ws is the counting-sort bucket array, wv caches the keys being sorted.
int wa[N],wb[N],ws[N],wv[N];
// rank[i]: index of suffix i inside sa; height[i]: common-prefix length of the
// suffixes sa[i-1] and sa[i]. Both are filled by calheight().
// NOTE(review): with `using namespace std;` in effect, the global name `rank`
// may clash with std::rank on C++11-and-later toolchains - confirm, and rename if so.
int rank[N],height[N];

void DA(int *r,int *sa,int n,int m){
    int i,j,p,*x=wa,*y=wb,*t;
    for(i=0;i<m;i++) ws[i]=0;
    for(i=0;i<n;i++) ws[x[i]=r[i]]++;
    for(i=1;i<m;i++) ws[i]+=ws[i-1];
    for(i=n-1;i>=0;i--) sa[--ws[x[i]]]=i;
    for(j=1,p=1;p<n;j*=2,m=p)
    {
        for(p=0,i=n-j;i<n;i++) y[p++]=i;
        for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
        for(i=0;i<n;i++) wv[i]=x[y[i]];
        for(i=0;i<m;i++) ws[i]=0;
        for(i=0;i<n;i++) ws[wv[i]]++;
        for(i=1;i<m;i++) ws[i]+=ws[i-1];
        for(i=n-1;i>=0;i--) sa[--ws[wv[i]]]=y[i];
        for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        //printf("p = %d\n", p );
    }
}
// Computes rank[] as the inverse of sa[1..n], then height[]: height[rank[i]]
// is the common-prefix length of suffix i and its predecessor in sa.
// Positions are handled in text order so each comparison can start from the
// previous match length minus one.
void calheight(int *r,int *sa,int n){
    for(int idx=1;idx<=n;++idx) rank[sa[idx]]=idx;

    int common=0;
    for(int i=0;i<n;++i){
        if(common) --common;
        const int before=sa[rank[i]-1];
        while(r[i+common]==r[before+common]) ++common;
        height[rank[i]]=common;
    }
}

// data: all input words concatenated as integer keys, with a distinct separator
// after each word and a final 0; loc[i]: 1-based index of the word owning
// position i, or -1 for a separator; sa: suffix array of data.
int data[N],loc[N],sa[N];
// n: length of the concatenation (without the final 0); q: number of words in
// the current case; vis/start: per-word scratch arrays used by Judge().
// The global id is not referenced by the visible functions (Input() declares
// its own loop variable of that name).
int n,q,id,vis[15],start[15];
// Input buffer for one word.
char str[10005];

bool Judge (int x)
{
	int i,j;
	bool flag=false;
	for (i=1;i<=n;i++)
	{
		int cnt=0;
		int L=i;
		while (L<=n && height[L]<x)
			L++;
		if (L>n) break;
		int R=L;
		while (R<=n && height[R]>=x)
			R++;
		memset(vis,0,sizeof(vis));
		memset(start,0,sizeof(start));
		for (j=L-1;j<=R-1;j++) if (loc[sa[j]]!=-1)
		{
			if (vis[loc[sa[j]]]==0) //之前没出现过
			{
				vis[loc[sa[j]]]=1; //出现一次
				start[loc[sa[j]]]=sa[j]; //记录本次出现时的起始位置
			}
			else
			{
				if (abs(sa[j]-start[loc[sa[j]]])>=x) //不重叠
					vis[loc[sa[j]]]++; //出现次数+1
			}
		}
		for (j=1;j<=q;j++)
			if (vis[j]>=2)
				cnt++;
		if (cnt==q)
            return true;
		i=R;
	}
	return false;
}

// Reads the q words of one test case, concatenates them into data[] with a
// distinct separator after each word (130, 131, ...), records the owning word
// of every position in loc[] (-1 for separators), appends a final 0 and builds
// the suffix and height arrays.
void Input ()
{
	int separator=130;
	n=0;
	memset(data,0,sizeof(data));
	for (int which=1;which<=q;++which)
	{
		scanf("%s",str);
		const int length=strlen(str);
		for (const char *p=str;p!=str+length;++p)
		{
			loc[n]=which;
			data[n]=(int)*p;
			++n;
		}
		// Separators must differ so no common prefix can span two words.
		loc[n]=-1;
		data[n]=separator;
		++n;
		++separator;
	}
	data[n]=0;
	DA(data,sa,n+1,separator+5);
	calheight(data,sa,n);
}

int main ()
{
	int T;
	scanf("%d",&T);
	while (T--)
	{
		scanf("%d",&q);
		Input ();
		int low=0,high=n,mid,res=0;
		while (low<high)
		{
			mid = (low+high)>>1;
			if (Judge(mid))
				res=mid,low=mid+1;
			else
				high=mid;
		}
		printf("%d\n",res);
	}
	return 0;
}

猜你喜欢

转载自blog.csdn.net/w1304636468/article/details/89425988
今日推荐