后缀数组应用总结
解决字符串问题的首选方案
(时间复杂度可以达到O(NLlogL)。其中N为字符串个数,L为每个串的长度)
1. 求两个字符串的最长公共子串
2. 求多个字符串的最长公共子串
3. 给定两个字符串A和B,求长度不小于k的公共子串的个数(可以相同)
4. 给定n个字符串,求出现在不小于k个字符串中的最长子串。
5. 给定n个字符串,求在每个字符串中至少出现两次且不重叠的最长子串。
6. 给定n个字符串,求出现或反转后出现在每个字符串中的最长子串。
7. 要求所有正向或者反向出现在超过k/2个串中的子串
8. 长度最少为5的最长重复子串
9. 找出出现k次的可重叠的最长子串的长度
10. 求不同子串的个数
11. 求最长回文子串
12. 求字符串最多的循环次数
13. 求重复次数最多的连续重复子串,并且要求字典序最小的
14. 求字符串中所有出现至少2次的子串个数
//sa:字典序中排第i位的起始位置在str中第sa[i] sa[1~n]为有效值
//rank:就是str第i个位置的后缀是在字典序排第几 rank[0~n-1]为有效值
//height:字典序排i和i-1的后缀的最长公共前缀 height[2~n]为有效值,第二个到最后一个
//height 两个连续后缀的最长公共前缀,即公共子串长度
公共子串
题意:求两个字符串的最长公共子串
https://blog.csdn.net/libin56842/article/details/46128353
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define W(a) while(a)
#define UP(i,x,y) for(i=x;i<=y;i++)
#define N 200005
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N];
char str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i] sa[1~n]为有效值
//rank:就是str第i个位置的后缀是在字典序排第几 rank[0~n-1]为有效值
//height:字典序排i和i-1的后缀的最长公共前缀 height[2~n]为有效值,第二个到最后一个
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
int main()
{
int i,j,k,len,n;
W(~scanf("%s%s",str1,str2))
{
len = strlen(str1);
n = 0;
UP(i,0,len-1)
s[n++] = str1[i]-'a'+1;
s[n++] = 30;
len = strlen(str2);
UP(i,0,len-1)
s[n++] = str2[i]-'a'+1;
s[n] = 0;
getsa(s,sa,n+1,31);
getheight(s,n);
len = strlen(str1);
int ans = 0;
UP(i,2,n-1)
{
if(height[i]>ans)
{
if(sa[i-1]>=0 && sa[i-1]<len && sa[i]>=len)
ans = max(ans,height[i]);
if(sa[i]>=0 && sa[i]<len && sa[i-1]>=len)
ans = max(ans,height[i]);
}
}
printf("%d\n",ans);
}
return 0;
}
题意:求多个字符串的最长公共子串
https://blog.csdn.net/libin56842/article/details/46430867
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define UP(i,x,y) for(i=x;i<=y;i++)
#define MEM(a,x) memset(a,x,sizeof(a))
#define N 1000005
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
char str[N],ans[N];
int id[N],vis[4005];
bool check(int mid,int n,int k)
{
int i,j,cnt = 0;
MEM(vis,0);
for(i = 2; i<=n; i++)
{
if(height[i]<mid)
{
MEM(vis,0);
cnt = 0;
continue;
}
if(!vis[id[sa[i-1]]])
{
cnt++;
vis[id[sa[i-1]]] = 1;
}
if(!vis[id[sa[i]]])
{
cnt++;
vis[id[sa[i]]] = 1;
}
if(cnt == k)
{
for(j = 0; j<mid; j++)
ans[j] = s[sa[i]+j];
ans[mid] = '\0';
return 1;
}
}
return 0;
}
// For every test case (a count k followed by k words, terminated by k == 0)
// prints the longest substring shared by all k words, or "IDENTITY LOST"
// when there is none. The length is found by binary search over check().
int main()
{
    int n,i,j,k,len;
    // Test scanf's own result: the old `~scanf(...),k` form threw it away and
    // kept looping on the stale k once the input ended.
    while(scanf("%d",&k)==1 && k)
    {
        n = 0;
        len = 0;
        for(i = 0; i<k; i++)
        {
            scanf("%s",str);
            len = strlen(str);
            for(j = 0; j<len; j++)
            {
                s[n] = str[j];
                id[n] = i;
                n++;
            }
            // Separators start at 256 so they can never equal a character of
            // the input ('#'+i reached 'a' at i == 62), and each one is
            // unique. They stay below the 5000 buckets given to getsa for
            // every k that vis[] can hold.
            s[n] = 256+i;
            // Keep the owner id inside vis[]; a separator suffix never
            // shares a prefix with a neighbour, so it is never counted.
            id[n] = i;
            n++;
        }
        s[n] = 0;
        getsa(s,sa,n+1,5000);
        getheight(s,n);
        // A common substring cannot be longer than any single word, so the
        // last word's length is a valid upper bound.
        int l = 1,r = len,mid,flag = 0;
        while(l<=r)
        {
            mid = (l+r)/2;
            if(check(mid,n,k))
            {
                flag = 1;
                l=mid+1;
            }
            else
                r=mid-1;
        }
        if(flag)
            printf("%s\n",ans);
        else
            printf("IDENTITY LOST\n");
    }
    return 0;
}
题意: 给定两个字符串A和B,求长度不小于k的公共子串的个数(可以相同)
https://blog.csdn.net/libin56842/article/details/46404323
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define UP(i,x,y) for(i=x;i<=y;i++)
#define MEM(a,x) memset(a,x,sizeof(a))
#define LL long long
#define N (2*100000+10)
int wa[N],wb[N],wm[N],wv[N],sa[N];
int *rank,height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// True when positions a and b carry equal keys in r both at the position
// itself and l slots further on.
bool cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b])
        return false;
    return r[a + l] == r[b + l];
}
void getsa(int *r,int *sa,int n,int m)
{
int *x=wa,*y=wb,*t;
for(int i=0; i<m; ++i)wm[i]=0;
for(int i=0; i<n; ++i)wm[x[i]=r[i]]++;
for(int i=1; i<m; ++i)wm[i]+=wm[i-1];
for(int i=n-1; i>=0; --i)sa[--wm[x[i]]]=i;
for(int i=0,j=1,p=0; p<n; j=j*2,m=p)
{
for(p=0,i=n-j; i<n; ++i)y[p++]=i;
for(i=0; i<n; ++i)if(sa[i]>=j)y[p++]=sa[i]-j;
for(i=0; i<m; ++i)wm[i]=0;
for(i=0; i<n; ++i)wm[x[y[i]]]++;
for(i=1; i<m; ++i)wm[i]+=wm[i-1];
for(i=n-1; i>=0; --i)sa[--wm[x[y[i]]]]=y[i];
for(t=x,x=y,y=t,i=p=1,x[sa[0]]=0; i<n; ++i)
{
x[sa[i]]=cmp(y,sa[i],sa[i-1],j)?p-1:p++;
}
}
rank=x;
}
void getheight(int *r,int *sa,int n)
{
for(int i=0,j=0,k=0; i<n; height[rank[i++]]=k)
{
for(k?--k:0,j=sa[rank[i]-1]; r[i+k] == r[j+k]; ++k);
}
}
int k;
char s1[N];
int len1;
// Sums, over the height array, contributions of size height[i]-k+1 between
// suffixes that start on opposite sides of position len, using a stack.
//   n   - last sa/height index to inspect
//   len - position of the separator between the two strings
//   k   - minimum length; heights below k reset the stack
// Returns the accumulated total as a 64-bit value.
LL solve(int n,int len,int k)
{
// mark and sta reuse the scratch arrays wa and wb as the stack storage:
// sta[] holds a stacked value, mark[] which side (1 or 2) it belongs to.
int *mark=wa,*sta=wb,top=0,i;
// num[1] / num[2]: running totals of stacked values for each side.
LL sum=0,num[3]= {0};
for(i = 1;i<=n;i++)
{
if(height[i]<k)
{
// The run is broken: empty the stack and clear both totals.
top = num[1] = num[2] =0;
}
else
{
// Lower every stacked value that exceeds the new one, and reduce the
// owning side's total by the same amount.
for(int size = top; size&&sta[size]>height[i]-k+1; size--)
{
num[mark[size]] += height[i]-k+1-sta[size];
sta[size] = height[i]-k+1;
}
sta[++top] = height[i]-k+1;
// Side of the previous suffix: 1 when it starts before len, 2 when after.
// NOTE(review): when sa[i-1] == len, mark[top] keeps whatever was already
// stored in wa — presumably unreachable because the separator is unique;
// confirm.
if(sa[i-1]<len) mark[top] = 1;
if(sa[i-1]>len) mark[top] = 2;
num[mark[top]]+=height[i]-k+1;
// The current suffix is paired with the total of the opposite side.
if(sa[i]<len) sum+=num[2];
if(sa[i]>len) sum+=num[1];
}
}
return sum;
}
// For every test case (k followed by two words, terminated by k == 0)
// prints the value computed by solve() for the two words joined with '#'.
int main()
{
    // Test scanf's own result: the old `~scanf(...),k` form threw it away and
    // kept looping on the stale k once the input ended.
    while(scanf("%d",&k)==1 && k)
    {
        scanf("%s",s1);
        int n = 0;
        for(n = 0; s1[n]!='\0'; n++)
            s[n] = s1[n];
        // Remember where the first word ends and put the separator there.
        s[len1=n] = '#';
        // The second word is read into s1 just past the first word's NUL.
        scanf("%s",s1+n+1);
        n++;
        for(; s1[n]!='\0'; n++)
            s[n] = s1[n];
        s[n] = 0;
        getsa(s,sa,n+1,201);
        getheight(s,sa,n);
        printf("%lld\n",solve(n,len1,k));
    }
    return 0;
}
题意:给定n个字符串,求出现在不小于k个字符串中的最长子串。
https://blog.csdn.net/libin56842/article/details/46409447
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1000005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
char str[N];
int len[105],size,ans[N];
bool vis[105];
// Decides whether a substring of length mid occurs in more than k/2 of the
// k strings. Every maximal run of height values >= mid is one candidate;
// when the run touches more than k/2 distinct strings, one start position
// is stored in ans[1..] and the number of stored positions in ans[0].
//   mid - candidate length
//   n   - last sa/height index to inspect
//   k   - number of strings; string j ends at separator position len[j]
// Returns 1 when at least one candidate was stored, otherwise 0.
int check(int mid,int n,int k)
{
    int i,j;
    int size = 0,cnt = 0;
    memset(vis,false,sizeof(vis));
    for(i = 1; i<=n; i++)
    {
        if(height[i]>=mid)
        {
            for(j = 1; j<=k; j++)
            {
                // String j covers the positions strictly between its two
                // separators. The first string has no separator in front of
                // it, so its lower bound is -1: comparing against len[0]
                // (which is 0) wrongly left out position 0.
                const int lo = (j==1) ? -1 : len[j-1];
                const int hi = len[j];
                // Mark the owner of sa[i] and of sa[i-1]; a string that was
                // already seen in this run is not counted twice.
                if(sa[i]>lo && sa[i]<hi && !vis[j])
                {
                    vis[j] = true;
                    cnt++;
                }
                if(sa[i-1]>lo && sa[i-1]<hi && !vis[j])
                {
                    vis[j] = true;
                    cnt++;
                }
            }
        }
        else
        {
            // The run ended just before i: keep it when it is wide enough.
            if(cnt>k/2) ans[++size] = sa[i-1];
            cnt = 0;
            memset(vis,false,sizeof(vis));
        }
    }
    // A run that reaches the last index is closed here.
    if(cnt>k/2) ans[++size] = sa[n];
    if(size)
    {
        ans[0] = size;
        return 1;
    }
    return 0;
}
// For every test case (k followed by k words, terminated by k == 0) prints
// all longest substrings that occur in more than half of the words, or "?"
// when there is none. Cases are separated by an empty line.
int main()
{
    int n,k,i,j,flag = 0;
    // Test scanf's own result: the old `~scanf(...),k` form threw it away and
    // kept looping on the stale k once the input ended.
    while(scanf("%d",&k)==1 && k)
    {
        n = 0;
        size = 0;
        for(i = 1; i<=k; i++)
        {
            scanf("%s",str+n);
            for(; str[n]!='\0'; n++)
                s[n] = str[n];
            // Separators start above the 7-bit range: '#'+i reached 'a' at
            // i == 62 and could then be matched like a letter. 127+i stays
            // below the 255 buckets given to getsa for every k that len[]
            // can hold. Assumes the words are plain ASCII — TODO confirm.
            s[n] = 127+i;
            len[++size] = n;
            n++;
        }
        // The last separator doubles as the terminating 0, so n already
        // counts it.
        s[n-1] = 0;
        getsa(s,sa,n,255);
        getheight(s,n-1);
        int l=1,r=n,mid;
        while(l<=r)
        {
            mid = (l+r)/2;
            // sa and height are filled only up to index n-1 here; passing n
            // made check() read one slot that the current case never wrote.
            if(check(mid,n-1,k)) l = mid+1;
            else r = mid-1;
        }
        if(flag)
            puts("");
        flag = 1;
        // l-1 is the largest length that check() accepted.
        if(l==1)
            puts("?");
        else
        {
            for(i = 1; i<=ans[0]; i++)
            {
                for(j = ans[i]; j<ans[i]+l-1; j++)
                    printf("%c",s[j]);
                puts("");
            }
        }
    }
    return 0;
}
题意:给定n个字符串,求在每个字符串中至少出现两次且不重叠的最长子串。
https://blog.csdn.net/libin56842/article/details/46410431
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define N 100005
#define INF 0x3f3f3f3f
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
char str[N];
int id[N],maxn[N],minn[N];
// Decides whether some run of height values >= mid contains, for each of
// the strings 0..k-1, two start positions that are at least mid apart.
//   mid - candidate length
//   n   - last sa/height index to inspect
//   k   - number of strings; id[] maps a position to its string
// Uses maxn[] / minn[] as the largest / smallest start position seen per
// string within the current run.
bool check(int mid,int n,int k)
{
int i,j;
// Start with empty per-string ranges.
for(i = 0; i<=k; i++)
{
maxn[i] = 0;
minn[i] = INF;
}
for(i = 1; i<=n; i++)
{
if(height[i]<mid)
{
// The run is broken: clear every range, then open a new run that so far
// contains only the suffix sa[i].
for(j = 0; j<=k; j++)
{
maxn[j] = 0;
minn[j] = INF;
}
maxn[id[sa[i]]] = sa[i];
minn[id[sa[i]]] = sa[i];
}
else
{
// Track, for the owning string, the leftmost and rightmost start positions
// in use; their distance must be at least the binary-searched answer.
maxn[id[sa[i]]] = max(maxn[id[sa[i]]],sa[i]);
minn[id[sa[i]]] = min(minn[id[sa[i]]],sa[i]);
maxn[id[sa[i-1]]] = max(maxn[id[sa[i-1]]],sa[i-1]);
minn[id[sa[i-1]]] = min(minn[id[sa[i-1]]],sa[i-1]);
// Succeed as soon as every string 0..k-1 has a wide enough range.
for(j = 0; j<k; j++)
{
if(maxn[j]-minn[j]<mid)
break;
}
if(j==k) return true;
}
}
return false;
}
// Reads t test cases, each a count k followed by k words, and prints the
// largest length accepted by check() for that case (0 when none is).
int main()
{
    int t,n,i,k;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%d",&k);
        n = 0;
        for(i = 0; i<k; i++)
        {
            scanf("%s",str+n);
            for(; str[n]!='\0'; n++)
            {
                s[n] = str[n];
                id[n] = i;
            }
            // Give the separator an owner too, so check() never reads an id
            // left over from an earlier, longer test case.
            id[n] = i;
            s[n++] = '#'+i;
        }
        // The last separator doubles as the terminating 0, so n already
        // counts it.
        s[n-1] = 0;
        getsa(s,sa,n,255);
        getheight(s,n-1);
        int l = 0,r = 10000,mid,ans = 0;
        while(l<=r)
        {
            mid = (l+r)/2;
            // sa and height are filled only up to index n-1 here; passing n
            // made check() read one slot that the current case never wrote.
            if(check(mid,n-1,k))
            {
                ans = mid;
                l = mid+1;
            }
            else r = mid-1;
        }
        printf("%d\n",ans);
    }
    return 0;
}
题意:给定n个字符串,求出现或反转后出现在每个字符串中的最长子串。
https://blog.csdn.net/libin56842/article/details/46412209
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define MEM(a,x) memset(a,x,sizeof(a))
#define N 1000005
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
char str[N];
int len[105],size,ans[N],id[N];
bool vis[105];
bool check(int mid,int n,int k)
{
int i,j;
int size = 0,cnt = 0;
MEM(vis,false);
for(i = 1; i<=n; i++)
{
if(height[i]>=mid)
{
for(j = 0; j<k; j++)
{
if(id[sa[i]]==j) cnt+=(vis[j]?0:1),vis[j]=true;
if(id[sa[i-1]]==j) cnt+=(vis[j]?0:1),vis[j]=true;
}
}
else
{
if(cnt>=k) return true;
cnt = 0;
MEM(vis,false);
}
}
if(cnt>=k) return true;
return false;
}
// Reads t test cases, each a count k followed by k words. Every word is
// stored twice, forwards and reversed, both copies owned by the same word.
// Prints the largest length accepted by check() (0 when none is).
int main()
{
    // Separators start at 256 so they can never equal a character of the
    // input: '#'+p reached 'A' at p == 30 and could then be matched like a
    // letter.
    const int kSepBase = 256;
    int n,k,i,j,t;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%d",&k);
        n = 0;
        size = 0;
        int p = 1;
        for(i = 0; i<k; i++)
        {
            scanf("%s",str);
            int ll = strlen(str);
            for(j = 0; j<ll; j++)
            {
                id[n] = i;
                s[n++] = str[j];
            }
            s[n++] = kSepBase+(p++);
            for(j = ll-1; j>=0; j--)
            {
                id[n] = i;
                s[n++] = str[j];
            }
            s[n++] = kSepBase+(p++);
        }
        // The last separator doubles as the terminating 0, so n already
        // counts it.
        s[n-1] = 0;
        // The largest separator handed out is kSepBase+p-1, so kSepBase+p
        // buckets are enough.
        getsa(s,sa,n,kSepBase+p);
        getheight(s,n-1);
        int l=1,r=n,mid,ans = 0;
        while(l<=r)
        {
            mid = (l+r)/2;
            // sa and height are filled only up to index n-1 here; passing n
            // made check() read one slot that the current case never wrote.
            if(check(mid,n-1,k))
            {
                ans = mid;
                l = mid+1;
            }
            else r = mid-1;
        }
        printf("%d\n",ans);
    }
    return 0;
}
题意:要求所有正向或者反向出现在超过k/2个串中的子串
https://blog.csdn.net/libin56842/article/details/46439069
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define MEM(a,x) memset(a,x,sizeof(a))
#define N 1000005
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank1[N],height[N],s[N],a[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
// Fills rank1 and height from the suffix array sa with one linear scan.
//   r - the keys that were passed to getsa
//   n - text length, not counting the trailing 0
// rank1[i] becomes the position of suffix i inside sa; height[q] becomes the
// common-prefix length of the suffixes at sa[q-1] and sa[q].
void getheight(int *r,int n)
{
    for (int q = 1; q <= n; ++q)
        rank1[sa[q]] = q;

    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        if (matched > 0)
            --matched;  // carry over the previous match length minus one
        const int neighbour = sa[rank1[start] - 1];
        while (r[start + matched] == r[neighbour + matched])
            ++matched;
        height[rank1[start]] = matched;
    }
}
char str[N];
int id[N];
map<string,int> mat,ans;
map<string,int>::iterator it;
// Counts how many of the bits 1..10 are set in x (bit 0 is not counted).
int check(int x)
{
    int members = 0;
    int bit = 1;
    while (bit <= 10)
    {
        members += (x >> bit) & 1;
        ++bit;
    }
    return members;
}
// Reads test cases until input ends: a count k, then k words. Every word is
// appended to s forwards and then reversed, each copy followed by a fresh
// separator value taken from p. A binary search over mid in [1,1000] then
// collects, per candidate length, the substrings whose owner bitmask covers
// at least k/2+1 words, and the last non-empty collection is printed (or
// "NONE").
int main()
{
int n,i,j,k,len;
while(~scanf("%d",&k))
{
MEM(id,0);
n = 0;
int p = 200;
for(i = 1; i<=k; i++)
{
scanf("%s",str);
len = strlen(str);
// Forward copy: positions are tagged with the 1-based word number.
for(j = 0; j<len; j++)
{
id[n] = i;
s[n++] = str[j];
}
s[n++] = p++;
// Reversed copy.
// NOTE(review): id[] is not assigned here, so these positions keep the 0
// written by MEM(id,0) — confirm that bit 0 is meant to stand for them.
for(j = len-1; j>=0; j--)
s[n++] = str[j];
s[n++] = p++;
}
// With a single word the word itself is printed.
if(k == 1)
{
printf("%s\n",str);
continue;
}
// NOTE(review): no terminating 0 is appended and both calls get the same n,
// although getsa's own comment asks for n to include a trailing 0 — confirm.
getsa(s,sa,n,p);
getheight(s,n);
int l = 1,r = 1000;
ans.clear();
while(l<=r)
{
int mid = (l+r)/2;
// NOTE(review): the scan starts at i == 0, so sa[i-1] below is read at
// index -1 whenever height[0] >= mid — confirm.
i = 0;
mat.clear();
while(i<n)
{
if(height[i]>=mid)
{
int tem = 1<<id[sa[i-1]];
len = 2000;
while(height[i]>=mid && i<n)// record the owning words as a bitmask
{
tem |= (1<<id[sa[i]]);
len = min(len,height[i]);
i++;
}
// tem == 1 means only bit 0 was set.
if(tem!=1)
{
// s1 is the run's common prefix reversed, s2 is the prefix as stored.
char s1[1005],s2[1005];
for(j = len-1; j>=0; j--)
{
s1[len-1-j] = s[sa[i-1]+j];
s2[j] = s[sa[i-1]+j];
}
s1[len] = s2[len] = '\0';
// Merge into an existing entry for the reversed text, else add a new one.
if(mat.find(string(s1)) != mat.end())
mat[string(s1)] |= tem;
else
mat[string(s2)] = tem;
}
}
i++;
}
int flag = 0;
for(it = mat.begin(); it!=mat.end(); it++)
{
// Keep entries whose bitmask has at least k/2+1 of the bits 1..10 set.
if(check(it->second) >= k/2+1)
{
if(flag==0)
{
// First hit for this mid: drop the answers of the previous length.
ans.clear();
flag = 1;
}
ans.insert(*it);
}
}
if(flag==0) r = mid-1;
else l = mid+1;
}
if(ans.size()==0)
printf("NONE\n");
else
{
for(it = ans.begin(); it!=ans.end(); it++)
{
printf("%s\n",it->first.c_str());
}
}
}
return 0;
}
最长重复子串
题意:长度最少为5的重复不重叠子串,先差分
https://blog.csdn.net/libin56842/article/details/46233687
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define UP(i,x,y) for(i=x;i<=y;i++)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define N 20005
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N],n;
char str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
int ans;
int fun(int k)
{
int i,maxn,minn;
maxn = minn = sa[1];
UP(i,2,n)
{
if(height[i]>=k && i<n)
{
minn = min(minn,sa[i]);
maxn = max(maxn,sa[i]);
continue;
}
if(maxn-minn>=k) return 1;
maxn = minn = sa[i];
}
return 0;
}
int main()
{
int i,j,k;
W((~scanf("%d",&n),n))
{
UP(i,0,n-1)
{
scanf("%d",&s[i]);
}
UP(i,0,n-2)
{
s[i] = s[i+1]-s[i]+100;
}
s[--n] = 0;
getsa(s,sa,n+1,200);
getheight(s,n);
int l = 4,r = n;
W(l<=r)
{
int mid = (l+r)/2;
if(fun(mid))
{
ans = mid;
l=mid+1;
}
else r = mid-1;
}
ans++;
printf("%d\n",ans<5?0:ans);
}
return 0;
}
至少出现k次的重复子串(可重叠)
题意:找出出现k次的可重叠的最长子串的长度
https://blog.csdn.net/libin56842/article/details/46236377
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 20005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
char str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
int ans,n,m;
int fun(int k)
{
int i,maxn,minn=sa[1],cnt = 1;
UP(i,2,n)
{
if(height[i]>=k)//首先最长公共前缀肯定要大于现在枚举的长度
{
cnt++;//看连续的到底有几个
minn = min(minn,sa[i]);//这一组中,长度最小的子串是多长
}
else
{
cnt = 1;//如果不行,那么重新分组
minn = sa[i];
}
if(cnt>=m)//次数超过了,那么这个k长度下是可行的
return 1;
}
return 0;
}
int main()
{
int i,j,k,maxn;
W((~scanf("%d%d",&n,&m)))
{
ans = maxn = 0;
UP(i,0,n-1)
{
scanf("%d",&s[i]);
maxn = max(maxn,s[i]);
}
s[n] = 0;
getsa(s,sa,n+1,maxn+1);
getheight(s,n);
int l = 1,r = n;
W(l<=r)
{
int mid = (l+r)/2;
if(fun(mid))
{
ans = mid;
l=mid+1;
}
else r = mid-1;
}
printf("%d\n",ans);
}
return 0;
}
不相同子串的个数
题意:求不同子串的个数
https://blog.csdn.net/libin56842/article/details/46236781
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
char str[N],str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
int t,ans,n,m;
int main()
{
int i,j,k,len;
scanf("%d",&t);
W(t--)
{
scanf("%s",str);
len = strlen(str);
UP(i,0,len-1)
s[i]=str[i];
s[len] = 0;
getsa(s,sa,len+1,300);
getheight(s,len);
ans = (1+len)*len/2;
UP(i,2,len)
ans-=height[i];
printf("%d\n",ans);
}
}
最长回文子串
题意:求最长回文子串
https://blog.csdn.net/libin56842/article/details/46281255
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 2222
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
char str[N],str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
// Reads words until input ends and prints one palindromic substring per
// word. The word and its reversal are joined with the separator value 200,
// and neighbouring suffixes in sorted order are examined.
int main()
{
int len,n=0,i,j,k;
W(~scanf("%s",str))
{
n = 0;
len = strlen(str);
// s = word + 200 + reversed word + 0
UP(i,0,len-1)
s[n++] = str[i];
s[n++] = 200;
DOWN(i,len-1,0)
s[n++] = str[i];
s[n] = 0;
getsa(s,sa,n+1,300);
getheight(s,n);
// Default answer: the single character at position 0.
int ans = 1,x=0;
UP(i,1,n-1)
{
int minn = min(sa[i],sa[i-1]);
int maxn = max(sa[i],sa[i-1]);
if(minn>=len || maxn<len) continue;// the two suffixes must lie in different halves
if(minn+height[i]!=n-maxn) continue;// minn+height[i] is one past the end of the common prefix; it has to coincide with n-maxn, the position that the reversed-half suffix corresponds to
if(height[i]>ans)
{
ans = height[i];
x = minn;
}
else if(height[i]==ans)
{
// Among equally long candidates keep the leftmost start.
x = min(minn,x);
}
}
// Print ans characters of the original word starting at x.
for(i=x; ans--; i++)
printf("%c",str[i]);
puts("");
}
}
连续重复子串
题意:求字符串最多的循环次数
https://blog.csdn.net/libin56842/article/details/46310425
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1000005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
// Index mapping used by dc3 between a text position and its slot in the
// reduced problem; both expand the local variable tb of dc3.
#define F(x) ((x)/3+((x)%3==1?0:tb))
#define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)
// dc3 recurses on r+n and sa+n: each level stores its reduced text and its
// suffix array right behind the current one. The text buffer s and sa
// therefore need about three times the text length; with only N entries a
// text close to N characters ran past both arrays.
int wsf[N],wa[N],wb[N],wv[N],sa[3*N],rank[N],height[N],f[N];
int s[3*N],a[N];
char str[N],str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when the three consecutive keys starting at a equal the three
// consecutive keys starting at b, otherwise 0.
int c0(int *r,int a,int b)
{
    for (int off = 0; off < 3; ++off)
    {
        if (r[a + off] != r[b + off])
            return 0;
    }
    return 1;
}
// Ordering test used while merging in dc3: returns 1 when the suffix at a
// sorts before the suffix at b. The first key decides; on a tie, k == 2
// compares one more key before falling back to wv, otherwise the positions
// a+1 and b+1 are compared through wv directly.
int c12(int k,int *r,int a,int b)
{
    if (r[a] < r[b])
        return 1;
    if (r[a] != r[b])
        return 0;
    if (k == 2)
        return c12(1, r, a + 1, b + 1);
    return wv[a + 1] < wv[b + 1];
}
// Counting sort: writes the n indices of a into b, ordered by the key
// r[a[i]]. Keys must be < m. Entries with equal keys keep the order they
// had in a. Uses the scratch arrays wv (keys) and wsf (bucket counts).
void sort(int *r,int *a,int *b,int n,int m)
{
    for (int idx = 0; idx < n; ++idx)
        wv[idx] = r[a[idx]];

    for (int bucket = 0; bucket < m; ++bucket)
        wsf[bucket] = 0;
    for (int idx = 0; idx < n; ++idx)
        ++wsf[wv[idx]];
    // Turn the counts into end positions.
    for (int bucket = 1; bucket < m; ++bucket)
        wsf[bucket] += wsf[bucket - 1];

    // Fill from the back so equal keys keep their input order.
    for (int idx = n - 1; idx >= 0; --idx)
    {
        const int slot = --wsf[wv[idx]];
        b[slot] = a[idx];
    }
}
// Builds the suffix array of r[0..n-1] into sa by sorting the positions
// with i%3 != 0 first (recursively if needed), then the positions with
// i%3 == 0, and finally merging the two groups.
//   r  - input keys, each < m; r[n] and r[n+1] are overwritten with 0
//   sa - output suffix array
//   n  - number of keys
//   m  - number of counting-sort buckets
// NOTE(review): the recursion works in r+n and sa+n, so both buffers need
// room well beyond n entries — confirm the sizes of the arrays passed in.
void dc3(int *r,int *sa,int n,int m)
{
int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p;
// Pad with two zeros so key triples can be read at every position.
r[n]=r[n+1]=0;
// Collect the positions with i%3 != 0 ...
for(i=0; i<n; i++) if(i%3!=0) wa[tbc++]=i;
// ... and order them by their three leading keys, last key first.
sort(r+2,wa,wb,tbc,m);
sort(r+1,wb,wa,tbc,m);
sort(r,wa,wb,tbc,m);
// Name the triples: equal triples get the same number, p counts the names.
for(p=1,rn[F(wb[0])]=0,i=1; i<tbc; i++)
rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++;
// Names not all distinct: recurse on the reduced text; otherwise the names
// already give the order.
if(p<tbc) dc3(rn,san,tbc,p);
else for(i=0; i<tbc; i++) san[rn[i]]=i;
// Positions with i%3 == 0, taken in the order of the suffix that follows
// them, then sorted by their own first key.
for(i=0; i<tbc; i++) if(san[i]<tb) wb[ta++]=san[i]*3;
if(n%3==1) wb[ta++]=n-1;
sort(r,wb,wa,ta,m);
// Map the reduced order back to text positions and record each rank in wv.
for(i=0; i<tbc; i++) wv[wb[i]=G(san[i])]=i;
// Merge the two sorted groups using c12.
for(i=0,j=0,p=0; i<ta && j<tbc; p++)
sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
for(; i<ta; p++) sa[p]=wa[i++];
for(; j<tbc; p++) sa[p]=wb[j++];
return;
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
int rm[N];
void RMQ(int n)
{
int k = rank[0];
rm[k] = N;
int i;
DOWN(i,k-1,0)
{
if(height[i+1]<rm[i+1]) rm[i]=height[i+1];
else rm[i]=rm[i+1];
}
UP(i,k+1,n)
{
if(height[i]<rm[i-1]) rm[i]=height[i];
else rm[i]=rm[i-1];
}
}
int solve(int n)
{
int i;
UP(i,1,n/2)
{
if(n%i) continue;
if(rm[rank[i]]==n-i) return n/i;
}
return 1;
}
int main()
{
int n,len,i,j,k;
W(~scanf("%s",str))
{
if(str[0]=='.')
break;
len = strlen(str);
UP(i,0,len-1)
s[i]=str[i];
s[len] = 0;
dc3(s,sa,len+1,300);
getheight(s,len);
RMQ(len);
printf("%d\n",solve(len));
}
}
重复次数最多的连续重复子串
题意:求重复次数最多的连续重复子串,并且要求字典序最小的
https://blog.csdn.net/libin56842/article/details/46317153
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 1000005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
int wa[N],wb[N],wsf[N],wv[N],sa[N];
int rank[N],height[N],s[N],a[N];
char str[N],str1[N],str2[N];
//sa:字典序中排第i位的起始位置在str中第sa[i]
//rank:就是str第i个位置的后缀是在字典序排第几
//height:字典序排i和i-1的后缀的最长公共前缀
// Returns 1 when positions a and b carry equal keys in r both at the
// position itself and k slots further on; returns 0 otherwise.
int cmp(int *r,int a,int b,int k)
{
    if (r[a] != r[b])
        return 0;
    return r[a + k] == r[b + k] ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] into sa by prefix doubling, using a
// counting sort in every round.
//   r  - input keys; every value must be < m because it indexes wsf
//   sa - output: sa[i] is the start index of the i-th suffix in sorted order
//   n  - number of keys, which must include the 0 appended at the end
//   m  - number of counting-sort buckets for the first pass
// Uses the file-level scratch arrays wa, wb, wsf and wv.
void getsa(int *r,int *sa,int n,int m)// n must include the 0 appended at the end
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of the suffixes by their first key.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round doubles the compared length j. p ends a round as the number of
// distinct ranks, so the loop stops once all n suffixes are told apart.
for(; p<n; j*=2,m=p)
{
// y lists the suffix starts ordered by their second half: the last j
// suffixes have no second half and come first, the rest follow sa order.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Counting sort of y by the first-half rank held in x.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers so y holds the old ranks, then write new ranks into x.
t=x;
x=y;
y=t;
x[sa[0]]=0;
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
void getheight(int *r,int n)//n不保存最后的0
{
int i,j,k=0;
for(i=1; i<=n; i++) rank[sa[i]]=i;
for(i=0; i<n; i++)
{
if(k)
k--;
else
k=0;
j=sa[rank[i]-1];
while(r[i+k]==r[j+k])
k++;
height[rank[i]]=k;
}
}
int Log[N];
int best[20][N];
// Fills Log[0..n] with floor(log2(v)); Log[0] is set to -1 so that the
// recurrence yields Log[1] == 0.
void setLog(int n)
{
    Log[0] = -1;
    for (int v = 1; v <= n; ++v)
    {
        // The value grows by one exactly at the powers of two.
        const bool powerOfTwo = (v & (v - 1)) == 0;
        Log[v] = Log[v - 1] + (powerOfTwo ? 1 : 0);
    }
}
// Builds the sparse table over height[1..n]: best[lvl][j] is the minimum
// of height[j .. j + 2^lvl - 1]. Needs Log[] to be filled already.
void RMQ(int n) {
    for (int pos = 1; pos <= n; ++pos)
        best[0][pos] = height[pos];

    for (int lvl = 1; lvl <= Log[n]; ++lvl) {
        const int half = 1 << (lvl - 1);
        const int lastStart = n - (1 << lvl) + 1;
        for (int pos = 1; pos <= lastStart; ++pos) {
            const int left = best[lvl - 1][pos];
            const int right = best[lvl - 1][pos + half];
            best[lvl][pos] = (right < left) ? right : left;
        }
    }
}
int lcp(int a,int b) {//询问a,b后缀的最长公共前缀
a = rank[a]; b = rank[b];
if(a > b) swap(a,b);
a ++;
int t = Log[b - a + 1];
return min(best[t][a] , best[t][b - (1<<t) + 1]);
}
int ans[N],len,cas = 1;
int main()
{
int i,j,k,n,l;
setLog(N-1);
W(~scanf("%s",str))
{
if(str[0]=='#') break;
n = strlen(str);
UP(i,0,n-1)
s[i] = str[i];
s[n] = 0;
getsa(s,sa,n+1,300);
getheight(s,n);
RMQ(n);
int maxn = -1;
len = 0;
UP(l,1,n-1)
{
for(i=0;i+l<n;i+=l)
{
k = lcp(i,i+l);
int m = k/l+1;
int t = l-k%l;
t = i-t;
if(t>=0 && k%l)
{
if(lcp(t,t+l)>=k) m++;
}
if(m>maxn)
{
len = 0;
ans[len++] = l;
maxn = m;
}
else if(m == maxn)
{
ans[len++] = l;
}
}
}
int start,flag = 0;
UP(i,1,n)
{
if(flag)
break;
UP(j,0,len-1)
{
int tem = ans[j];
if(lcp(sa[i],sa[i]+tem)>=(maxn-1)*tem)
{
start = sa[i];
l = tem*maxn;
flag = 1;
break;
}
}
}
printf("Case %d: ",cas++);
UP(i,0,l-1)
printf("%c",str[start+i]);
printf("\n");
}
}
题意:求字符串中所有出现至少2次的子串个数
https://blog.csdn.net/libin56842/article/details/46431531
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define LS 2*i
#define RS 2*i+1
#define UP(i,x,y) for(i=x;i<=y;i++)
#define DOWN(i,x,y) for(i=x;i>=y;i--)
#define MEM(a,x) memset(a,x,sizeof(a))
#define W(a) while(a)
#define gcd(a,b) __gcd(a,b)
#define LL long long
#define N 100005
#define MOD 1000000007
#define INF 0x3f3f3f3f
#define EXP 1e-8
// wa/wb/wsf/wv are scratch buffers of getsa(); sa receives the suffix array.
int wa[N],wb[N],wsf[N],wv[N],sa[N];
// rank1/height are filled by getheight(); s holds the input sequence as ints.
int rank1[N],height[N],s[N];
//sa: sa[i] is the start position in str of the suffix ranked i-th in lexicographic order
//rank: the lexicographic rank of the suffix that starts at position i of str
//height: longest common prefix of the suffixes ranked i and i-1
// Returns 1 when positions a and b agree on both keys r[.] and r[.+k], otherwise 0.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return (r[a+k]==r[b+k]) ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
// r:  input sequence (values are used as indices into wsf, so they must be in [0,m)).
// sa: output; sa[i] is the start index of the suffix ranked i.
// n:  length of r including the appended trailing 0.
// m:  number of distinct key values allowed in the first pass.
void getsa(int *r,int *sa,int n,int m)//n must include the appended trailing 0
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of all suffixes by their first element; x[] keeps the keys.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round sorts by the pair (x[i], x[i+j]); it stops once p, the number of distinct ranks, reaches n.
for(; p<n; j*=2,m=p)
{
// y = suffix starts ordered by the second key: the last j suffixes have none and go first,
// the rest follow in the order of the current sa shifted back by j.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Stable counting sort of y by the first key.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers: y now holds the old ranks, x receives the new ones.
t=x;
x=y;
y=t;
x[sa[0]]=0;
// Neighbours in sa with equal key pairs share a rank; otherwise a new rank is opened.
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
// Fills the global rank1[] and height[] from the already built global sa[].
// r: the sequence whose suffixes were sorted; n: its length WITHOUT the trailing 0.
// height[rank1[p]] is the common-prefix length of suffix p and the suffix ranked just before it.
void getheight(int *r,int n)
{
    for(int order=1; order<=n; ++order)
        rank1[sa[order]]=order;
    int matched=0;
    for(int pos=0; pos<n; ++pos)
    {
        // The previous result minus one is a valid lower bound for this position.
        if(matched>0)
            --matched;
        const int before=sa[rank1[pos]-1];
        while(r[pos+matched]==r[before+matched])
            ++matched;
        height[rank1[pos]]=matched;
    }
}
char str[N];
// Reads t test cases; for each string prints a count derived from the height array:
// the sum of every rise of height[] between consecutive ranks.
int main()
{
    int t;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%s",str);
        const int len = strlen(str);
        for(int p = 0; p<len; ++p)
            s[p] = str[p];
        s[len] = 0;
        getsa(s,sa,len+1,200);
        getheight(s,len);
        int total = 0;
        int base = 0;   // height at the start of the current rising run
        int peak = 0;   // highest height seen in the current rising run
        for(int r = 1; r<=len; ++r)
        {
            const int h = height[r];
            if(h<=peak)
            {
                // The run ended: account for its rise and restart from h.
                total += peak-base;
                peak = h;
                base = h;
            }
            else
            {
                peak = h;
            }
        }
        total += peak-base;
        printf("%d\n",total);
    }
    return 0;
}
计蒜客总结后缀数组:
后缀数组模板:同时求出 sa 和 height。字符串下标从 1 开始:输入时用 %s 读入到 ch+1,字符串长度为 strlen(ch+1)。
// Builds the suffix array and the height array of the 1-indexed sequence ch[1..n]
// (both read from the enclosing scope) by prefix doubling with counting sort.
// Results: sa[1..n] (sa[r] = start of the suffix ranked r), _rank[1..n], height[1..n].
// A, B, cntA, cntB and tsa are scratch.
void da()
{
// Round 0: counting sort by single symbol; only the keys 0..129 are cleared and prefix-summed.
for (int i = 0; i < 130; i ++) cntA[i] = 0;
for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
for (int i = 1; i < 130; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
// Initial ranks: equal symbols share a rank.
_rank[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
_rank[sa[i]] = _rank[sa[i - 1]];
if (ch[sa[i]] != ch[sa[i - 1]]) _rank[sa[i]] ++;
}
// Double the compared length l until the largest rank equals n, i.e. all ranks are distinct.
for (int l = 1; _rank[sa[n]] < n; l <<= 1)
{
for (int i = 0; i <= n; i ++) cntA[i] = 0;
for (int i = 0; i <= n; i ++) cntB[i] = 0;
for (int i = 1; i <= n; i ++)
{
// A[i] is the first key; B[i] is the second key, 0 when i + l lies past the end.
cntA[A[i] = _rank[i]] ++;
cntB[B[i] = (i + l <= n) ? _rank[i + l] : 0] ++;
}
// Sort by the second key into tsa, then stably by the first key into sa.
for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
// Re-rank: neighbours in sa with identical key pairs share a rank.
_rank[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
_rank[sa[i]] = _rank[sa[i - 1]];
if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) _rank[sa[i]] ++;
}
}
// height[_rank[i]] = common-prefix length of suffix i and the suffix ranked just before it;
// j is carried over (minus one) from one i to the next.
for (int i = 1, j = 0; i <= n; i ++)
{
if (j) j --;
while (ch[i + j] == ch[sa[_rank[i] - 1] + j]) j ++;
height[_rank[i]] = j;
}
}
1.最长重复不重叠子串长度(差分)https://www.jisuanke.com/course/1177/62045 输出最长重复不重叠子串的长度
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;
// Capacity of every array below.
const int maxn = 40005;
// arr: raw input values; ch: the difference sequence handed to da(); A/B: sort keys used by da().
// n: input length; l, r, mid, res: binary-search state used by main().
int arr[maxn], ch[maxn], A[maxn], B[maxn]; int n, l, r, mid, res;
// Counting-sort buckets, ranks, suffix array and temporary suffix array used by da().
int cntA[maxn], cntB[maxn], _rank[maxn], sa[maxn], tsa[maxn];
// height[r]: common-prefix length of the suffixes ranked r and r-1 (filled by da()).
int height[maxn];
// Builds the suffix array and the height array of the global 1-indexed sequence ch[1..n]
// by prefix doubling with counting sort.
// Results: sa[1..n] (sa[r] = start of the suffix ranked r), _rank[1..n], height[1..n].
// A, B, cntA, cntB and tsa are scratch.
void da()
{
// Round 0: counting sort by single symbol; only the keys 0..129 are cleared and prefix-summed.
for (int i = 0; i < 130; i ++) cntA[i] = 0;
for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
for (int i = 1; i < 130; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
// Initial ranks: equal symbols share a rank.
_rank[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
_rank[sa[i]] = _rank[sa[i - 1]];
if (ch[sa[i]] != ch[sa[i - 1]]) _rank[sa[i]] ++;
}
// Double the compared length l until the largest rank equals n, i.e. all ranks are distinct.
for (int l = 1; _rank[sa[n]] < n; l <<= 1)
{
for (int i = 0; i <= n; i ++) cntA[i] = 0;
for (int i = 0; i <= n; i ++) cntB[i] = 0;
for (int i = 1; i <= n; i ++)
{
// A[i] is the first key; B[i] is the second key, 0 when i + l lies past the end.
cntA[A[i] = _rank[i]] ++;
cntB[B[i] = (i + l <= n) ? _rank[i + l] : 0] ++;
}
// Sort by the second key into tsa, then stably by the first key into sa.
for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
// Re-rank: neighbours in sa with identical key pairs share a rank.
_rank[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
_rank[sa[i]] = _rank[sa[i - 1]];
if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) _rank[sa[i]] ++;
}
}
// height[_rank[i]] = common-prefix length of suffix i and the suffix ranked just before it;
// j is carried over (minus one) from one i to the next.
for (int i = 1, j = 0; i <= n; i ++)
{
if (j) j --;
while (ch[i + j] == ch[sa[_rank[i] - 1] + j]) j ++;
height[_rank[i]] = j;
}
}
// Returns true when some run of consecutive ranks with height >= v contains two
// suffix starts that are at least v apart.
bool check(int v)
{
    int lowest = sa[1];
    int highest = sa[1];
    for (int r = 2; r <= n; ++r)
    {
        const int pos = sa[r];
        if (height[r] < v)
        {
            // A new run starts at this rank.
            lowest = pos;
            highest = pos;
        }
        else
        {
            if (pos < lowest) lowest = pos;
            if (pos > highest) highest = pos;
        }
        if (highest - lowest >= v)
            return true;
    }
    return false;
}
int main()
{
while (scanf("%d", &n) != EOF && n > 0)
{
for (int i = 1; i <= n; i++) scanf("%d", &arr[i]);
for (int i = 1; i < n; i++) ch[i] = arr[i + 1] - arr[i] + 90;
da();
l = 1, r = n, res = 0;
while (l <= r)
{
mid = (l + r) >> 1;
if (check(mid))
{
res = mid;
l = mid + 1;
}
else
r = mid - 1;
}
res++;
printf("%d\n", res);
}
return 0;
}
2. 出现至少 k 次的最长重复子串 https://www.jisuanke.com/course/1177/62046 输出出现至少 k 次的最长重复子串的长度
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;
// Capacity of every array below.
const int maxn = 40005;
// n: sequence length; k: required number of occurrences; l, r, mid, res: binary-search state.
int n, k, l, r, mid, res;
// ch: the 1-indexed input sequence; height: filled by da().
int ch[maxn], height[maxn];
// Counting-sort buckets used by da().
int cntA[maxn], cntB[maxn];
// First and second sort keys used by da().
int A[maxn], B[maxn];
// Suffix array, temporary suffix array and ranks produced by da().
int sa[maxn], tsa[maxn], rk[maxn];
// Builds the suffix array and the height array of the global 1-indexed sequence ch[1..n]
// by prefix doubling with counting sort.
// Results: sa[1..n] (sa[r] = start of the suffix ranked r), rk[1..n], height[1..n].
// A, B, cntA, cntB and tsa are scratch.
void da()
{
// Round 0: counting sort by single symbol; only the keys 0..255 are cleared and prefix-summed.
for (int i = 0; i < 256; i ++) cntA[i] = 0;
for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
// Initial ranks: equal symbols share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (ch[sa[i]] != ch[sa[i - 1]]) rk[sa[i]] ++;
}
// Double the compared length l until the largest rank equals n, i.e. all ranks are distinct.
for (int l = 1; rk[sa[n]] < n; l <<= 1)
{
for (int i = 0; i <= n; i ++) cntA[i] = 0;
for (int i = 0; i <= n; i ++) cntB[i] = 0;
for (int i = 1; i <= n; i ++)
{
// A[i] is the first key; B[i] is the second key, 0 when i + l lies past the end.
cntA[A[i] = rk[i]] ++;
cntB[B[i] = (i + l <= n) ? rk[i + l] : 0] ++;
}
// Sort by the second key into tsa, then stably by the first key into sa.
for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
// Re-rank: neighbours in sa with identical key pairs share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
}
}
// height[rk[i]] = common-prefix length of suffix i and the suffix ranked just before it;
// j is carried over (minus one) from one i to the next.
for (int i = 1, j = 0; i <= n; i ++)
{
if (j) j --;
while (ch[i + j] == ch[sa[rk[i] - 1] + j]) j ++;
height[rk[i]] = j;
}
}
// Returns true when v <= 1, or when some run of consecutive ranks whose heights are
// all >= v spans at least k suffixes.
bool check(int v)
{
    if (v <= 1)
        return true;
    int runStart = 1; // rank at which the current run of heights >= v begins
    for (int r = 2; r <= n; ++r)
    {
        if (height[r] < v)
        {
            runStart = r;
            continue;
        }
        const int runSize = r - runStart + 1;
        if (runSize >= k)
            return true;
    }
    return false;
}
int main()
{
scanf("%d%d", &n, &k);
for (int i = 1; i <= n; i++) scanf("%d", &ch[i]);
da();
l = 1, r = n, res = 0;
while (l <= r)
{
int mid = (l + r) >> 1;
if (check(mid))
{
res = mid;
l = mid + 1;
}
else
r = mid - 1;
}
printf("%d\n", res);
return 0;
}
3. 子串排名 https://www.jisuanke.com/course/1177/62048 输出字典序第 k 小的子串的首位置和末位置(下标从 1 开始)
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;
typedef long long ll;
// Capacity of every array below.
const int maxn = 200005;
// ch: the string, stored from index 1; n: its length; q: number of queries;
// l, r: last answer (also used as binary-search bounds); mid, res: binary-search state.
char ch[maxn]; int n, q, l, r, mid, res;
// Sort keys and counting-sort buckets used by da().
int A[maxn], B[maxn], cntA[maxn], cntB[maxn];
// Suffix array, temporary suffix array, ranks and heights produced by da().
int sa[maxn], tsa[maxn], rk[maxn], height[maxn];
// sum[i]: running total of (n - sa[j] + 1 - height[j]) for j <= i; k: current query value.
ll sum[maxn], k;
// Builds the suffix array and the height array of the global 1-indexed string ch[1..n]
// by prefix doubling with counting sort.
// Results: sa[1..n] (sa[r] = start of the suffix ranked r), rk[1..n], height[1..n].
// A, B, cntA, cntB and tsa are scratch.
void da()
{
// Round 0: counting sort by single character; only the keys 0..255 are cleared and prefix-summed.
for (int i = 0; i < 256; i ++) cntA[i] = 0;
for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
// Initial ranks: equal characters share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (ch[sa[i]] != ch[sa[i - 1]]) rk[sa[i]] ++;
}
// Double the compared length l until the largest rank equals n, i.e. all ranks are distinct.
for (int l = 1; rk[sa[n]] < n; l <<= 1)
{
for (int i = 0; i <= n; i ++) cntA[i] = 0;
for (int i = 0; i <= n; i ++) cntB[i] = 0;
for (int i = 1; i <= n; i ++)
{
// A[i] is the first key; B[i] is the second key, 0 when i + l lies past the end.
cntA[A[i] = rk[i]] ++;
cntB[B[i] = (i + l <= n) ? rk[i + l] : 0] ++;
}
// Sort by the second key into tsa, then stably by the first key into sa.
for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
// Re-rank: neighbours in sa with identical key pairs share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
}
}
// height[rk[i]] = common-prefix length of suffix i and the suffix ranked just before it;
// j is carried over (minus one) from one i to the next.
for (int i = 1, j = 0; i <= n; i ++)
{
if (j) j --;
while (ch[i + j] == ch[sa[rk[i] - 1] + j]) j ++;
height[rk[i]] = j;
}
}
// For every input string: builds the suffix array, accumulates in sum[] how many new
// substrings each rank contributes, then answers q queries. Each query value is
// XOR-ed with the previous answer (l, r) and incremented before use; the answer is
// the 1-based [l, r] of the leftmost occurrence of the selected substring, or "0 0"
// when the value exceeds sum[n].
int main()
{
    while (scanf("%s", ch + 1) != EOF)
    {
        n = strlen(ch + 1);
        da();
        for (int i = 1; i <= n; ++i)
        {
            const int fresh = n - sa[i] + 1 - height[i];
            sum[i] = sum[i - 1] + fresh;
        }
        scanf("%d", &q);
        l = 0;
        r = 0;
        while (q--)
        {
            scanf("%lld", &k);
            k = (k ^ l ^ r) + 1;
            if (k > sum[n])
            {
                l = r = 0;
                printf("%d %d\n", l, r);
                continue;
            }
            // Smallest rank whose running total reaches k.
            int lo = 1, hi = n;
            res = 0;
            while (lo <= hi)
            {
                mid = (lo + hi) >> 1;
                if (sum[mid] < k)
                    lo = mid + 1;
                else
                {
                    res = mid;
                    hi = mid - 1;
                }
            }
            l = sa[res];
            r = sa[res] + height[res] + k - sum[res - 1] - 1;
            const int len = r - l + 1;
            // Later ranks that still share the first len characters may start further left.
            for (++res; res <= n && height[res] >= len; ++res)
            {
                if (sa[res] >= l)
                    continue;
                l = sa[res];
                r = l + len - 1;
            }
            printf("%d %d\n", l, r);
        }
    }
    return 0;
}
4. 重复不重叠子串个数 https://www.jisuanke.com/course/1177/62051 输出重复不重叠子串的个数
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;
// Capacity of every array below.
const int maxn = 2005;
// ch: the string, stored from index 1; n: its length; l, r: smallest/largest start in the current run; res: answer.
char ch[maxn]; int n, l, r, res;
// Sort keys and counting-sort buckets used by da().
int A[maxn], B[maxn], cntA[maxn], cntB[maxn];
// Suffix array, temporary suffix array, ranks and heights produced by da().
int sa[maxn], tsa[maxn], rk[maxn], height[maxn];
// Builds the suffix array and the height array of the global 1-indexed string ch[1..n]
// by prefix doubling with counting sort.
// Results: sa[1..n] (sa[r] = start of the suffix ranked r), rk[1..n], height[1..n].
// A, B, cntA, cntB and tsa are scratch.
void da()
{
// Round 0: counting sort by single character; only the keys 0..255 are cleared and prefix-summed.
for (int i = 0; i < 256; i ++) cntA[i] = 0;
for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
// Initial ranks: equal characters share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (ch[sa[i]] != ch[sa[i - 1]]) rk[sa[i]] ++;
}
// Double the compared length l until the largest rank equals n, i.e. all ranks are distinct.
for (int l = 1; rk[sa[n]] < n; l <<= 1)
{
for (int i = 0; i <= n; i ++) cntA[i] = 0;
for (int i = 0; i <= n; i ++) cntB[i] = 0;
for (int i = 1; i <= n; i ++)
{
// A[i] is the first key; B[i] is the second key, 0 when i + l lies past the end.
cntA[A[i] = rk[i]] ++;
cntB[B[i] = (i + l <= n) ? rk[i + l] : 0] ++;
}
// Sort by the second key into tsa, then stably by the first key into sa.
for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
// Re-rank: neighbours in sa with identical key pairs share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
}
}
// height[rk[i]] = common-prefix length of suffix i and the suffix ranked just before it;
// j is carried over (minus one) from one i to the next.
for (int i = 1, j = 0; i <= n; i ++)
{
if (j) j --;
while (ch[i + j] == ch[sa[rk[i] - 1] + j]) j ++;
height[rk[i]] = j;
}
}
// Reads strings until EOF or one starting with '#'. For every length up to n/2 it
// walks the runs of ranks whose height is at least that length and counts the runs
// whose smallest and largest start are at least that length apart.
int main()
{
    for (;;)
    {
        if (scanf("%s", ch + 1) == EOF)
            break;
        if (ch[1] == '#')
            break;
        n = strlen(ch + 1);
        res = 0;
        da();
        const int longest = n / 2;
        for (int len = 1; len <= longest; ++len)
        {
            l = sa[1];
            r = sa[1];
            for (int i = 2; i <= n; ++i)
            {
                if (height[i] < len)
                {
                    // The run ends here: evaluate it, then start a new one.
                    if (r - l >= len) ++res;
                    l = r = sa[i];
                    continue;
                }
                if (sa[i] < l) l = sa[i];
                if (sa[i] > r) r = sa[i];
            }
            if (r - l >= len) ++res;
        }
        printf("%d\n", res);
    }
    return 0;
}
5.最长公共子串 https://www.jisuanke.com/course/1177/62055 输出最长公共子串长度
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <algorithm>
using namespace std;
// Capacity of every array below.
const int maxn = 200005;
// l1, l2: lengths of the two input strings; n: length of the joined string; res: answer.
int l1, l2, n, res;
// The joined string "first $ second", stored from index 1.
char ch[maxn];
// Suffix array and heights produced by da().
int sa[maxn], height[maxn];
// Temporary suffix array, ranks and sort keys used by da().
int tsa[maxn], rk[maxn], A[maxn], B[maxn];
// Counting-sort buckets used by da().
int cntA[maxn], cntB[maxn];
// Builds the suffix array and the height array of the global 1-indexed string ch[1..n]
// by prefix doubling with counting sort.
// Results: sa[1..n] (sa[r] = start of the suffix ranked r), rk[1..n], height[1..n].
// A, B, cntA, cntB and tsa are scratch.
void da()
{
// Round 0: counting sort by single character; only the keys 0..255 are cleared and prefix-summed.
for (int i = 0; i < 256; i ++) cntA[i] = 0;
for (int i = 1; i <= n; i ++) cntA[ch[i]] ++;
for (int i = 1; i < 256; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[ch[i]] --] = i;
// Initial ranks: equal characters share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (ch[sa[i]] != ch[sa[i - 1]]) rk[sa[i]] ++;
}
// Double the compared length l until the largest rank equals n, i.e. all ranks are distinct.
for (int l = 1; rk[sa[n]] < n; l <<= 1)
{
for (int i = 0; i <= n; i ++) cntA[i] = 0;
for (int i = 0; i <= n; i ++) cntB[i] = 0;
for (int i = 1; i <= n; i ++)
{
// A[i] is the first key; B[i] is the second key, 0 when i + l lies past the end.
cntA[A[i] = rk[i]] ++;
cntB[B[i] = (i + l <= n) ? rk[i + l] : 0] ++;
}
// Sort by the second key into tsa, then stably by the first key into sa.
for (int i = 1; i <= n; i ++) cntB[i] += cntB[i - 1];
for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1];
for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
// Re-rank: neighbours in sa with identical key pairs share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
}
}
// height[rk[i]] = common-prefix length of suffix i and the suffix ranked just before it;
// j is carried over (minus one) from one i to the next.
for (int i = 1, j = 0; i <= n; i ++)
{
if (j) j --;
while (ch[i + j] == ch[sa[rk[i] - 1] + j]) j ++;
height[rk[i]] = j;
}
}
// Side of position i in the joined string: -1 for positions up to l1, +1 for everything after.
int belong(int i)
{
    if (i > l1)
        return 1;
    return -1;
}
// Reads two strings, joins them as "first $ second" starting at ch[1], and prints the
// largest height between two neighbouring ranks whose suffixes start on different sides.
int main()
{
    char *first = ch + 1;
    scanf("%s", first);
    l1 = strlen(first);
    first[l1] = '$';
    char *second = first + l1 + 1;
    scanf("%s", second);
    l2 = strlen(second);
    n = strlen(first);
    da();
    for (int i = 2; i <= n; ++i)
    {
        const bool differentSides = belong(sa[i - 1]) != belong(sa[i]);
        if (differentSides && height[i] > res)
            res = height[i];
    }
    printf("%d\n", res);
    return 0;
}
6.求两个字符串长度大于等于k的公共子串个数
https://www.jisuanke.com/course/1177/62056
#include <stdio.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <string>
#include <algorithm>
#include <vector>
using namespace std;
typedef long long ll;
#define INF 0x7f7f7f7f
#define MAXN 200005
// Answer accumulated by solve().
ll res;
// k: minimum common-substring length; n: length of the joined string; m: length of the first string.
int k, n, m;
// The joined string "first $ second", stored from index 1.
char s[MAXN];
// Ranks, suffix array, temporary suffix array and heights produced by da().
int rk[MAXN], sa[MAXN], tsa[MAXN], height[MAXN];
// Sort keys and counting-sort buckets used by da(); cntA/cntB are reused by solve() as per-entry counts.
int A[MAXN], B[MAXN], cntA[MAXN], cntB[MAXN];
// Builds the suffix array and the height array of the global 1-indexed string s[1..n]
// by prefix doubling with counting sort.
// Results: sa[1..n] (sa[r] = start of the suffix ranked r), rk[1..n], height[1..n].
// A, B, cntA, cntB and tsa are scratch.
void da()
{
// Round 0: counting sort by single character over the keys 0..255
// (the prefix-sum loop also touches cntA[256]).
for (int i = 0; i < 256; i ++) cntA[i] = 0;
for (int i = 1; i <= n; i ++) cntA[s[i]] ++;
for (int i = 0; i < 256; i ++) cntA[i + 1] += cntA[i];
for (int i = n; i; i --) sa[cntA[s[i]] --] = i;
// Initial ranks: equal characters share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (s[sa[i]] != s[sa[i - 1]]) rk[sa[i]] ++;
}
// Double the compared length l while l < n and the ranks are not yet all distinct.
for (int l = 1; l < n && rk[sa[n]] < n; l <<= 1)
{
for (int i = 0; i <= n; i ++) cntA[i] = cntB[i] = 0;
for (int i = 1; i <= n; i ++)
{
// A[i] is the first key; B[i] is the second key, 0 when i + l lies past the end.
cntA[A[i] = rk[i]] ++;
cntB[B[i] = i + l <= n ? rk[i + l] : 0] ++;
}
// Sort by the second key into tsa, then stably by the first key into sa.
for (int i = 1; i <= n; i ++) cntA[i] += cntA[i - 1], cntB[i] += cntB[i - 1];
for (int i = n; i; i --) tsa[cntB[B[i]] --] = i;
for (int i = n; i; i --) sa[cntA[A[tsa[i]]] --] = tsa[i];
// Re-rank: neighbours in sa with identical key pairs share a rank.
rk[sa[1]] = 1;
for (int i = 2; i <= n; i ++)
{
rk[sa[i]] = rk[sa[i - 1]];
if (A[sa[i]] != A[sa[i - 1]] || B[sa[i]] != B[sa[i - 1]]) rk[sa[i]] ++;
}
}
// height[rk[i]] = common-prefix length of suffix i and the suffix ranked just before it;
// j is carried over (minus one) from one i to the next.
for (int i = 1, j = 0; i <= n; i ++)
{
if (j) j --;
while (s[i + j] == s[sa[rk[i] - 1] + j]) j ++;
height[rk[i]] = j;
}
}
// Two monotonic stacks, one per input string. stackX[t] holds a value height - k + 1,
// cntX[t] (the da() bucket arrays, reused here) holds how many suffixes share it,
// and sX is the running sum of value * count over the stack.
int stackA[MAXN], topA; ll sA;
int stackB[MAXN], topB; ll sB;
// Accumulates into res, over the ranks i = 3..n with height[i] >= k, the running sum
// of the stack that belongs to the OTHER string than suffix sa[i].
// Suffixes starting at positions <= m are treated as the first string (stack A),
// all others as the second (stack B).
void solve()
{
res = sA = sB = topA = topB = 0;
for (int i = 3; i <= n; i ++)
if (height[i] < k)
{
// A height below k breaks every run: drop both stacks.
sA = sB = topA = topB = 0;
} else {
if (sa[i - 1] <= m)
{
// The previous suffix is from the first string: push it on A with weight s,
// merging every entry whose value is >= s (they are capped to s).
int s = height[i] - k + 1, c = 1;
while (topA > 0 && s <= stackA[topA])
{
sA -= 1ll * stackA[topA] * cntA[topA];
c += cntA[topA];
topA --;
}
sA += 1ll * s * c;
topA ++; stackA[topA] = s; cntA[topA] = c;
// Cap the entries of B to s as well, without adding a new suffix (c starts at 0).
s = height[i] - k + 1, c = 0;
while (topB > 0 && s <= stackB[topB])
{
sB -= 1ll * stackB[topB] * cntB[topB];
c += cntB[topB];
topB --;
}
if (c > 0)
{
sB += 1ll * s * c;
topB ++; stackB[topB] = s; cntB[topB] = c;
}
} else {
// Mirror case: the previous suffix is from the second string, so push on B and cap A.
int s = height[i] - k + 1, c = 1;
while (topB > 0 && s <= stackB[topB])
{
sB -= 1ll * stackB[topB] * cntB[topB];
c += cntB[topB];
topB --;
}
sB += 1ll * s * c;
topB ++; stackB[topB] = s; cntB[topB] = c;
s = height[i] - k + 1, c = 0;
while (topA > 0 && s <= stackA[topA])
{
sA -= 1ll * stackA[topA] * cntA[topA];
c += cntA[topA];
topA --;
}
if (c > 0)
{
sA += 1ll * s * c;
topA ++; stackA[topA] = s; cntA[topA] = c;
}
}
// Pair the current suffix with all earlier suffixes of the other string in this run.
if (sa[i] <= m)
res += sB;
else
res += sA;
}
}
// Repeats until EOF or k <= 0: reads k and two strings, joins them as
// "first $ second" starting at s[1], then prints the value computed by solve().
int main()
{
    for (;;)
    {
        if (scanf("%d", &k) == EOF || k <= 0)
            break;
        char *joined = s + 1;
        scanf("%s", joined);
        m = strlen(joined);
        joined[m] = '$';
        scanf("%s", joined + m + 1);
        n = strlen(joined);
        da();
        solve();
        printf("%lld\n", res);
    }
    return 0;
}
7.求多个字符串的最长公共子串(可反转) poj1226 输出最长公共子串的长度
#include <iostream>
#include <stdio.h>
#include <string.h>
#include <stack>
#include <queue>
#include <map>
#include <set>
#include <vector>
#include <math.h>
#include <bitset>
#include <algorithm>
#include <climits>
using namespace std;
#define MEM(a,x) memset(a,x,sizeof(a))
#define N 1000005
// wa/wb/wsf/wv are scratch buffers of getsa(); sa receives the suffix array.
int wa[N],wb[N],wsf[N],wv[N],sa[N];
// rank/height are filled by getheight(); s holds the joined input sequence as ints.
int rank[N],height[N],s[N],a[N];
//sa: sa[i] is the start position in str of the suffix ranked i-th in lexicographic order
//rank: the lexicographic rank of the suffix that starts at position i of str
//height: longest common prefix of the suffixes ranked i and i-1
// Returns 1 when positions a and b agree on both keys r[.] and r[.+k], otherwise 0.
int cmp(int *r,int a,int b,int k)
{
    if(r[a]!=r[b])
        return 0;
    return (r[a+k]==r[b+k]) ? 1 : 0;
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
// r:  input sequence (values are used as indices into wsf, so they must be in [0,m)).
// sa: output; sa[i] is the start index of the suffix ranked i.
// n:  length of r including the appended trailing 0.
// m:  number of distinct key values allowed in the first pass.
void getsa(int *r,int *sa,int n,int m)//n must include the appended trailing 0
{
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of all suffixes by their first element; x[] keeps the keys.
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[x[i]=r[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[x[i]]]=i;
p=1;
j=1;
// Each round sorts by the pair (x[i], x[i+j]); it stops once p, the number of distinct ranks, reaches n.
for(; p<n; j*=2,m=p)
{
// y = suffix starts ordered by the second key: the last j suffixes have none and go first,
// the rest follow in the order of the current sa shifted back by j.
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Stable counting sort of y by the first key.
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wsf[i]=0;
for(i=0; i<n; i++) wsf[wv[i]]++;
for(i=1; i<m; i++) wsf[i]+=wsf[i-1];
for(i=n-1; i>=0; i--) sa[--wsf[wv[i]]]=y[i];
// Swap the buffers: y now holds the old ranks, x receives the new ones.
t=x;
x=y;
y=t;
x[sa[0]]=0;
// Neighbours in sa with equal key pairs share a rank; otherwise a new rank is opened.
for(p=1,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)? p-1:p++;
}
}
// Fills the global rank[] and height[] from the already built global sa[].
// r: the sequence whose suffixes were sorted; n: its length WITHOUT the trailing 0.
// height[rank[p]] is the common-prefix length of suffix p and the suffix ranked just before it.
void getheight(int *r,int n)
{
    for(int order=1; order<=n; ++order)
        rank[sa[order]]=order;
    int matched=0;
    for(int pos=0; pos<n; ++pos)
    {
        // The previous result minus one is a valid lower bound for this position.
        if(matched>0)
            --matched;
        const int before=sa[rank[pos]-1];
        while(r[pos+matched]==r[before+matched])
            ++matched;
        height[rank[pos]]=matched;
    }
}
// Input buffer for one string.
char str[N];
// id[p]: index of the input string that position p of the joined sequence came from (set by main()).
int len[105],size,ans[N],id[N];
// vis[j]: whether string j has already been counted in the current run (used by check()).
bool vis[105];
// Returns true when some run of consecutive ranks with height >= mid contains
// suffixes from all k input strings (id[] maps a position to its string).
// mid: candidate length; n: number of entries in sa[] (sentinel included, so the
//      valid ranks are 0..n-1); k: number of input strings.
//
// Fixes over the previous version:
//  * the scan stops at rank n-1; rank n is never filled for the current case, so
//    reading height[n]/sa[n] picked up leftovers from an earlier, longer case;
//  * the owner of a suffix is looked up directly instead of scanning all k ids.
bool check(int mid,int n,int k)
{
    int cnt = 0;
    MEM(vis,false);
    for(int i = 1; i<n; i++)
    {
        if(height[i]>=mid)
        {
            const int owner[2] = { id[sa[i]], id[sa[i-1]] };
            for(int t = 0; t<2; t++)
            {
                const int o = owner[t];
                // Ids outside [0,k) were ignored before as well.
                if(o>=0 && o<k && !vis[o])
                {
                    vis[o] = true;
                    cnt++;
                }
            }
        }
        else
        {
            if(cnt>=k) return true;
            cnt = 0;
            MEM(vis,false);
        }
    }
    return cnt>=k;
}
// For each of t test cases: reads k strings, joins every string and its reverse into
// one int sequence with a distinct separator after each piece, builds the suffix
// array and binary-searches the largest length accepted by check().
//
// Fix: separators are now 256, 257, ... (with the alphabet size passed to getsa()
// raised to match) and characters are stored as unsigned values. The old separators
// '#'+1, '#'+2, ... run through the printable ASCII range once there are 15 or more
// strings, so a separator could equal a letter and let a match cross a string boundary.
int main()
{
    int n,k,i,j,t;
    scanf("%d",&t);
    while(t--)
    {
        scanf("%d",&k);
        n = 0;
        size = 0;
        int sep = 256; // next unused separator value; always above any character
        for(i = 0; i<k; i++)
        {
            scanf("%s",str);
            int ll = strlen(str);
            for(j = 0; j<ll; j++)
            {
                id[n] = i;
                s[n++] = (unsigned char)str[j];
            }
            s[n++] = sep++;
            for(j = ll-1; j>=0; j--)
            {
                id[n] = i;
                s[n++] = (unsigned char)str[j];
            }
            s[n++] = sep++;
        }
        // The last separator becomes the terminating 0 that getsa() expects.
        s[n-1] = 0;
        getsa(s,sa,n,sep);
        getheight(s,n-1);
        int l=1,r=n,mid,ans = 0;
        while(l<=r)
        {
            mid = (l+r)/2;
            if(check(mid,n,k))
            {
                ans = mid;
                l = mid+1;
            }
            else r = mid-1;
        }
        printf("%d\n",ans);
    }
    return 0;
}
8.求大于k/2个字符串中含有的最长公共子串(修改次数,修改ans)输出该子串 poj3294
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <queue>
#include <algorithm>
#include <map>
#include <iomanip>
#define INF 99999999;
typedef long long LL;
using namespace std;
// Capacity of the suffix-array buffers.
const int MAX=200000+10;
// rank: points at the final rank buffer (set by makesa()); r: joined input as ints;
// sa/height: suffix array and heights; size: number of input strings read so far.
int *rank,r[MAX],sa[MAX],height[MAX],size;
// wa/wb/wm: scratch of makesa(); len[j]: position of the separator after string j;
// pos[0]: number of answers, pos[1..]: their start positions (filled by check()).
int wa[MAX],wb[MAX],wm[MAX],len[110],pos[MAX];
// mark[j]: whether string j has already been counted in the current run (used by check()).
bool mark[110];
// Raw characters of all input strings, stored back to back.
char s[MAX];
// True when positions a and b agree on both keys r[.] and r[.+l].
bool cmp(int *r,int a,int b,int l){
    if(r[a] != r[b])
        return false;
    return r[a+l] == r[b+l];
}
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort and
// leaves the global pointer rank aimed at the buffer that holds the final ranks.
// r: input sequence (values are used as indices into wm, so they must be in [0,m));
// sa: output suffix array; n: length of r; m: number of key values in the first pass.
void makesa(int *r,int *sa,int n,int m){
int *x=wa,*y=wb,*t;
// First pass: counting sort of all suffixes by their first element; x[] keeps the keys.
for(int i=0;i<m;++i)wm[i]=0;
for(int i=0;i<n;++i)wm[x[i]=r[i]]++;
for(int i=1;i<m;++i)wm[i]+=wm[i-1];
for(int i=n-1;i>=0;--i)sa[--wm[x[i]]]=i;
// Each round sorts by the pair (x[i], x[i+j]); it stops once p, the number of distinct ranks, reaches n.
for(int i=0,j=1,p=0;p<n;j=j*2,m=p){
// y = suffix starts ordered by the second key; the last j suffixes have none and go first.
for(p=0,i=n-j;i<n;++i)y[p++]=i;
for(i=0;i<n;++i)if(sa[i]>=j)y[p++]=sa[i]-j;
// Stable counting sort of y by the first key.
for(i=0;i<m;++i)wm[i]=0;
for(i=0;i<n;++i)wm[x[y[i]]]++;
for(i=1;i<m;++i)wm[i]+=wm[i-1];
for(i=n-1;i>=0;--i)sa[--wm[x[y[i]]]]=y[i];
// Swap the buffers, then re-rank: neighbours in sa with equal key pairs share a rank.
for(t=x,x=y,y=t,i=p=1,x[sa[0]]=0;i<n;++i){
x[sa[i]]=cmp(y,sa[i],sa[i-1],j)?p-1:p++;
}
}
rank=x;
}
// Fills height[] for the first n positions of r, using the global rank pointer set by makesa().
// height[rank[p]] is the common-prefix length of suffix p and the suffix ranked just before it.
void calheight(int *r,int *sa,int n){
    int matched=0;
    for(int pos=0;pos<n;++pos){
        // The previous result minus one is a valid lower bound for this position.
        if(matched) --matched;
        const int before=sa[rank[pos]-1];
        while(r[pos+matched] == r[before+matched]) ++matched;
        height[rank[pos]]=matched;
    }
}
// Collects in pos[1..] one start position for every run of consecutive ranks with
// height >= mid that covers more than k/2 of the input strings; pos[0] receives the
// number of such runs. Returns true when at least one run qualifies.
// mid: candidate length; n: number of entries in sa[] (sentinel included, so the
//      valid ranks are 0..n-1); k: number of input strings.
//
// Fix: the scan stops at rank n-1 and the final flush uses sa[n-1]. Rank n is never
// filled for the current case, so reading height[n]/sa[n] picked up leftovers from
// an earlier, longer case.
bool check(int mid,int n,int k){
    memset(mark,false,sizeof mark);
    int num=0,ans=0;
    for(int i=1;i<n;++i){
        if(height[i]>=mid){
            // Count the strings that own sa[i] and sa[i-1] unless they were counted already.
            for(int j=1;j<=size;++j){
                if(sa[i]>len[j-1] && sa[i]<len[j])ans+=(mark[j]?0:1),mark[j]=true;
                if(sa[i-1]>len[j-1] && sa[i-1]<len[j])ans+=(mark[j]?0:1),mark[j]=true;
            }
        }else{
            // The run ended at rank i-1; record its start if it covers enough strings.
            if(ans>k/2)pos[++num]=sa[i-1];
            ans=0;
            memset(mark,false,sizeof mark);
        }
    }
    // Flush the run that reaches the last valid rank.
    if(ans>k/2)pos[++num]=sa[n-1];
    if(num){pos[0]=num;return true;}
    return false;
}
// Repeats until k == 0: reads k strings, joins them with the separators 300+i,
// binary-searches the largest length accepted by check() and prints the substrings
// recorded in pos[] (or "?" when the length is 0). Cases are separated by a blank line.
int main(){
    int k,n,num=0;
    len[0]=-1; // must be negative so that position 0 belongs to the first string
    while(cin>>k,k){
        pos[0]=size=n=0;
        for(int i=0;i<k;++i){
            scanf("%s",s+n);
            while(s[n] != '\0'){
                r[n]=s[n];
                ++n;
            }
            // Remember where this string ends and put its unique separator there.
            ++size;
            len[size]=n;
            r[n]=300+i;
            ++n;
        }
        r[n-1]=0;
        makesa(r,sa,n,400);
        calheight(r,sa,n-1);
        int L=1,R=n,mid;
        while(L<=R){
            mid=L+R>>1;
            if(check(mid,n,k))
                L=mid+1;
            else
                R=mid-1;
        }
        if(num++)printf("\n");
        const int bestLen=L-1;
        if(bestLen == 0){
            printf("?\n");
            continue;
        }
        for(int i=1;i<=pos[0];++i){
            const int stop=pos[i]+bestLen;
            for(int j=pos[i];j<stop;++j)printf("%c",s[j]);
            printf("\n");
        }
    }
    return 0;
}
9.多个字符串求 出现2次以上且不重叠的子串个数 hdu3518 输出子串个数
#include <cstdio>
#include <cstring>
#define max(a,b) ((a)>(b)?(a):(b))
#define min(a,b) ((a)<(b)?(a):(b))
const int N = int(1e4)+10;
const int INF=0x3fffffff;
// Returns 1 when positions a and b agree on both keys r[.] and r[.+l], otherwise 0.
int cmp(int *r,int a,int b,int l){
    if (r[a]!=r[b])
        return 0;
    return (r[a+l]==r[b+l]) ? 1 : 0;
}
// Scratch buffers of DA().
int wa[N],wb[N],ws[N],wv[N];
// rank/height are filled by calheight().
int rank[N],height[N];
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
// r: input sequence (values are used as indices into ws, so they must be in [0,m));
// sa: output suffix array; n: length of r; m: number of key values in the first pass.
void DA(int *r,int *sa,int n,int m){
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of all suffixes by their first element; x[] keeps the keys.
for(i=0;i<m;i++) ws[i]=0;
for(i=0;i<n;i++) ws[x[i]=r[i]]++;
for(i=1;i<m;i++) ws[i]+=ws[i-1];
for(i=n-1;i>=0;i--) sa[--ws[x[i]]]=i;
// Each round sorts by the pair (x[i], x[i+j]); it stops once p, the number of distinct ranks, reaches n.
for(j=1,p=1;p<n;j*=2,m=p)
{
// y = suffix starts ordered by the second key; the last j suffixes have none and go first.
for(p=0,i=n-j;i<n;i++) y[p++]=i;
for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Stable counting sort of y by the first key.
for(i=0;i<n;i++) wv[i]=x[y[i]];
for(i=0;i<m;i++) ws[i]=0;
for(i=0;i<n;i++) ws[wv[i]]++;
for(i=1;i<m;i++) ws[i]+=ws[i-1];
for(i=n-1;i>=0;i--) sa[--ws[wv[i]]]=y[i];
// Swap the buffers, then re-rank: neighbours in sa with equal key pairs share a rank.
for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
//printf("p = %d\n", p );
}
}
// Fills the global rank[] and height[] from sa[1..n].
// height[rank[p]] is the common-prefix length of suffix p and the suffix ranked just before it.
void calheight(int *r,int *sa,int n){
    for(int order=1;order<=n;order++)
        rank[sa[order]]=order;
    int matched=0;
    for(int pos=0;pos<n;pos++)
    {
        // The previous result minus one is a valid lower bound for this position.
        if(matched) matched--;
        const int before=sa[rank[pos]-1];
        while(r[pos+matched]==r[before+matched]) matched++;
        height[rank[pos]]=matched;
    }
}
// data: the input string as ints (with a trailing 0); sa: its suffix array; n: its length.
int data[N],sa[N],temp[N],n,k,start;
// Input buffer for one string.
char str[N];
int Cal (int x)
{
int maxn,minn,ans=0;
for (int i=1;i<=n;i++)
{
int L=i;
while (L<=n && height[L]<x) L++;
if (L>n) break;
int R=L;
while (R<=n && height[R]>=x) R++;
minn=INF,maxn=-INF;
for (int j=L-1;j<=R-1;j++)
{
minn=min(minn,sa[j]);
maxn=max(maxn,sa[j]);
}
if (maxn-minn>=x)
ans++;
i=R;
}
return ans;
}
// Builds the suffix array and heights of data[0..n], then sums Cal(length) over
// length = 1..n/2, stopping at the first length for which Cal() returns 0.
void Deal ()
{
    DA(data,sa,n+1,200);
    calheight(data,sa,n);
    int total=0;
    for (int length=1;length<=n/2;length++)
    {
        const int here=Cal(length);
        if (here==0) // nothing at this length, so nothing longer either
            break;
        total+=here;
    }
    printf("%d\n",total);
}
// Processes one string per line until a line starting with '#' or the end of input.
//
// Fix: the loop now tests the result of scanf(). The old condition
// `~scanf(...),str[0]!='#'` discarded it through the comma operator, so input that
// ended without a '#' line made the loop re-process the last string forever.
int main ()
{
    while (scanf("%s",str)==1 && str[0]!='#')
    {
        n=strlen(str);
        for (int i=0;i<n;i++)
            data[i]=(int)str[i];
        data[n] = 0; // terminator expected by DA()
        Deal();
    }
    return 0;
}
10.每个字符串中都至少出现2次的不重叠最长子串 spoj220 输出长度
#include <cstdio>
#include <cstring>
#include <cmath>
#include <algorithm>
using namespace std;
#define max(a,b) ((a)>(b)?(a):(b))
#define min(a,b) ((a)<(b)?(a):(b))
const int N = int(2e5)+100;
// Returns 1 when positions a and b agree on both keys r[.] and r[.+l], otherwise 0.
int cmp(int *r,int a,int b,int l){
    if (r[a]!=r[b])
        return 0;
    return (r[a+l]==r[b+l]) ? 1 : 0;
}
// Scratch buffers of DA().
int wa[N],wb[N],ws[N],wv[N];
// rank/height are filled by calheight().
int rank[N],height[N];
// Builds the suffix array of r[0..n-1] by prefix doubling with counting sort.
// r: input sequence (values are used as indices into ws, so they must be in [0,m));
// sa: output suffix array; n: length of r; m: number of key values in the first pass.
void DA(int *r,int *sa,int n,int m){
int i,j,p,*x=wa,*y=wb,*t;
// First pass: counting sort of all suffixes by their first element; x[] keeps the keys.
for(i=0;i<m;i++) ws[i]=0;
for(i=0;i<n;i++) ws[x[i]=r[i]]++;
for(i=1;i<m;i++) ws[i]+=ws[i-1];
for(i=n-1;i>=0;i--) sa[--ws[x[i]]]=i;
// Each round sorts by the pair (x[i], x[i+j]); it stops once p, the number of distinct ranks, reaches n.
for(j=1,p=1;p<n;j*=2,m=p)
{
// y = suffix starts ordered by the second key; the last j suffixes have none and go first.
for(p=0,i=n-j;i<n;i++) y[p++]=i;
for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
// Stable counting sort of y by the first key.
for(i=0;i<n;i++) wv[i]=x[y[i]];
for(i=0;i<m;i++) ws[i]=0;
for(i=0;i<n;i++) ws[wv[i]]++;
for(i=1;i<m;i++) ws[i]+=ws[i-1];
for(i=n-1;i>=0;i--) sa[--ws[wv[i]]]=y[i];
// Swap the buffers, then re-rank: neighbours in sa with equal key pairs share a rank.
for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
//printf("p = %d\n", p );
}
}
// Fills the global rank[] and height[] from sa[1..n].
// height[rank[p]] is the common-prefix length of suffix p and the suffix ranked just before it.
void calheight(int *r,int *sa,int n){
    for(int order=1;order<=n;order++)
        rank[sa[order]]=order;
    int matched=0;
    for(int pos=0;pos<n;pos++)
    {
        // The previous result minus one is a valid lower bound for this position.
        if(matched) matched--;
        const int before=sa[rank[pos]-1];
        while(r[pos+matched]==r[before+matched]) matched++;
        height[rank[pos]]=matched;
    }
}
// data: the joined input as ints; loc[p]: 1-based index of the string that owns
// position p, or -1 for a separator; sa: suffix array of data.
int data[N],loc[N],sa[N];
// n: length of the joined sequence; q: number of strings;
// vis/start: per-string bookkeeping used by Judge().
int n,q,id,vis[15],start[15];
// Input buffer for one string.
char str[10005];
// Returns true when some substring of length x occurs at least twice without
// overlapping in every one of the q strings.
// Each run of consecutive ranks with height >= x is a set of suffixes sharing their
// first x symbols; a string qualifies in a run when its smallest and largest suffix
// start in that run are at least x apart.
//
// Fix: the old version compared every position only with the FIRST position seen in
// suffix-array order, so starts such as 5, 3, 7 with x = 3 were rejected although
// 3 and 7 do not overlap. The smallest and largest starts are now tracked instead.
bool Judge (int x)
{
    int i,j;
    int farthest[15]; // largest start per string; start[] keeps the smallest
    for (i=1;i<=n;i++)
    {
        // Locate the next run [L, R) of ranks with height >= x.
        int L=i;
        while (L<=n && height[L]<x)
            L++;
        if (L>n) break;
        int R=L;
        while (R<=n && height[R]>=x)
            R++;
        memset(vis,0,sizeof(vis));
        memset(start,0,sizeof(start));
        for (j=L-1;j<=R-1;j++)
        {
            const int owner=loc[sa[j]];
            if (owner==-1) continue; // separators belong to no string
            if (vis[owner]==0)
            {
                vis[owner]=1;
                start[owner]=sa[j];
                farthest[owner]=sa[j];
            }
            else
            {
                if (sa[j]<start[owner]) start[owner]=sa[j];
                if (sa[j]>farthest[owner]) farthest[owner]=sa[j];
            }
        }
        int cnt=0;
        for (j=1;j<=q;j++)
        {
            // Two distinct starts are required even when x is 0.
            if (vis[j] && farthest[j]>start[j] && farthest[j]-start[j]>=x)
                cnt++;
        }
        if (cnt==q)
            return true;
        i=R;
    }
    return false;
}
// Reads the q strings of one test case into data[], recording the owner of every
// position in loc[] and placing a fresh separator (130, 131, ...) after each string,
// then builds the suffix array and the heights.
void Input ()
{
    int separator=130;
    n=0;
    memset(data,0,sizeof(data));
    for (int which=1;which<=q;which++)
    {
        scanf("%s",str);
        const int len=strlen(str);
        for (int c=0;c<len;c++)
        {
            loc[n]=which;
            data[n]=(int)str[c];
            n++;
        }
        // Every separator is different so that no match can span two strings.
        loc[n]=-1;
        data[n]=separator;
        n++;
        separator++;
    }
    data[n]=0;
    DA(data,sa,n+1,separator+5);
    calheight(data,sa,n);
}
// For each of T test cases: reads q and the strings, then binary-searches over
// [0, n) the largest length accepted by Judge() and prints it.
int main ()
{
    int T;
    scanf("%d",&T);
    while (T--)
    {
        scanf("%d",&q);
        Input ();
        int best=0;
        int low=0;
        int high=n;
        while (low<high)
        {
            const int probe=(low+high)>>1;
            if (!Judge(probe))
            {
                high=probe;
                continue;
            }
            best=probe;
            low=probe+1;
        }
        printf("%d\n",best);
    }
    return 0;
}