POJ Milk Patterns (suffix array and binary search)

This problem is basically the same as Musical Theme, which I wrote up yesterday. That post includes an introduction to suffix arrays. If you haven't solved that problem yet, you can try it after finishing this one.

Title

The task is to find the length of the longest substring that occurs at least K times in the sequence; the occurrences are allowed to overlap.

Ideas

Reference: Luo Suiqian, "Suffix Array - A Powerful Tool for Processing Strings".
First build the suffix array, then use it to compute the height array (height[i] stores the length of the longest common prefix of the suffixes ranked i and i-1). For a candidate length k, divide the sorted suffixes into groups such that the height value between adjacent suffixes within each group is not less than k. For example, for the string "aabaaaab" with k=2, the suffixes are divided into 4 groups as shown in the figure.
Any two suffixes in the same group share a common prefix of length at least k, so we only need to check whether some group contains at least K suffixes. If one does, length k is feasible, and so is every shorter length. Feasibility is therefore monotonic in the length, so we binary-search on the length and test each candidate this way.

Code
#pragma GCC optimize(2)
//#include<bits/stdc++.h>
#include<iostream>
#include<algorithm>
#include<cstdio>
using namespace std;

// Integer type aliases; only `ll` is used by the code in this file.
typedef long long ll;
typedef unsigned long ul;
typedef unsigned long long ull;
// Shorthand macros. None of them is used by the functions shown in this file.
// NOTE(review): `pi` and `e` expand to acos()/exp() calls although <cmath> is
// not included here, and a macro named `e` rewrites every identifier `e`
// that appears after this point - confirm before relying on either.
#define pi acos(-1.0)
#define e exp(1.0)
#define pb push_back
#define mk make_pair
#define fir first
#define sec second
#define scf scanf
#define prf printf
typedef pair<ll,ll> pa;
// Large sentinel value; not referenced by the functions shown in this file.
const ll INF=0x3f3f3f3f3f3f3f3f;
// Capacity of every working array below (2e4+7 converts to 20007).
const ll maxn=2e4+7;
// Not referenced by the functions shown in this file.
const ll maxnum=1e6+7;
// N: length of the input sequence (read in main).
// M: required number of occurrences (read in main, tested in check).
// K: current prefix length of the doubling step (set by do_sa, read by cmp).
ll N,M,K;
// sa[p]     : start position of the suffix with rank p (position N is the empty suffix).
// rank[i]   : rank of the suffix starting at position i.
// r[i]      : input sequence.
// height[p] : common-prefix length of the suffixes ranked p and p-1 (filled by get_height).
// tmp[i]    : scratch space for the new ranks computed in each doubling round.
// NOTE(review): because of `using namespace std;`, an unqualified use of `rank`
// can collide with std::rank on C++11 and later toolchains.
ll sa[maxn],rank[maxn],r[maxn],height[maxn],tmp[maxn]; 
// Comparator for one round of prefix doubling.
//
// Orders the suffixes starting at i and j by the pair
//   (rank of the suffix itself, rank of the suffix K positions further on),
// where the second component is -1 when that position lies past N, so a
// suffix that runs out of symbols sorts first.
//
// Reads the globals N, K and the rank array. Returns true when suffix i must
// be placed strictly before suffix j.
bool cmp(ll i,ll j){
	// `::rank` names the file-level array explicitly. With `using namespace std;`
	// in effect, a bare `rank` can also find std::rank (C++11 and later), which
	// makes the reference ambiguous and stops the file from compiling.
	if(::rank[i]!=::rank[j])
		return ::rank[i]<::rank[j];
	const ll second_i=(i+K<=N)?::rank[i+K]:-1;
	const ll second_j=(j+K<=N)?::rank[j+K]:-1;
	return second_i<second_j;
}
//求后缀数组 
void do_sa(){
    
    
	ll i,j;
	for(i=0;i<=N;i++){
    
    
		sa[i]=i;
		rank[sa[i]]=(i!=N?r[i]:-1);
	}
	for(K=1;K<=N;K<<=1){
    
    
		sort(sa,sa+1+N,cmp);
		tmp[sa[0]]=0;
		for(i=1;i<=N;i++)
		tmp[sa[i]]=tmp[sa[i-1]]+(cmp(sa[i-1],sa[i])?1:0);
		for(i=0;i<=N;i++)
		rank[i]=tmp[i];
	}
	return ;
}
void get_height(){
    
    
	ll i,j,k=0;
	//枚举每个后缀的起始位置 
	for(i=0;i<N;i++){
    
    
		if(k)
		k--;
		else
		k=0;
		j=sa[rank[i]-1];
		while(r[i+k]==r[j+k])
		k++;
		height[rank[i]]=k;//height[i]:存储排名为i和i-1的后缀子串的最长前缀子串长度 
	}
	return ;
	
}
// Feasibility test for the binary search in main.
//
// Scans height[1..N] for a run of consecutive entries that are all >= mid.
// A run of `run` entries corresponds to run+1 adjacent suffixes in sorted
// order, so the answer is true as soon as run+1 reaches M.
//
// Reads the globals N, M and height.
bool check(ll mid){
	ll run=0;
	for(ll pos=1;pos<=N;++pos){
		if(height[pos]<mid){
			// The run is broken; start counting again.
			run=0;
			continue;
		}
		++run;
		if(run+1>=M)
			return true;
	}
	return false;
}
int main()
{
    
    
//  freopen(".../.txt","w",stdout);
//  freopen(".../.txt","r",stdin);
	ios::sync_with_stdio(false);
	ll i,j,k;
	cin>>N>>M;
	for(i=0;i<N;i++)
	cin>>r[i];
	do_sa();
	get_height();
	ll L=0,R=N,mid,res=0;
	while(L<=R){
    
    
		mid=(R-L)/2+L;
		if(check(mid)){
    
    
			L=mid+1;
			res=max(res,mid);
		} 
		else{
    
    
			R=mid-1;
		}
	}
	cout<<res<<endl;
	return 0;
}

Guess you like

Origin blog.csdn.net/weixin_43311695/article/details/107656420