Counting the number of distinct substrings of a string with a suffix array

Problem:

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000

Output

For each test case output one number saying the number of distinct substrings.

Example

Sample Input:
2
CCCCC
ABABA

Sample Output:
5
9

Explanation for the testcase with string ABABA: 
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.


The problem statement is very clear, and so is the sample explanation.

The task is to count the number of distinct substrings of a string.

Analysis:

Every substring is a prefix of some suffix, so the original problem is equivalent to counting the distinct prefixes over all suffixes. If the suffixes are processed in sorted order,

suffix(sa[1]), suffix(sa[2]), ..., suffix(sa[n]), it is easy to see that each newly added suffix suffix(sa[k]) produces

n - sa[k] + 1 new prefixes. However, height[k] of them are identical to prefixes of the previous suffix, so suffix(sa[k]) "contributes"

n - sa[k] + 1 - height[k] distinct substrings (with 1-based positions; the 0-based code below uses len - sa[i] - height[i]). Summing these contributions gives the answer to the original problem, and once sa and height are available the summation takes O(n) time.

Code:

#include <cstdio>
#include <iostream>
#include <cstring>
using namespace std;
// Capacity shared by every working array below.
const int maxn = 100010;

// sa is filled by get_sa(); rankk and height are filled by get_height().
int sa[maxn];
int rankk[maxn];
int height[maxn];

// Scratch buffers used by get_sa(): two key arrays (wa, wb), the counting-sort
// buckets (Ws) and the per-round sort keys (wv).
int wa[maxn];
int wb[maxn];
int Ws[maxn];
int wv[maxn];

// Raw input string read by main().
char str[maxn];

// Returns 1 when the key pairs (r[a], r[a + l]) and (r[b], r[b + l]) are
// equal, 0 otherwise. The second key is only read when the first keys match.
int cmp (int *r,int a,int b,int l){
    if (r[a] != r[b]) {
        return 0;
    }
    return (r[a + l] == r[b + l]) ? 1 : 0;
}

// Builds the suffix array of r[0..n-1] into the global sa[] by prefix
// doubling, using a counting sort in every round.
//
// r: input symbols; every value must lie in [0, m).
// n: number of symbols to sort. main() in this file passes the string length
//    plus one so that the trailing 0 sentinel is sorted as well.
// m: exclusive upper bound on the values in r (bucket count for the first
//    counting sort).
//
// Uses the global scratch arrays wa, wb, Ws and wv and overwrites sa[0..n-1].
void get_sa (int *r,int n,int m){
    int i , j, p , *x = wa,*y = wb,*t;
    // Round 0: counting sort of the suffixes by their first symbol.
    // x[] holds the current sort key of every position.
    for (i = 0; i < m; i++) Ws[i] = 0;
    for (i = 0; i < n; i++) Ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) Ws[i] += Ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--Ws[x[i]]] = i;
    // Each round doubles the compared length j. p is the number of distinct
    // keys produced by the round; once p reaches n all suffixes are separated.
    for (j = 1, p = 1; p < n; j *= 2, m = p){
        // y[] = positions ordered by their second key (the key of position
        // i + j). Positions whose second half runs past the end come first.
        for (p = 0,i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) if (sa[i] >= j) y[p++] = sa[i] - j ;
        // Stable counting sort of y[] by the first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) Ws[i] = 0;
        for (i = 0; i < n; i++) Ws[wv[i]]++;
        // Prefix sums must start at 1: starting at 0 would read Ws[-1].
        for (i = 1; i < m; i++) Ws[i] += Ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--Ws[wv[i]]] = y[i];
        // Swap the key buffers and assign new keys: adjacent suffixes in sa[]
        // share a key only if both of their old key halves are equal.
        for (t = x, x = y, y = t,p = 1, x[sa[0]] = 0, i = 1; i < n ; i++){
            x [sa [i]] = cmp (y, sa [i - 1], sa [i], j)? p - 1: p ++;
        }
    }
}

// Fills the global rankk[] and height[] arrays from the global sa[].
//
// r: the symbol array that sa[] was built from.
// n: number of positions to process; sa[1..n] is read, and for every
//    position i in [0, n) the loop counts how many leading symbols the
//    suffix at i shares with the suffix that precedes it in sa[], storing
//    that count in height[rankk[i]].
//
// The match loop has no bounds check; it stops at the first differing symbol.
void get_height(int *r,int n){
    // rankk is the inverse permutation of sa over indices 1..n.
    for (int pos = 1; pos <= n; ++pos) {
        rankk[sa[pos]] = pos;
    }

    int k = 0;
    for (int i = 0; i < n; ++i) {
        // Carry over the previous match length minus one instead of
        // restarting the comparison from zero.
        if (k != 0) {
            --k;
        }
        const int j = sa[rankk[i] - 1];
        while (r[i + k] == r[j + k]) {
            ++k;
        }
        height[rankk[i]] = k;
    }
}

// Integer copy of the input string: main() fills it with the characters of
// str, appends a 0, and passes it to get_sa() and get_height().
int a[maxn];

// Reads the number of test cases T, then T whitespace-delimited strings, and
// prints the number of distinct substrings of each string on its own line.
// Stops quietly if the input ends early or is malformed.
int main(){
    int T;
    if (scanf("%d",&T) != 1) return 0;
    while (T--){
        // %s skips leading whitespace, so no separate newline handling is needed.
        if (scanf("%s",str) != 1) break;
        int len = static_cast<int>(strlen(str));
        for (int i = 0 ; i < len; i++) {
            // Go through unsigned char so bytes >= 0x80 cannot become
            // negative bucket indices inside get_sa().
            a[i] = static_cast<unsigned char>(str[i]);
        }
        int m = 256;
        // Sentinel 0 terminates the string; it is sorted along with the
        // other symbols, hence len + 1 below.
        a [len] = 0;
        get_sa (a, len + 1, m);
        get_height(a,len);
        // long long: the count grows like len * (len + 1) / 2, which does not
        // fit in int for the longest inputs the buffers can hold.
        long long ans = 0;
        // Index 0 of sa[] is skipped. The suffix sa[i] has len - sa[i]
        // prefixes, of which height[i] already occur in the previous suffix.
        for (int i = 1; i <= len; i++){
            ans += len - sa [i] - height [i];
        }
        printf("%lld\n",ans);
    }
    return 0;
}
Compared with other suffix array applications, I feel that this one is fairly simple.

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325884928&siteId=291194637