题目:
Given a string, we need to find the total number of its distinct substrings.
Input
T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000
Output
For each test case output one number saying the number of distinct substrings.
Example
Sample Input:
2
CCCCC
ABABA
Sample Output:
5
9
Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.
题目解释的很清楚,样例解释也非常清楚
就是在一个字符串中求不相同的子串的个数
分析:
每一个子串一定是某个后缀的前缀,那么原问题等价于求所有后缀之间不相同的前缀的个数。如果所有的后缀按照
suffix(sa[1]), suffix(sa[2]), ……, suffix(sa[n])的顺序计算,不难发现,对于每一个新加进来的后缀suffix(sa[k]),它将产生
n - sa[k] + 1个新的前缀,但是其中有height[k]个和前面的后缀的前缀是相同的。所以suffix(sa[k])将“贡献”出
n - sa[k] + 1 - height[k]个不同的子串,累加后便是原问题的答案。求出sa和height之后,累加的时间复杂度为O(n)(用倍增算法构造后缀数组本身需要O(n log n))。
代码:
#include <cstdio>
#include <iostream>
#include <cstring>
using namespace std;

// Capacity of every working buffer: the longest accepted string is maxn - 1
// characters, plus one slot for the sentinel appended before sorting.
const int maxn = 100000 + 10;

// sa[i]     : start index of the i-th smallest suffix (0-based; sa[0] is the sentinel).
// rankk[i]  : position of suffix i inside sa (named rankk to avoid std::rank).
// height[i] : length of the common prefix of suffixes sa[i - 1] and sa[i].
int sa[maxn], rankk[maxn], height[maxn];
// Scratch buffers for the radix sort in get_sa (Ws holds the bucket counters).
int wa[maxn], wb[maxn], Ws[maxn], wv[maxn];
char str[maxn];

// Returns non-zero when positions a and b carry the same pair of keys
// (r[a], r[a + l]) == (r[b], r[b + l]), i.e. the same rank after doubling.
int cmp(int *r, int a, int b, int l) {
    return r[a] == r[b] && r[a + l] == r[b + l];
}

// Builds the suffix array of r[0 .. n-1] into the global `sa` by prefix
// doubling with a counting sort per round.
//
// r : input symbols; r[n - 1] must be a sentinel strictly smaller than every
//     other symbol and occurring nowhere else.
// n : number of symbols including the sentinel.
// m : exclusive upper bound on the symbol values (alphabet size).
void get_sa(int *r, int n, int m) {
    int i, j, p, *x = wa, *y = wb, *t;

    // Round 0: counting sort on single symbols.
    for (i = 0; i < m; i++) Ws[i] = 0;
    for (i = 0; i < n; i++) Ws[x[i] = r[i]]++;
    for (i = 1; i < m; i++) Ws[i] += Ws[i - 1];
    for (i = n - 1; i >= 0; i--) sa[--Ws[x[i]]] = i;

    // Each round sorts by the first 2*j symbols; stop once all ranks differ.
    for (j = 1, p = 1; p < n; j *= 2, m = p) {
        // y = suffixes ordered by their second key (the rank of suffix i + j).
        // Suffixes with no second key come first.
        for (p = 0, i = n - j; i < n; i++) y[p++] = i;
        for (i = 0; i < n; i++) {
            if (sa[i] >= j) y[p++] = sa[i] - j;
        }

        // Stable counting sort of y by the first key.
        for (i = 0; i < n; i++) wv[i] = x[y[i]];
        for (i = 0; i < m; i++) Ws[i] = 0;
        for (i = 0; i < n; i++) Ws[wv[i]]++;
        // Prefix sums must start at 1: starting at 0 would read Ws[-1].
        for (i = 1; i < m; i++) Ws[i] += Ws[i - 1];
        for (i = n - 1; i >= 0; i--) sa[--Ws[wv[i]]] = y[i];

        // Swap the rank buffers and assign new ranks; equal key pairs share a rank.
        for (t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i++) {
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
        }
    }
}

// Fills the global `rankk` and `height` arrays from `sa`.
//
// r : the same symbol array given to get_sa (sentinel at r[n]).
// n : number of real symbols, i.e. excluding the sentinel.
//
// k is carried over between consecutive i and only decremented by one, so the
// inner comparison loop does not restart from zero for every suffix.
void get_height(int *r, int n) {
    int k = 0, j;
    for (int i = 1; i <= n; i++) rankk[sa[i]] = i;
    for (int i = 0; i < n; height[rankk[i++]] = k) {
        for (k ? k-- : 0, j = sa[rankk[i] - 1]; r[i + k] == r[j + k]; k++);
    }
}

// Symbol buffer handed to get_sa / get_height.
int a[maxn];

// Returns the number of distinct non-empty substrings of s[0 .. len-1].
//
// In sorted order each suffix sa[i] has len - sa[i] prefixes, of which
// height[i] already appeared as prefixes of the previous suffix.
// Requires 0 <= len <= maxn - 1 and no zero bytes inside s.
long long count_distinct_substrings(const char *s, int len) {
    // Go through unsigned char so bytes >= 0x80 map to 128..255 instead of
    // negative bucket indices.
    for (int i = 0; i < len; i++) a[i] = static_cast<unsigned char>(s[i]);
    a[len] = 0;  // unique smallest sentinel

    get_sa(a, len + 1, 256);
    get_height(a, len);

    // long long: the count can reach len * (len + 1) / 2.
    long long ans = 0;
    for (int i = 1; i <= len; i++) ans += len - sa[i] - height[i];
    return ans;
}

// Reads T test cases, one string each, and prints the number of distinct
// substrings of every string on its own line.
int main() {
    int T;
    if (scanf("%d", &T) != 1) return 0;
    while (T--) {
        // Field width is maxn - 1 so the read can never overflow str.
        if (scanf("%100009s", str) != 1) break;
        int len = static_cast<int>(strlen(str));
        printf("%lld\n", count_distinct_substrings(str, len));
    }
    return 0;
}
// Author's note: compared with other suffix-array applications, this one feels fairly simple.