Suffix array exercises

PKU-1743 Musical Theme

Problem-solving ideas

The task is essentially to find the length of the longest substring that occurs at least twice without overlapping.
The standard approach for finding the longest non-overlapping repeated substring is as follows:
Binary search on the answer plus grouping by the height array. The idea is clever and worth studying carefully. First binary-search the answer, which turns the task into a decision problem: determine whether there are two identical, non-overlapping substrings of length k. The key to solving this is the height array. Split the sorted suffixes into groups such that the height value between adjacent suffixes inside a group is not less than k. For example, for the string "aabaaaab" with k = 2, the suffixes are divided into four groups, as shown in the figure.
Enter a description of the picture here
It is easy to see that two suffixes whose longest common prefix is not less than k must lie in the same group. Then, for each group, it suffices to check whether the difference between the maximum and minimum sa values of the suffixes in that group is not less than k. If some group satisfies this, a solution exists; otherwise it does not. The time complexity of the whole approach is O(n log n).
Two implementations follow: one using the prefix-doubling suffix array and one using DC3.
Musical Theme

Code

#include <algorithm>
#include <cstdio>
#include <cmath>
#include <cstring>
#include <iostream>
#include <cstdlib>
#include <set>
#include <vector>
#include <cctype>
#include <iomanip>
#include <sstream>
#include <climits>
#include <queue>
#include <stack>
using namespace std;
/*    freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")
// Debug helper: prints "<expression> = <value>" to cout.
#define de(a) cout << #a << " = " << a << endl
// Inclusive ascending / descending integer loops over [a, n].
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// NOTE(review): the second template argument of std::vector is the allocator
// type, so this alias cannot be instantiated; it is unused in the visible code.
typedef vector<int, int> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
// Capacity of every global array below.
const ll MAXN = 1e6 + 7;
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
int rk[MAXN];     // rk[i]: rank of the suffix starting at i (inverse of sa)
int tp[MAXN];     // second key of the radix sort: tp[i] is the start position of the suffix whose second-key rank is i
int tax[MAXN];    // tax[v]: how many times key v occurs; bucket array of the radix sort
int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1,
/* i.e. lcp(suffix sa[i], suffix sa[i-1]) */
// n: length of the sequence in s[1..n]; m: current number of distinct sort keys.
int n, m;
// Input sequence, 1-indexed.
int s[MAXN];
/* void Debug()
{
    printf("*****************\n");
    printf("下标");
    for (int i = 1; i <= n; i++)
        printf("%d ", i);
    printf("\n");
    printf("sa  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", sa[i]);
    printf("\n");
    printf("rak ");
    for (int i = 1; i <= n; i++)
        printf("%d ", rk[i]);
    printf("\n");
    printf("tp  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", tp[i]);
    printf("\n");
} */
/*
 * One counting-sort pass over the n suffixes.
 * Primary key: rk[suffix] (values 0..m). Ties keep the order in which the
 * suffixes appear in tp[1..n], because tp is consumed from the back while the
 * bucket ends move downwards. The sorted suffixes are written to sa[1..n].
 */
void Qsort()
{
    // Empty every bucket 0..m.
    int bucket = 0;
    while (bucket <= m)
        tax[bucket++] = 0;

    // Histogram of the primary keys.
    for (int pos = n; pos >= 1; --pos)
        ++tax[rk[pos]];

    // Prefix sums: tax[v] becomes the last slot that belongs to key v.
    for (int key = 1; key <= m; ++key)
        tax[key] = tax[key - 1] + tax[key];

    // Drop the suffixes into their buckets, last one first.
    int idx = n;
    while (idx >= 1)
    {
        const int suffix = tp[idx];
        const int slot = tax[rk[suffix]]--;
        sa[slot] = suffix;
        --idx;
    }
}
/*
 * Builds the suffix array of s[1..n] by prefix doubling.
 *
 * On return sa[1..n] lists the suffix start positions in sorted order and
 * rk[1..n] holds the rank (1..n) of every suffix. tp and tax are scratch.
 *
 * Requirements: every s[i] lies in 1..200 (the initial key bound m), so that
 * rank 0 can stand for "past the end of the string".
 */
void SuffixSort()
{
    m = 200;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i], tp[i] = i;
    Qsort();
    // w: length already sorted by; each round extends it to 2 * w.
    // p: number of distinct ranks after the round. All suffixes differ, so the
    //    loop is finished as soon as p reaches n.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused here as a plain write cursor into tp
        // Order by second key: the last w suffixes have no second half and
        // therefore come first ...
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // ... followed by the others, ordered by the rank of their second half.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // sort by first key (previous rk), ties keep the tp order
        // Keep the previous ranks in tp. Only 1..n are live, so copying them is
        // enough; swapping the two MAXN-sized arrays costs O(MAXN) per round.
        std::copy(rk + 1, rk + n + 1, tp + 1);
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // A second half that starts past n counts as rank 0. Reading the
            // array there would pick up leftovers from earlier, longer inputs.
            const int prevSecond = prev + w <= n ? tp[prev + w] : 0;
            const int curSecond = cur + w <= n ? tp[cur + w] : 0;
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
/*
 * Fills Height[1..n] from sa and rk (Kasai's algorithm):
 * Height[i] = longest common prefix of the suffixes ranked i and i-1,
 * with Height[1] = 0.
 *
 * Start positions are visited in text order so the previous match length
 * minus one can be reused as the starting point of the next comparison.
 */
void GetHeight()
{
    int k = 0;
    for (int i = 1; i <= n; i++)
    {
        if (rk[i] == 1)
        {
            // The smallest suffix has no predecessor in the suffix array.
            Height[1] = 0;
            k = 0;
            continue;
        }
        if (k)
            k--;
        const int j = sa[rk[i] - 1];
        // Stop at the end of the sequence explicitly: s is an int array, so
        // s[n + 1] may still hold a value from an earlier, longer input.
        while (i + k <= n && j + k <= n && s[i + k] == s[j + k])
            k++;
        Height[rk[i]] = k;
    }
}
/*
 * Decision step of the binary search.
 * Walks the suffix array in rank order and cuts it into runs in which every
 * adjacent pair has Height >= len - 1. Returns true as soon as one run contains
 * two start positions that are at least len apart.
 */
bool check(int len)
{
    int lowest = sa[1];
    int highest = sa[1];
    int rank = 2;
    while (rank <= n)
    {
        const int pos = sa[rank];
        if (Height[rank] < len - 1)
        {
            // Common prefix too short: a new run starts at this suffix.
            lowest = pos;
            highest = pos;
        }
        else
        {
            if (pos > highest)
                highest = pos;
            if (pos < lowest)
                lowest = pos;
        }
        if (highest - lowest >= len)
            return true;
        ++rank;
    }
    return false;
}
int ans = 0;
/*
 * PKU-1743 driver.
 * Reads melodies until a length of 0 (or end of input). For each one prints the
 * number of notes of the longest theme that appears twice without overlapping,
 * possibly transposed, or 0 if no such theme has at least 5 notes.
 *
 * Transposition is removed by working on the differences of adjacent notes:
 * s[i] = note[i + 1] - note[i] + 100 for i = 1..notes-1. A theme of `len`
 * notes corresponds to len - 1 equal differences, which is exactly what
 * check(len) looks for. Only real differences are stored; a padded first
 * element could equal a genuine difference and fake a theme.
 *
 * Assumes notes are in 1..88 (per the problem statement - verify), which keeps
 * the shifted differences inside the key range used by SuffixSort.
 */
int main()
{
    int notes;
    while (scanf("%d", &notes) == 1 && notes)
    {
        ans = 0;
        for (int i = 1; i <= notes; i++)
            scanf("%d", &s[i]);
        // Two disjoint themes of 5 notes need at least 10 notes.
        if (notes < 10)
        {
            printf("0\n");
            continue;
        }
        for (int i = 1; i < notes; i++)
            s[i] = s[i + 1] - s[i] + 100;
        n = notes - 1;
        s[n + 1] = 0; // end marker, smaller than every shifted difference
        SuffixSort();
        GetHeight();
        // Binary search for the largest feasible theme length in [1, notes / 2].
        int l = 1, r = (notes >> 1) + 1;
        while (l < r)
        {
            int mid = (l + r) >> 1;
            if (check(mid))
            {
                l = mid + 1;
                ans = mid;
            }
            else
                r = mid;
        }
        if (ans < 5)
            printf("0\n");
        else
            printf("%d\n", ans);
    }
    return 0;
}
//--------------------DC3
#include <cstdio>
#include <algorithm>
#include <queue>
#include <iostream>
#include <cmath>
#include <cstring>
using namespace std;
// Index helpers for dc3(); both read the local variable `tb` of the function
// in which they are expanded.
// F: position x with x % 3 == 1 maps to x / 3, any other x maps to x / 3 + tb.
#define F(x) ((x) / 3 + ((x) % 3 == 1 ? 0 : tb))
// G: inverse direction - an index below tb maps to 3 * x + 1, the others to 3 * (x - tb) + 2.
#define G(x) ((x) < tb ? (x)*3 + 1 : ((x)-tb) * 3 + 2)
// Capacity of every array below. The original note reads "n*10"; presumably a
// multiple of the maximum input length - TODO confirm.
const int MAXN = 200000 + 100;
int sa[MAXN];     // suffix array written by dc3()
int rk[MAXN];     // rk[sa[i]] = i, filled by calheight()
int height[MAXN]; // height[i]: common prefix length of the suffixes at sa[i] and sa[i-1]
int n;            // length of the sequence handed to check()
int s[MAXN];      // raw input values
int r[MAXN];      // sequence passed to dc3()
int wa[MAXN], wb[MAXN], wv[MAXN]; // scratch buffers of sort() / dc3() / c12()
int wws[MAXN];                    // counting-sort buckets used by sort()
/*
 * Counting sort used by dc3().
 * Orders the n indices a[0..n-1] by key r[a[i]] (keys in 0..m-1) and stores the
 * result in b[0..n-1]. Indices with equal keys keep their relative order.
 * Uses the global scratch arrays wv (keys) and wws (buckets).
 */
void sort(int *r, int *a, int *b, int n, int m)
{
    // Cache the key of every index.
    int idx = 0;
    while (idx < n)
    {
        wv[idx] = r[a[idx]];
        ++idx;
    }

    // Clear the buckets, count the keys, then turn counts into end offsets.
    for (int key = 0; key < m; ++key)
        wws[key] = 0;
    for (int pos = 0; pos < n; ++pos)
        ++wws[wv[pos]];
    for (int key = 1; key < m; ++key)
        wws[key] = wws[key] + wws[key - 1];

    // Fill b from the back so equal keys stay in input order.
    for (int pos = n; pos-- > 0;)
    {
        const int slot = --wws[wv[pos]];
        b[slot] = a[pos];
    }
    return;
}
/*
 * Returns 1 when the three values starting at r[a] equal the three values
 * starting at r[b], otherwise 0. Comparison stops at the first difference.
 */
int c0(int *r, int a, int b)
{
    for (int off = 0; off < 3; ++off)
        if (r[a + off] != r[b + off])
            return 0;
    return 1;
}
/*
 * Ordering test used by the merge step of dc3(): returns 1 when the suffix at
 * a sorts before the suffix at b, otherwise 0.
 * The first values r[a] and r[b] decide if they differ. On a tie, k == 2 moves
 * one position forward and compares again as the other case, which breaks the
 * tie through the ranks stored in wv[a + 1] and wv[b + 1].
 */
int c12(int k, int *r, int a, int b)
{
    if (r[a] != r[b])
        return r[a] < r[b];
    if (k == 2)
        return c12(1, r, a + 1, b + 1);
    return wv[a + 1] < wv[b + 1];
}

/*
 * DC3 (difference cover modulo 3) suffix-array construction.
 *
 * r  : input sequence of n values, each in 0..m-1. r[n] and r[n + 1] are
 *      overwritten with 0, and the area starting at r + n is used as the input
 *      of the recursive call.
 * sa : receives the n suffix start positions in sorted order; the area starting
 *      at sa + n is used as the output of the recursive call.
 * n  : number of values. The caller in this file passes the sequence with one
 *      trailing 0 appended.
 * m  : exclusive upper bound of the values in r.
 *
 * NOTE(review): both arrays must extend well beyond n because of the recursion
 * buffers; the exact factor is not visible here - verify against MAXN.
 */
void dc3(int *r, int *sa, int n, int m)
{
    // rn / san : recursion input / output placed right behind r and sa.
    // ta       : number of collected positions with i % 3 == 0.
    // tb       : number of positions in [0, n) with i % 3 == 1.
    // tbc      : number of positions with i % 3 != 0.
    int i, j, *rn = r + n, *san = sa + n, ta = 0, tb = (n + 1) / 3, tbc = 0, p;
    r[n] = r[n + 1] = 0;
    // Collect every position that is not a multiple of 3.
    for (i = 0; i < n; i++)
        if (i % 3 != 0)
            wa[tbc++] = i;
    // Three counting-sort passes (third, second, first value) order those
    // positions by their leading triple; the result ends up in wb.
    sort(r + 2, wa, wb, tbc, m);
    sort(r + 1, wb, wa, tbc, m);
    sort(r, wa, wb, tbc, m);
    // Name the triples: equal neighbouring triples share a name, p counts the
    // distinct names. F() decides where each position goes inside rn.
    for (p = 1, rn[F(wb[0])] = 0, i = 1; i < tbc; i++)
        rn[F(wb[i])] = c0(r, wb[i - 1], wb[i]) ? p - 1 : p++;
    // Names not unique yet: sort the reduced sequence recursively.
    // Otherwise the names already are ranks and san is simply their inverse.
    if (p < tbc)
        dc3(rn, san, tbc, p);
    else
        for (i = 0; i < tbc; i++)
            san[rn[i]] = i;
    // Positions divisible by 3: take them in the order of the suffix that
    // follows them (entries of san below tb stand for positions 3k + 1) ...
    for (i = 0; i < tbc; i++)
        if (san[i] < tb)
            wb[ta++] = san[i] * 3;
    // ... and add position n - 1 explicitly when it is a multiple of 3, since
    // the loop above cannot produce it.
    if (n % 3 == 1)
        wb[ta++] = n - 1;
    // One pass on the first value finishes the order of that group (into wa).
    sort(r, wb, wa, ta, m);
    // Map san back to text positions (wb) and record their rank in wv for c12().
    for (i = 0; i < tbc; i++)
        wv[wb[i] = G(san[i])] = i;
    // Merge the two sorted groups into sa.
    for (i = 0, j = 0, p = 0; i < ta && j < tbc; p++)
        sa[p] = c12(wb[j] % 3, r, wa[i], wb[j]) ? wa[i++] : wb[j++];
    for (; i < ta; p++)
        sa[p] = wa[i++];
    for (; j < tbc; p++)
        sa[p] = wb[j++];
    return;
}
void calheight(int *r, int *sa, int n)
{
    int i, j, k = 0;
    for (i = 1; i <= n; ++i)
        rk[sa[i]] = i;
    for (i = 0; i < n; height[rk[i++]] = k)
        for (k ? k-- : 0, j = sa[rk[i] - 1]; r[i + k] == r[j + k]; ++k)
            ;
    return;
}
/*
 * Decision step of the binary search.
 * Scans sa[1..n] in rank order, splitting it into runs whose adjacent height
 * values are all >= len - 1, and reports true once a run holds two start
 * positions at least len apart.
 */
bool check(int len)
{
    int lowest = sa[1];
    int highest = sa[1];
    for (int rank = 2; rank <= n; ++rank)
    {
        const int pos = sa[rank];
        const bool sameRun = height[rank] >= len - 1;
        if (!sameRun)
        {
            // Restart the run at this suffix.
            highest = lowest = pos;
        }
        else
        {
            highest = pos > highest ? pos : highest;
            lowest = pos < lowest ? pos : lowest;
        }
        if (highest - lowest >= len)
            return true;
    }
    return false;
}
int ans;
/*
 * PKU-1743 driver (DC3 version).
 * Reads melodies until a length of 0 (or end of input). For each one prints the
 * number of notes of the longest theme that appears twice without overlapping,
 * possibly transposed, or 0 if no such theme has at least 5 notes.
 *
 * The suffix array is built over the differences of adjacent notes,
 * r[i] = note[i + 1] - note[i] + 100 for i = 0..notes-2, followed by a 0 end
 * marker. Only real differences are used: there is no note before the first
 * one, so nothing is read in front of the array and no artificial first
 * element can match a genuine difference.
 *
 * Assumes notes are in 1..88 (per the problem statement - verify), so every
 * shifted difference is positive and the 0 marker is the unique minimum.
 */
int main()
{
    int notes;
    while (scanf("%d", &notes) == 1 && notes)
    {
        ans = 0;
        for (int i = 0; i < notes; i++)
            scanf("%d", &s[i]);
        // Two disjoint themes of 5 notes need at least 10 notes.
        if (notes < 10)
        {
            printf("0\n");
            continue;
        }
        n = notes - 1; // number of differences; check() iterates over sa[1..n]
        int Max = -1;
        for (int i = 0; i < n; i++)
        {
            r[i] = s[i + 1] - s[i] + 100;
            if (r[i] > Max)
                Max = r[i];
        }
        r[n] = 0;
        dc3(r, sa, n + 1, Max + 1);
        calheight(r, sa, n);
        // Binary search for the largest feasible theme length in [1, notes / 2].
        int lo = 1, hi = (notes >> 1) + 1;
        while (lo < hi)
        {
            int mid = (lo + hi) >> 1;
            if (check(mid))
            {
                lo = mid + 1;
                ans = mid;
            }
            else
                hi = mid;
        }
        if (ans < 5)
            printf("0\n");
        else
            printf("%d\n", ans);
    }
    return 0;
}

HDU-4622 Reincarnation

Reincarnation

Problem statement

Count the number of distinct substrings inside a given interval [l, r] of the string.

#include <bits/stdc++.h>
using namespace std;
/*    freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")
// Debug helper: prints "<expression> = <value>" to cout.
#define de(a) cout << #a << " = " << a << endl
// Inclusive ascending / descending integer loops over [a, n].
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// NOTE(review): the second template argument of std::vector is the allocator
// type, so this alias cannot be instantiated; it is unused in the visible code.
typedef vector<int, int> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
// Capacity of every global array below.
const ll MAXN = 1e6 + 7;
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
int rk[MAXN];     // rk[i]: rank of the suffix starting at i (inverse of sa)
int tp[MAXN];     // second key of the radix sort: tp[i] is the start position of the suffix whose second-key rank is i
int tax[MAXN];    // tax[v]: how many times key v occurs; bucket array of the radix sort
int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1,
/* i.e. lcp(suffix sa[i], suffix sa[i-1]) */
// n: length of the string in s[1..n]; m: current number of distinct sort keys.
int n, m;
// Input string, stored from index 1.
char s[MAXN];
/* void Debug()
{
    printf("*****************\n");
    printf("下标");
    for (int i = 1; i <= n; i++)
        printf("%d ", i);
    printf("\n");
    printf("sa  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", sa[i]);
    printf("\n");
    printf("rak ");
    for (int i = 1; i <= n; i++)
        printf("%d ", rk[i]);
    printf("\n");
    printf("tp  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", tp[i]);
    printf("\n");
} */
/*
 * One counting-sort pass over the n suffixes.
 * Primary key: rk[suffix] (values 0..m). Suffixes with equal keys keep the
 * order they have in tp[1..n]. The result is written to sa[1..n].
 */
void Qsort()
{
    // Start with empty buckets 0..m.
    for (int key = m; key >= 0; --key)
        tax[key] = 0;

    // Count how often each key occurs.
    int pos = 1;
    while (pos <= n)
    {
        ++tax[rk[pos]];
        ++pos;
    }

    // Running totals: tax[v] is the last slot reserved for key v.
    for (int key = 1; key <= m; ++key)
        tax[key] = tax[key - 1] + tax[key];

    // Walk tp backwards and fill every bucket from its end.
    for (int idx = n; idx > 0; --idx)
    {
        const int suffix = tp[idx];
        const int slot = tax[rk[suffix]]--;
        sa[slot] = suffix;
    }
}
/*
 * Builds the suffix array of the string s[1..n] by prefix doubling.
 *
 * On return sa[1..n] lists the suffix start positions in sorted order and
 * rk[1..n] holds the rank (1..n) of every suffix. tp and tax are scratch.
 *
 * The initial key of a character is s[i] - '0' + 1 with bound m = 75, which
 * covers the characters '0'..'z'. Keys are therefore >= 1 and rank 0 can stand
 * for "past the end of the string".
 */
void SuffixSort()
{
    m = 75;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i] - '0' + 1, tp[i] = i;
    Qsort();
    // w: length already sorted by; each round extends it to 2 * w.
    // p: number of distinct ranks after the round. All suffixes differ, so the
    //    loop is finished as soon as p reaches n.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused here as a plain write cursor into tp
        // Order by second key: the last w suffixes have no second half and
        // therefore come first ...
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // ... followed by the others, ordered by the rank of their second half.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // sort by first key (previous rk), ties keep the tp order
        // Keep the previous ranks in tp. Only 1..n are live, so copying them is
        // enough; swapping the two MAXN-sized arrays costs O(MAXN) per round.
        std::copy(rk + 1, rk + n + 1, tp + 1);
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // A second half that starts past n counts as rank 0. Reading the
            // array there would pick up leftovers from earlier, longer strings.
            const int prevSecond = prev + w <= n ? tp[prev + w] : 0;
            const int curSecond = cur + w <= n ? tp[cur + w] : 0;
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
void GetHeight()
{
    int j, k = 0;
    for (int i = 1; i <= n; i++)
    {
        if (k)
            k--;
        j = sa[rk[i] - 1];
        while (s[i + k] == s[j + k])
            k++;
        Height[rk[i]] = k;
    }
}
// Sparse table over Height: main() fills st[i][k] with the minimum of
// Height[i .. i + 2^k - 1].
int st[MAXN][21];
/*
 * Minimum of Height[l..r] (requires l <= r), answered from two overlapping
 * power-of-two windows that together cover the range.
 */
int Query(int l, int r)
{
    const int span = r - l + 1;
    const int k = log2(span); // largest power of two that fits into the range
    const int fromLeft = st[l][k];
    const int fromRight = st[r - (1 << k) + 1][k];
    return fromRight < fromLeft ? fromRight : fromLeft;
}
/*
 * HDU-4622 driver.
 * For every test case reads a string, builds its suffix array, Height array and
 * the sparse table used by Query(), then answers q queries (l, r) with the
 * number of distinct substrings of s[l..r].
 *
 * Per query: start from the number of all substrings of the interval and walk
 * the suffixes in rank order, keeping only those that start inside [l, r].
 * For each of them subtract the part already counted, i.e. the common prefix
 * with the reference suffix `pre`, cut to the interval.
 */
int main()
{
    int t;
    if (scanf("%d", &t) != 1)
        return 0;
    while (t--)
    {
        scanf(" %s", s + 1);
        n = strlen(s + 1);
        SuffixSort();
        GetHeight();
        for (int i = 0; i <= n; i++)
            st[i][0] = Height[i];
        // Build only the levels whose window fits into the string; this also
        // keeps the level index inside the 21 columns of st.
        for (int i = 1; (1 << i) <= n; i++)
            for (int j = 1; j + (1 << i) - 1 <= n; j++)
                st[j][i] = min(st[j][i - 1], st[j + (1 << (i - 1))][i - 1]);
        // Query(a + 1, b) now yields lcp(suffix sa[a], suffix sa[b]) for a < b.
        int q;
        if (scanf("%d", &q) != 1)
            break;
        while (q--)
        {
            int l, r;
            scanf("%d%d", &l, &r);
            int ans = (r - l + 1) * (r - l + 2) / 2;
            int cnt = 0;  // suffixes inside [l, r] seen so far
            int pre = -1; // rank of the reference suffix, -1 before the first one
            for (int i = 1; i <= n; i++)
            {
                if (cnt == r - l + 1)
                    break; // every start position of the interval was handled
                if (sa[i] < l || sa[i] > r)
                    continue;
                cnt++;
                if (pre == -1)
                {
                    pre = i;
                    continue;
                }
                // pre was assigned from an earlier loop index, so pre < i.
                int lcp = Query(pre + 1, i);
                int la = r - sa[pre] + 1; // length of the reference suffix inside the interval
                int lb = r - sa[i] + 1;   // length of the current suffix inside the interval
                // Keep the old reference only when the current suffix, cut to
                // the interval, is a proper prefix of it.
                if (!(la > lb && lcp >= lb))
                    pre = i;
                ans -= min(lcp, min(la, lb));
            }
            printf("%d\n", ans);
        }
    }
    return 0;
}

Nowcoder: CSL's Password

Topic Link

Problem statement:

Count the number of essentially different (distinct) substrings whose length is not less than k.

#include <bits/stdc++.h>
using namespace std;
/*    freopen("k.in", "r", stdin);
    freopen("k.out", "w", stdout); */
//clock_t c1 = clock();
//std::cerr << "Time:" << clock() - c1 <<"ms" << std::endl;
//#pragma comment(linker, "/STACK:1024000000,1024000000")
// Debug helper: prints "<expression> = <value>" to cout.
#define de(a) cout << #a << " = " << a << endl
// Inclusive ascending / descending integer loops over [a, n].
#define rep(i, a, n) for (int i = a; i <= n; i++)
#define per(i, a, n) for (int i = n; i >= a; i--)
typedef long long ll;
typedef unsigned long long ull;
typedef pair<int, int> PII;
typedef pair<double, double> PDD;
// NOTE(review): the second template argument of std::vector is the allocator
// type, so this alias cannot be instantiated; it is unused in the visible code.
typedef vector<int, int> VII;
#define inf 0x3f3f3f3f
const ll INF = 0x3f3f3f3f3f3f3f3f;
// Capacity of every global array below.
const ll MAXN = 1e6 + 7;
const ll MAXM = 1e6 + 7;
const ll MOD = 1e9 + 7;
const double eps = 1e-6;
const double pi = acos(-1.0);
int sa[MAXN];     // sa[i]: start position of the suffix whose rank is i
int rk[MAXN];     // rk[i]: rank of the suffix starting at i (inverse of sa)
int tp[MAXN];     // second key of the radix sort: tp[i] is the start position of the suffix whose second-key rank is i
int tax[MAXN];    // tax[v]: how many times key v occurs; bucket array of the radix sort
int Height[MAXN]; // Height[i]: longest common prefix of the suffixes ranked i and i-1,
/* i.e. lcp(suffix sa[i], suffix sa[i-1]) */
int n, m; // n: length of the string; m: size of the character set (number of sort keys)
// s: input string, stored from index 1. t is not used by the visible code.
char s[MAXN], t[MAXN];
/* void Debug()
{
    printf("*****************\n");
    printf("下标");
    for (int i = 1; i <= n; i++)
        printf("%d ", i);
    printf("\n");
    printf("sa  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", sa[i]);
    printf("\n");
    printf("rak ");
    for (int i = 1; i <= n; i++)
        printf("%d ", rk[i]);
    printf("\n");
    printf("tp  ");
    for (int i = 1; i <= n; i++)
        printf("%d ", tp[i]);
    printf("\n");
} */
/*
 * One counting-sort pass over the n suffixes.
 * Primary key: rk[suffix] (values 0..m). Suffixes with equal keys keep the
 * order they have in tp[1..n]. The result is written to sa[1..n].
 */
void Qsort()
{
    // Reset the buckets 0..m.
    int key = 0;
    do
    {
        tax[key] = 0;
    } while (++key <= m);

    // Count the keys.
    for (int pos = 1; pos <= n; ++pos)
        tax[rk[pos]] += 1;

    // Prefix sums give the last slot of every key.
    for (key = 1; key <= m; ++key)
        tax[key] += tax[key - 1];

    // Distribute from the back of tp.
    for (int idx = n; idx >= 1; --idx)
    {
        const int suffix = tp[idx];
        int &bucketEnd = tax[rk[suffix]];
        sa[bucketEnd] = suffix;
        --bucketEnd;
    }
}
/*
 * Builds the suffix array of the string s[1..n] by prefix doubling.
 *
 * On return sa[1..n] lists the suffix start positions in sorted order and
 * rk[1..n] holds the rank (1..n) of every suffix. tp and tax are scratch.
 *
 * The initial key of a character is s[i] - '0' + 1 with bound m = 75, which
 * covers the characters '0'..'z'. Keys are therefore >= 1 and rank 0 can stand
 * for "past the end of the string".
 */
void SuffixSort()
{
    m = 75;
    for (int i = 1; i <= n; i++)
        rk[i] = s[i] - '0' + 1, tp[i] = i;
    Qsort();
    // w: length already sorted by; each round extends it to 2 * w.
    // p: number of distinct ranks after the round. All suffixes differ, so the
    //    loop is finished as soon as p reaches n.
    for (int w = 1, p = 0; p < n; m = p, w <<= 1)
    {
        p = 0; // reused here as a plain write cursor into tp
        // Order by second key: the last w suffixes have no second half and
        // therefore come first ...
        for (int i = 1; i <= w; i++)
            tp[++p] = n - w + i;
        // ... followed by the others, ordered by the rank of their second half.
        for (int i = 1; i <= n; i++)
            if (sa[i] > w)
                tp[++p] = sa[i] - w;
        Qsort(); // sort by first key (previous rk), ties keep the tp order
        // Keep the previous ranks in tp. Only 1..n are live, so copying them is
        // enough; swapping the two MAXN-sized arrays costs O(MAXN) per round.
        std::copy(rk + 1, rk + n + 1, tp + 1);
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // A second half that starts past n counts as rank 0. Reading the
            // array there would pick up leftovers from earlier, longer strings.
            const int prevSecond = prev + w <= n ? tp[prev + w] : 0;
            const int curSecond = cur + w <= n ? tp[cur + w] : 0;
            rk[cur] = (tp[prev] == tp[cur] && prevSecond == curSecond) ? p : ++p;
        }
    }
}
void GetHeight()
{
    int j, k = 0;
    for (int i = 1; i <= n; i++)
    {
        if (k)
            k--;
        j = sa[rk[i] - 1];
        while (s[i + k] == s[j + k])
            k++;
        Height[rk[i]] = k;
    }
}
/* Number of essentially different substrings:
 * enumerate every suffix; the suffix ranked i contributes n - sa[i] + 1 - Height[i]. */
/* Here: number of essentially different substrings of length at least k.
 * The suffix ranked i has length len = n - sa[i] + 1. Its prefixes of length
 * <= Height[i] were already counted for the previous suffix and those of length
 * <= k - 1 are too short, so it adds len - min(max(k - 1, Height[i]), len). */
int main()
{
    int k;
    // Stop on end of input and on a partial header line; continuing with only
    // one of the two numbers read would leave k uninitialised.
    while (scanf("%d%d", &n, &k) == 2)
    {
        if (scanf(" %s", s + 1) != 1)
            break;
        n = strlen(s + 1); // the actual string length overrides the announced one
        SuffixSort();
        GetHeight();
        ll ans = 0;
        for (int i = 1; i <= n; i++)
        {
            const int len = n - sa[i] + 1;
            const int skipped = min(max(k - 1, Height[i]), len);
            ans += len - skipped;
        }
        printf("%lld\n", ans);
    }
    return 0;
}

Guess you like

Origin www.cnblogs.com/graytido/p/11578989.html