K-th occurrence
Time Limit: 3000/3000 MS (Java/Others) Memory Limit: 524288/524288 K (Java/Others)
Total Submission(s): 1084 Accepted Submission(s): 343
Problem Description
You are given a string S consisting of only lowercase english letters and some queries.
For each query (l, r, k), please output the starting position of the k-th occurrence of the substring S_l S_{l+1} ... S_r in S.
Input
The first line contains an integer T(1≤T≤20), denoting the number of test cases.
The first line of each test case contains two integers N (1≤N≤10^5) and Q (1≤Q≤10^5), denoting the length of S and the number of queries.
The second line of each test case contains a string S(|S|=N) consisting of only lowercase english letters.
Then Q lines follow, each line contains three integers l, r (1≤l≤r≤N) and k (1≤k≤N), denoting a query.
There are at most 5 test cases in which N is greater than 10^3.
Output
For each query, output the starting position of the k-th occurrence of the given substring.
If such a position doesn't exist, output −1 instead.
Sample Input
2
12 6
aaabaabaaaab
3 3 4
2 3 2
7 8 3
3 4 2
1 4 2
8 12 1
1 1
a
1 1 1
Sample Output
5
2
-1
6
9
8
1
Source
分析:求某个子串第k次出现的位置,那么我们首先应该把这个子串出现的所有位置全部找出来,然后在这些起始位置中找第k小,就是答案。
整体思路大概是这样的:对s串构建后缀数组;对于给定的子串[L,R],先找到以该子串左端点L开头的后缀,再找出所有与它的lcp>=R-L+1的后缀,这些后缀都以该子串为前缀。由于lcp具有单调性,这些后缀在sa数组中必然是连续的一段,所以对height数组预处理区间最小值(ST表)之后,就可以用两次二分找出这段区间的左右端点;最后在这段区间内求起始位置的第k小(主席树)就可以了。注意一下细节就可以过了。
#include "bits/stdc++.h"
using namespace std;
// Input string; main() appends a 0 terminator and counts it in n.
char s[110004];
// sa: suffix array built by Suffix(); x, y: rank / scratch arrays and c: counting-sort
// buckets used inside Suffix(); n: working length of s; m: number of bucket keys;
// height: adjacent-suffix common-prefix lengths filled by getheight().
int sa[110004], x[110004], y[110004], c[110004], n, m, height[110004];
// Inverse of sa (rk[sa[i]] == i), filled by getheight().
int rk[110004];
// Sparse table over height: dp[i][j] holds min(height[i .. i + 2^j - 1]), filled by initdp().
int dp[110004][40];
// Builds the suffix array `sa` of s[0 .. n-1] by prefix doubling, sorting each
// round with a two-pass counting (radix) sort.
//
// Reads the globals s, n and m (m must be an exclusive upper bound on the
// character codes occurring in s). Writes sa; x, y, c and m are used as
// scratch space and are overwritten.
void Suffix() {
    // Round 0: counting sort of all suffixes by their first character.
    for (int i = 0; i < m; i++) c[i] = 0;
    for (int i = 0; i < n; i++) c[x[i] = s[i]]++;
    // Prefix sums start at 1: bucket 0 has no predecessor, and the previous
    // `i = 0` start read c[-1], which is outside the array.
    for (int i = 1; i < m; i++) c[i] += c[i - 1];
    for (int i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;
    for (int k = 1; k <= n; k <<= 1) {
        // y lists suffix starts ordered by their second key (rank of i + k).
        // Suffixes whose second half falls off the end come first.
        int p = 0;
        for (int i = n - k; i < n; i++) y[p++] = i;
        for (int i = 0; i < n; i++) if (sa[i] >= k) y[p++] = sa[i] - k;
        // Stable counting sort by the first key x[], walking y back to front.
        for (int i = 0; i < m; i++) c[i] = 0;
        for (int i = 0; i < n; i++) c[x[y[i]]]++;
        for (int i = 1; i < m; i++) c[i] += c[i - 1];
        for (int i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
        // After the swap y holds the old ranks and x receives the new ones.
        swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        // The && short-circuits, so y[... + k] is only read when the first keys tie.
        // NOTE(review): keeping that index below n presumably relies on the unique
        // 0 terminator main() appends to s -- confirm before reusing elsewhere.
        for (int i = 1; i < n; i++)
            x[sa[i]] = y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k] ? p - 1 : p++;
        if (p >= n) break;  // every suffix already has a distinct rank
        m = p;
    }
}
void getheight() {
for (int i = 0; i < n; i++) rk[sa[i]] = i;
for (int i = 0, k = 0; i < n; i++) {
if (rk[i]) {
if (k) --k;
else k = 0;
int j = sa[rk[i] - 1];
while (s[i + k] == s[j + k]) k++;
height[rk[i]] = k;
}
}
}
// Builds the sparse table dp over height[0 .. n-1]:
// dp[i][j] = min(height[i], ..., height[i + 2^j - 1]) for every level j with
// 2^j < n and every i whose window fits inside [0, n).
void initdp() {
    for (int i = 0; i < n; ++i) dp[i][0] = height[i];
    for (int level = 1; (1 << level) < n; ++level) {
        const int half = 1 << (level - 1);  // width of the two merged windows
        const int span = half << 1;         // width of the window being built
        for (int start = 0; start + span - 1 < n; ++start) {
            dp[start][level] = min(dp[start][level - 1], dp[start + half][level - 1]);
        }
    }
}
// Returns min(height[lo + 1 .. hi]) where lo = min(l, r) and hi = max(l, r),
// answered from the sparse table dp with two overlapping power-of-two windows.
// For l == r the range is empty and the neutral element of min (0x3f3f3f3f)
// is returned.
int quemini(int l, int r) {
    if (l > r) swap(l, r);
    l++;  // height[i] relates sa[i - 1] and sa[i], so the range starts one past l
    const int len = r - l + 1;
    // Empty range: previously this evaluated log2(0) and converted -inf to int.
    if (len <= 0) return 0x3f3f3f3f;
    // Integer floor(log2(len)); avoids depending on floating-point rounding.
    int k = 0;
    while ((1 << (k + 1)) <= len) ++k;
    return min(dp[l][k], dp[r - (1 << k) + 1][k]);
}
// Forwards to quemini(l, r): the minimum of height between suffix-array
// indices l and r, which main() compares against the query length as the
// common-prefix length of those two suffixes.
int getlcp(int l, int r) {
return quemini(l, r);
}
// One node of the persistent segment tree used by updata() / que().
struct tree {
    int ls;   // index in t of the left child
    int rs;   // index in t of the right child
    int sum;  // number of values inserted into this node's range
};

// Node pool; updata() hands out indices starting from 1, index 0 is reset by init().
tree t[110004 * 20];
// root[i]: root node of the i-th tree version built in main(); cnt: index of the
// most recently allocated node in t (incremented by updata()).
// Rank is not referenced by any code visible in this file.
int root[110004], cnt, Rank[110004];
// Resets the persistent segment tree: node 0 becomes an empty node with no
// children, the allocation counter restarts at 0 and version 0 points at node 0.
void init() {
    t[0].sum = 0;
    t[0].rs = 0;
    t[0].ls = 0;
    cnt = 0;
    root[0] = 0;
}
// Inserts value w into the tree version rooted at node `edi`, which covers the
// value range [l, r], without modifying that version. Every node on the path to
// w is copied into a freshly allocated node whose count is one higher.
// Returns the index of the new root.
int updata(int edi, int l, int r, int w) {
    const int fresh = ++cnt;
    t[fresh] = t[edi];  // start as a copy so the untouched child is shared
    t[fresh].sum += 1;
    if (l != r) {
        const int mid = (l + r) >> 1;
        if (w > mid) {
            const int child = updata(t[edi].rs, mid + 1, r, w);
            t[fresh].rs = child;
        } else {
            const int child = updata(t[edi].ls, l, mid, w);
            t[fresh].ls = child;
        }
    }
    return fresh;
}
// Walks two tree versions in lockstep (roots i and j, both covering [l, r]) and
// returns the k-th smallest value among those counted in version j but not in
// version i. At each step the difference of the left children's counts decides
// which half contains the answer.
int que(int i, int j, int l, int r, int k) {
    while (l != r) {
        const int leftCount = t[t[j].ls].sum - t[t[i].ls].sum;
        const int mid = (l + r) >> 1;
        if (k > leftCount) {
            // Skip the whole left half and look for the remainder on the right.
            k -= leftCount;
            i = t[i].rs;
            j = t[j].rs;
            l = mid + 1;
        } else {
            i = t[i].ls;
            j = t[j].ls;
            r = mid;
        }
    }
    return l;
}
// k-th smallest value inserted by tree versions l .. r (inclusive), obtained by
// comparing version r against version l - 1 over the value range [1, n].
int quek(int l, int r, int k) {
    return que(root[l - 1], root[r], 1, n, k);
}
// Reads the test cases and, for every query (l, r, k), prints the 1-based start
// of the k-th occurrence of s[l..r], or -1 when there are fewer than k.
//
// Per test case: build the suffix array, the height array and its sparse table,
// then one persistent-tree version per suffix-array index holding the 1-based
// start positions. Per query: binary-search the maximal suffix-array interval
// [L, R] around rk[l] whose suffixes share at least r - l + 1 leading characters
// with suffix l, then take the k-th smallest start position inside it.
int main() {
    // Named `cases` rather than `t` so it no longer shadows the global node pool.
    int cases;
    if (scanf("%d", &cases) != 1) return 0;
    while (cases--) {
        init();
        int q;
        scanf("%d%d", &n, &q);
        scanf("%s", s);
        s[n++] = 0;  // the terminator becomes part of the string handed to Suffix()
        m = 128;
        Suffix();
        getheight();
        initdp();
        // Valid suffix-array indices are 0 .. n-1. Index 0 is skipped and index n
        // is not touched: sa[n] is outside the array built for this test case
        // and may hold data left over from an earlier one.
        for (int i = 1; i < n; ++i) {
            root[i] = updata(root[i - 1], 1, n, sa[i] + 1);
        }
        int l, r, k;
        while (q--) {
            scanf("%d%d%d", &l, &r, &k);
            l--, r--;
            const int len = r - l + 1;
            const int pivot = rk[l];
            int L = pivot, R = pivot;
            // Leftmost index whose suffix still shares `len` characters.
            int ql = 1, qr = pivot - 1;
            while (ql <= qr) {
                const int mid = (ql + qr) >> 1;
                if (getlcp(mid, pivot) >= len) {
                    L = mid;
                    qr = mid - 1;
                } else {
                    ql = mid + 1;
                }
            }
            // Rightmost such index. The upper bound is n - 1: probing index n
            // would read sparse-table cells that initdp() never wrote for this
            // test case.
            ql = pivot + 1, qr = n - 1;
            while (ql <= qr) {
                const int mid = (ql + qr) >> 1;
                if (getlcp(pivot, mid) >= len) {
                    R = mid;
                    ql = mid + 1;
                } else {
                    qr = mid - 1;
                }
            }
            if (R - L + 1 < k) puts("-1");
            else printf("%d\n", quek(L, R, k));
        }
    }
    return 0;
}