K-th occurrence (后缀自动机上合并权值线段树+树上倍增)

You are given a string S S consisting of only lowercase english letters and some queries.
For each query ( l , r , k ) (l,r,k) , please output the starting position of the k-th occurence of the substring S l S l + 1 S r S_lS_{l+1}\cdots S_r in S S .
Input
The first line contains an integer T ( 1 T 20 ) T(1≤T≤20) , denoting the number of test cases.
The first line of each test case contains two integer N ( 1 N 1 0 5 ) , Q ( 1 Q 1 0 5 ) N(1≤N≤10^5),Q(1≤Q≤10^5) , denoting the length of S S and the number of queries.
The second line of each test case contains a string S ( S = N ) S(|S|=N) consisting of only lowercase english letters. Then Q Q lines follow, each line contains three integer l , r ( 1 l r N ) l,r(1≤l≤r≤N) and k ( 1 k N ) k(1≤k≤N) , denoting a query. There are at most 5 testcases which N N is greater than 1 0 3 10^3 .
Output
For each query, output the starting position of the k-th occurence of the given substring. If such position don’t exists, output -1 instead.
Sample Input
2
12 6
aaabaabaaaab
3 3 4
2 3 2
7 8 3
3 4 2
1 4 2
8 12 1
1 1
a
1 1 1
Sample Output
5
2
-1
6
9
8
1

hdu6704

题目大意
给出一个字符串 S ( S 1 0 5 ) S(|S|\leq 10^5) ,然后有 Q ( Q 1 0 5 ) Q(Q\leq 10^5) 个询问,每个询问包含三个正整数 L , R , K ( 1 L R S , 1 K S ) L,R,K(1\leq L\leq R\leq |S|,1\leq K\leq |S|) ,代表询问 S L S L + 1 S R S_LS_{L+1}\cdots S_R 这个 S S 的子串在 S S 中第 K K 次出现的位置是哪里,如果 S S 的这个子串出现次数小于 K K ,就输出-1。
题目总览
首先,这道题要求S的某个子串的出现位置,毫无疑问就是求这个子串在后缀自动机上的endpos。而endpos是对应于自动机上的状态节点的,因此我们首先要找到S所求的子串对应的状态节点。接着,我们要求所有状态节点的endpos集合,并支持查询第 K K 小,这样就能建立子串-状态节点-endpos集合的关系,从而能求出相应子串的第 K K 次出现的位置了。
求子串对应的状态节点
首先,我们定义 u [ i ] u[i] S S 从1到 i i 的子串对应于自动机上的状态节点编号。在构造自动机的过程中,我们是从自动机的初始状态出发的,因此u数组可以边构造边更新。

for (int i = 0; i < N; ++i) {
	//自动机插入一个字符
 	 sam.insert(S[i] - 'a');
 	 //等于该字符插入时的状态,编号从1开始
	 u[i + 1] = sam.cur;
}

现在,我们知道了从1开始的所有子串对应的状态节点了,那么怎样求从l到r对应的状态节点呢?显然l到r是1到r的后缀,那么根据后缀自动机的性质,l到r要么是1到r对应的节点,要么是其后缀连接树上的父节点。设p为l到r的对应状态,那么p满足 m a x l e n ( l i n k ( p ) ) < ( r l + 1 ) m a x l e n ( p ) maxlen(link(p))<(r-l+1)\leq maxlen(p) ,即1到r的满足 ( r l + 1 ) m a x l e n ( p ) (r-l+1)\leq maxlen(p) 的第一个父节点(或它本身)。这样的话,我们只要根据后缀连接边来建一棵树,然后不断往父节点跳知道满足条件即可。但是,这样的话时间复杂度就是 O ( S ) O(|S|) ,而对应于 Q Q 个询问显然会超时。不过对于树上跳,我们可以采用树上倍增的方式进行优化。
D e p t h [ i ] Depth[i] 表示第i个节点的深度
F a t h e r [ i ] [ j ] Father[i][j] 表示第i个节点的第 2 j 2^j 级祖先的编号
L o g [ i ] Log[i] 表示 l o g 2 i log_2i 向上取整
E n d P o s T o T r e e [ i ] EndPosToTree[i] 表示状态节点i对应于线段树上相应的根的编号

class {
	int Depth[2 * MAXN];
	int Father[2 * MAXN][22];
	int Log2[2 * MAXN];
	//树上DFS求Depth和Father数组
	void DFS(int root, int father = 0) {
		Depth[root] = Depth[father] + 1;
		Father[root][0] = father;
		//一个节点的2^i祖先等于他的2^i-1祖先的2^i-1祖先
		for (int i = 1; i <= Log2[Depth[root]]; ++i) {
			Father[root][i] = Father[Father[root][i - 1]][i - 1];
		}
		for (const auto& Next : Edges[root]) {
			if (Next != father) {
				DFS(Next, root);
			}
		}
	}
public:
	//记录树结构
	vector<int> Edges[2 * MAXN];
	//初始化log2,优化时间
	void InitLog2() {
		for (int i = 1; i < 2 * MAXN; ++i) {
			Log2[i] = Log2[i - 1] + (1 << Log2[i - 1] == i);
		}
	}
	void AddEdge(const int& x, const int& y) {
		this->Edges[x].emplace_back(y);
		this->Edges[y].emplace_back(x);
	}
	//树上倍增
	int getAncestor(int Point, bool (*Condition)(const int&))const {
		//从最高级别的祖先起跳
		for (int k = Log2[Depth[Point]]; k >= 0; --k) {
			//满足条件就跳
			if (Condition(Father[Point][k])) {
				Point = Father[Point][k];
			}
		}
		return Point;
	}
	void Clear() {
		memset(Depth, 0x0, sizeof(Depth));
		memset(Father, 0x0, sizeof(Father));
		for (int i = 0; i <= 2 * N + 1; ++i) {
			Edges[i].clear();
		}
	}
	void Init() {
		Clear();
		for (int i = 1; i < sam.size; ++i) {
			const int& Link = sam.node[i].link;
			MultiplyOnTree.AddEdge(i, Link);
		}
		DFS(0);
	}
}MultiplyOnTree;

求状态节点的endpos集合并维护第k小
根据后缀自动机的性质,一个节点的endpos集合等于其所有后缀连接树上子节点的endpos集合的交集,即父节点是子节点的后缀。而叶节点的endpos集合就是自动机构造时插入的字符的位置,大小为1。但是在寻找第k小时,如果 O ( N ) O(N) 遍历,对于 Q Q 个询问显然后超时。不过我们可以用合并权值线段树的方法来维护第k小。
T r e e [ i ] Tree[i] 表示线段树上的i个节点所管辖的范围有多少个数
L e f t S o n [ i ] , R i g h t S o n [ i ] LeftSon[i],RightSon[i] 表示第i个节点的左右儿子的编号
R a d i x , O r d e r Radix,Order 用于计数排序

class SegmentTree {
	friend class SAM;
	int Tot;
	int
		Tree[50 * MAXN],
		LeftSon[50 * MAXN],
		RightSon[50 * MAXN],
		Radix[MAXN],
		Order[2 * MAXN];
public:
	void Clear() {
		Tot = 0;
		memset(Tree, 0x0, sizeof(Tree));
		memset(Radix, 0x0, sizeof(Radix));
	}
	SegmentTree() = default;
	//合并两棵权值线段树
	int Merge(int Left, int Right) {
		if (!Left || !Right) {
			return Left | Right;
		}
		int NewRoot = ++Tot;
		LeftSon[NewRoot] = Merge(LeftSon[Left], LeftSon[Right]);
		RightSon[NewRoot] = Merge(RightSon[Left], RightSon[Right]);
		this->Tree[NewRoot] = this->Tree[LeftSon[NewRoot]] + this->Tree[RightSon[NewRoot]];
		return NewRoot;
	}
	//单点更新
	void Update(int Loc, int& root, int Left = 1, int Right = N) {
		//如果是新加入的根节点,就为它分配一个新节点编号
		if (!root) {
			root = ++Tot;
			Tree[root] = LeftSon[root] = RightSon[root] = 0;
		}
		//找到了对应的位置,数量++
		if (Left == Right) {
			++Tree[root];
			return;
		}
		int&& mid = (Right + Left) >> 1;
		if (Loc <= mid) {
			Update(Loc, LeftSon[root], Left, mid);
		}
		else {
			Update(Loc, RightSon[root], mid + 1, Right);
		}
		//UpGrade
		this->Tree[root] = this->Tree[LeftSon[root]] + this->Tree[RightSon[root]];
	}
	//寻找第k小
	int getK_th(int Left, int Right, int K, int root) {
		if (Left == Right) {
			return Left;
		}
		int&& mid = (Left + Right) >> 1;
		const int
			& LeftSonValue = this->Tree[this->LeftSon[root]],
			& RightSonValue = this->Tree[this->RightSon[root]];
		//如果左边的数量大于k,说明在左边
		if (LeftSonValue >= K) {
			return getK_th(Left, mid, K, LeftSon[root]);
		}
		//如果右边的数量大于k-左边的数量,说明在右边
		else if (RightSonValue >= K - LeftSonValue) {
			return getK_th(mid + 1, Right, K - LeftSonValue, RightSon[root]);
		}
		//反则就不存在
		else {
			return -1;
		}
	}
	int getAns(int K, int P, int Left = 1, int Right = N) {
		int&& Ans = this->getK_th(Left, Right, K, EndPosToTree[P]);
		//子串右端位置转左端位置
		return (Ans == -1 ? -1 : Ans - (R - L));
	}
	//计数排序
	void CountingSort() {
		for (int i = 0; i < sam.size; ++i) {
			++Radix[sam.node[i].len];
		}
		for (int i = 1; i <= sam.node[sam.cur].len; ++i) {
			Radix[i] += Radix[i - 1];
		}
		for (int i = 0; i < sam.size; ++i) {
			Order[Radix[sam.node[i].len]--] = i;
		}
	}
	void Init() {
		CountingSort();
		//按maxlen(p)从小到大更新,就能保父节点后于子节点更新
		for (int i = sam.size; i > 1; --i) {
			const int& Son = Order[i];
			const int& Father = sam.node[Son].link;
			EndPosToTree[Father] = SegmentTrees.Merge(EndPosToTree[Father], EndPosToTree[Son]);
		}
	}
}SegmentTrees;

AC代码

#include<iostream>
#include<cstring>
#include<vector>
using namespace std;

const int MAXN = 1e5 + 5;

int N, L, R, K, Q;
char S[MAXN];
int u[MAXN];//1...r
int StateToTree[2 * MAXN];

struct SAM {
	int size, last, cur;
	struct Node {
		int len = 0, link = 0;
		int next[26];
		void clear() {
			len = link = 0;
			memset(next, 0, sizeof(next));
		}
	} node[MAXN * 2];
	void init() {
		for (int i = 0; i < size; i++) {
			node[i].clear();
		}
		node[0].link = -1;
		size = 1;
		last = 0;
	}
	void insert(char x) {
		int ch = x;
		cur = size++;
		node[cur].len = node[last].len + 1;
		int p = last;
		while (p != -1 && !node[p].next[ch]) {
			node[p].next[ch] = cur;
			p = node[p].link;
		}
		if (p == -1) {
			node[cur].link = 0;
		}
		else {
			int q = node[p].next[ch];
			if (node[p].len + 1 == node[q].len) {
				node[cur].link = q;
			}
			else {
				int clone = size++;
				StateToTree[clone] = 0;
				node[clone] = node[q];
				node[clone].len = node[p].len + 1;
				while (p != -1 && node[p].next[ch] == q) {
					node[p].next[ch] = clone;
					p = node[p].link;
				}
				node[q].link = node[cur].link = clone;
			}
		}
		last = cur;
	}
}sam;
class {
	int Depth[2 * MAXN];
	int Father[2 * MAXN][22];
	int Log2[2 * MAXN];
	void DFS(int root, int father = 0) {
		Depth[root] = Depth[father] + 1;
		Father[root][0] = father;
		for (int i = 1; i <= Log2[Depth[root]]; ++i) {
			Father[root][i] = Father[Father[root][i - 1]][i - 1];
		}
		for (const auto& Next : Edges[root]) {
			if (Next != father) {
				DFS(Next, root);
			}
		}
	}
public:
	vector<int> Edges[2 * MAXN];
	void InitLog2() {
		for (int i = 1; i < 2 * MAXN; ++i) {
			Log2[i] = Log2[i - 1] + (1 << Log2[i - 1] == i);
		}
	}
	void AddEdge(const int& x, const int& y) {
		this->Edges[x].emplace_back(y);
		this->Edges[y].emplace_back(x);
	}
	void Start() {
		DFS(0);
	}
	int getAncestor(int Point, bool (*Condition)(const int&))const {
		for (int k = Log2[Depth[Point]]; k >= 0; --k) {
			if (Condition(Father[Point][k])) {
				Point = Father[Point][k];
			}
		}
		return Point;
	}
	void Clear() {
		memset(Depth, 0x0, sizeof(Depth));
		memset(Father, 0x0, sizeof(Father));
		for (int i = 0; i <= 2 * N + 1; ++i) {
			Edges[i].clear();
		}
	}
	void Init() {
		Clear();
		for (int i = 1; i < sam.size; ++i) {
			const int& Link = sam.node[i].link;
			MultiplyOnTree.AddEdge(i, Link);
		}
		Start();
	}
}MultiplyOnTree;
class SegmentTree {
	friend class SAM;
	int Tot;
	int
		Tree[50 * MAXN],
		LeftSon[50 * MAXN],
		RightSon[50 * MAXN],
		Radix[MAXN],
		Order[2 * MAXN];
public:
	void Clear() {
		Tot = 0;
		memset(Tree, 0x0, sizeof(Tree));
		memset(Radix, 0x0, sizeof(Radix));
	}
	SegmentTree() = default;
	int Merge(int Left, int Right) {
		if (!Left || !Right) {
			return Left | Right;
		}
		int NewRoot = ++Tot;
		LeftSon[NewRoot] = Merge(LeftSon[Left], LeftSon[Right]);
		RightSon[NewRoot] = Merge(RightSon[Left], RightSon[Right]);
		this->Tree[NewRoot] = this->Tree[LeftSon[NewRoot]] + this->Tree[RightSon[NewRoot]];
		return NewRoot;
	}
	void Update(int Loc, int& root, int Left = 1, int Right = N) {
		if (!root) {
			root = ++Tot;
			Tree[root] = LeftSon[root] = RightSon[root] = 0;
		}
		if (Left == Right) {
			++Tree[root];
			return;
		}
		int&& mid = (Right + Left) >> 1;
		if (Loc <= mid) {
			Update(Loc, LeftSon[root], Left, mid);
		}
		else {
			Update(Loc, RightSon[root], mid + 1, Right);
		}
		this->Tree[root] = this->Tree[LeftSon[root]] + this->Tree[RightSon[root]];
	}
	int getK_th(int Left, int Right, int K, int root) {
		if (Left == Right) {
			return Left;
		}
		int&& mid = (Left + Right) >> 1;
		const int
			& LeftSonValue = this->Tree[this->LeftSon[root]],
			& RightSonValue = this->Tree[this->RightSon[root]];
		if (LeftSonValue >= K) {
			return getK_th(Left, mid, K, LeftSon[root]);
		}
		else if (RightSonValue >= K - LeftSonValue) {
			return getK_th(mid + 1, Right, K - LeftSonValue, RightSon[root]);
		}
		else {
			return -1;
		}
	}
	int getAns(int K, int P, int Left = 1, int Right = N) {
		int&& Ans = this->getK_th(Left, Right, K, StateToTree[P]);
		return (Ans == -1 ? -1 : Ans - (R - L));
	}
	void CountingSort() {
		for (int i = 0; i < sam.size; ++i) {
			++Radix[sam.node[i].len];
		}
		for (int i = 1; i <= sam.node[sam.cur].len; ++i) {
			Radix[i] += Radix[i - 1];
		}
		for (int i = 0; i < sam.size; ++i) {
			Order[Radix[sam.node[i].len]--] = i;
		}
	}
	void Init() {
		CountingSort();
		for (int i = sam.size; i > 1; --i) {
			const int& Son = Order[i];
			const int& Father = sam.node[Son].link;
			StateToTree[Father] = SegmentTrees.Merge(StateToTree[Father], StateToTree[Son]);
		}
	}
}SegmentTrees;


void Init() {
	sam.init();
	SegmentTrees.Clear();
}
void Input() {
	scanf("%d%d", &N, &Q);
	scanf("%s", S);
	Init();
	for (int i = 0; i < N; ++i) {
		sam.insert(S[i] - 'a');
		StateToTree[sam.cur] = 0;
		SegmentTrees.Update(i + 1, StateToTree[sam.cur]);
		u[i + 1] = sam.cur;
	}
}

bool Condition(const int& Point) {
	return R - L + 1 <= sam.node[Point].len;
}
int main() {
	int T;
	scanf("%d", &T);
	MultiplyOnTree.InitLog2();
	while (T--) {
		Input();
		MultiplyOnTree.Init();
		SegmentTrees.Init();
		while (Q--) {
			scanf("%d%d%d", &L, &R, &K);
			const int&& P = MultiplyOnTree.getAncestor(u[R], Condition);
			printf("%d\n", SegmentTrees.getAns(K, P));
		}
	}
	return 0;
}
发布了55 篇原创文章 · 获赞 63 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/qq_42971794/article/details/104097180