「UVA 11468」Substring

Description

给定 \(n\) 个字符 \(c_1, c_2, \cdots, c_n\) 和各自的选择概率 \(p_1, p_2, \cdots, p_n\)，每次按概率独立地随机选择一个字符，共选择 \(L\) 次，得到长度为 \(L\) 的随机字符串 \(S\)。给出 \(K\) 个模板串 \(t_1, t_2, \cdots, t_K\)，计算 \(S\) 不包含任何一个模板串的概率。

多组数据，组数为 \(T\)。

Hint

\(1\le T \le 50\)
\(1\le K\le 20\)
\(1\le |t_i|\le 20\)
\(c_i \in [\texttt{a}, \texttt{z}] \cup [\texttt{A}, \texttt{Z}] \cup [\texttt{0}, \texttt{9}]\)
\(1\le L \le 100\)
\(\sum_{i = 1}^n p_i = 1\)
\(\forall i \in [1, K], t_i\) 由 \(n\) 个给定字符组成。

Solution

AC 自动机 + 动态规划

首先对所有模板串建 AC 自动机，在每个模板串的结尾结点打上标记，表示不能走到该结点。然后按这类问题的套路来：

设 \(f(i, x)\) 为已生成长度为 \(i\) 的字符串、当前位于结点 \(x\)，且途中从未走到过标记结点的概率。

那么自然是 \(f(i - 1, x)\) 转移到 \(f(i, \delta(x, c))\) 的方向。状态转移方程：

\[f(i, y) = \sum\limits_{\delta(x, c) = y} f(i - 1, x) \times \text{prob}(c) \]

其中 \(\text{prob}(c)\) 表示选择字符 \(c\) 的概率。显然，如果 \(y\) 点被标记为结尾，就要跳过这次转移。

像这种 dp 会有一个坑：当一个结点的 fail 指针指向的结点被标记了，那么这个结点也应该被标记。为什么呢？因为 fail 指向的是当前结点所代表的字符串在 Trie 中能匹配到的最长真后缀，而一个以某个模板串作为后缀的字符串自然也不合法。

Code

#include <cctype>
#include <cstdio>
#include <cstring>
#include <queue>

using namespace std;
// Upper bound on the length of a single pattern (the hint gives |t_i| <= 20);
// used together with K to size the automaton's node pool.
const int P_L = 25;
// Upper bound on the random string length L plus slack (the hint gives L <= 100);
// sizes the DP's length dimension and the pattern input buffer.
const int M_L = 105;
// Upper bound on the number of patterns (the hint gives K <= 20).
const int K = 25;
// Alphabet size: 10 digits + 26 lower-case + 26 upper-case letters.
const int S = 62;

// k: number of patterns, n: number of alphabet characters, l: length of the random string.
int k, n, l;
// prob[c]: probability of drawing the character whose index (as returned by trans) is c.
double prob[S];

// Maps an alphanumeric character to a dense index in [0, 62):
//   '0'..'9' -> 0..9, 'a'..'z' -> 10..35, 'A'..'Z' -> 36..61.
// Precondition: c is alphanumeric. Any other character falls through to the
// upper-case branch and yields an index outside the table range.
inline int trans(char c) {
	// The <cctype> classifiers require a value representable as unsigned char
	// (or EOF); plain char may be negative, so convert before classifying.
	const unsigned char uc = static_cast<unsigned char>(c);
	if (isdigit(uc)) return c - '0';
	if (islower(uc)) return c - 'a' + 10;
	return c - 'A' + 36;
}

// Aho-Corasick automaton over the 62-character alphabet, plus the DP that
// computes the probability that a random string avoids every inserted pattern.
namespace ACAM {
	// Capacity of the node pool (root included).
	const int T = K * P_L;
	struct Node {
		int ch[S];   // child table; build_fail() fills missing entries with fail transitions
		int fail;    // failure link
		bool statu;  // true when this node must not be entered (a pattern ends here or at a suffix)
	} t[T];
	// f[i][x]: probability that a string of length i ends at node x
	// without having entered any forbidden node.
	double f[M_L][T];
	// Index of the last allocated node; node 0 is the root.
	int total;

	// Clears the DP table and the trie so that the next test case starts from
	// an automaton containing only the root.
	void restart() {
		memset(f, 0, sizeof f);
		memset(t, 0, sizeof t);
		total = 0;
	}

	// Adds the NUL-terminated pattern s to the trie and marks its final node
	// as forbidden. The caller must make sure the node pool (T nodes) is not
	// exceeded; no bounds check is performed here.
	void insert(char* s) {
		int x = 0;
		for (int i = 0; s[i]; i++) {
			const int c = trans(s[i]);
			if (!t[x].ch[c]) t[x].ch[c] = ++total;
			x = t[x].ch[c];
		}
		t[x].statu = true;
	}

	// Computes failure links by BFS, completes every missing transition so
	// that ch[] becomes a full transition table, and propagates the forbidden
	// flag along failure links.
	void build_fail() {
		std::queue<int> Q;
		// Depth-1 nodes fail to the root.
		for (int c = 0; c < S; c++) {
			const int child = t[0].ch[c];
			if (child) {
				t[child].fail = 0;
				Q.push(child);
			}
		}
		while (!Q.empty()) {
			const int x = Q.front();
			Q.pop();
			const int fx = t[x].fail;
			for (int c = 0; c < S; c++) {
				const int child = t[x].ch[c];
				if (child) {
					Q.push(child);
					t[child].fail = t[fx].ch[c];
				} else {
					// No real child: reuse the transition of the fail node.
					t[x].ch[c] = t[fx].ch[c];
				}
			}
			// If the fail target is forbidden, the string of x has a pattern as
			// a suffix, so x is forbidden as well. Fail targets are shallower
			// and were dequeued earlier, so their flag is already final.
			t[x].statu |= t[fx].statu;
		}
	}

	// Returns the probability that a random string of length l never enters a
	// forbidden node. Expects f to have been zeroed by restart(), build_fail()
	// to have been called, and l to be smaller than M_L.
	double solve(int l) {
		f[0][0] = 1.0;
		for (int i = 1; i <= l; i++)
			for (int x = 0; x <= total; x++)
				for (int c = 0; c < S; c++) {
					const int y = t[x].ch[c];
					if (!t[y].statu)
						f[i][y] += f[i - 1][x] * prob[c];
				}
		double ret = 0.0;
		for (int x = 0; x <= total; x++)
			ret += f[l][x];
		return ret;
	}
}

// Input buffer for one pattern string.
char str[M_L];

// Reads all test cases; for each one builds the automaton from the patterns,
// loads the character probabilities, and prints the probability that the
// random string contains no pattern as "Case #i: p". Stops quietly if the
// input is truncated or malformed.
signed main() {
	int total_tc = 0;
	if (scanf("%d", &total_tc) != 1) return 0;

	for (int tc = 1; tc <= total_tc; tc++) {
		ACAM::restart();
		memset(prob, 0, sizeof prob);

		if (scanf("%d", &k) != 1) return 0;
		for (int i = 1; i <= k; i++) {
			// Width-limited so an over-long token cannot overrun str (M_L = 105 bytes).
			if (scanf("%104s", str) != 1) return 0;
			ACAM::insert(str);
		}

		if (scanf("%d", &n) != 1) return 0;
		for (int i = 1; i <= n; i++) {
			char lt[5];
			double pb;
			// The character is read as a token so that leading whitespace is
			// skipped; the width keeps it inside lt.
			if (scanf("%4s%lf", lt, &pb) != 2) return 0;
			prob[trans(lt[0])] += pb;
		}

		// Failure links can only be built once every pattern has been inserted.
		ACAM::build_fail();
		if (scanf("%d", &l) != 1) return 0;
		double ans = ACAM::solve(l);

		printf("Case #%d: %.6f\n", tc, ans);
	}
	return 0;
}