poj 2778 DNA Sequence AC自动机+矩阵快速幂

DNA Sequence

Time Limit: 1000MS		Memory Limit: 65536K
Total Submissions: 19923		Accepted: 7585

Description

It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain any of those segments.

Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of the sequences is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

题意：给出m个DNA片段，问有多少个长为n且不包含以上任何片段的DNA序列（答案对100000取模）。

n最大可达2e9，普通的逐位dp是不行的，所以需要将dp的递推转为矩阵乘法，然后用快速幂求解。

矩阵元素c[i][j]表示从自动机状态i走一步到达状态j的方案个数（不经过任何危险状态），那么长为n的序列个数只需要求该矩阵的n次幂，答案为结果矩阵第0行各元素之和。类似hdu2157的构建方式。

#include<stdio.h>
#include<algorithm>
#include<string.h>
#include<queue>
using namespace std;
// 64-bit integer type used for matrix entries and intermediate products.
typedef long long ll;
// Every count is reported modulo this value.
const int mod = 100000;
// Capacity (number of rows) of the automaton tables and size of the pattern buffer.
const int maxm = 125;
// Length of the sequences to count; read from the input for each test case.
int n;
// tr[s][c]: state reached from state s on letter code c (codes 0..3 are used).
int tr[maxm][5];
// fail[s]: failure link of state s.
int fail[maxm];
// last[s]: non-zero when reaching state s means a forbidden segment has been matched.
int last[maxm];
// Index of the most recently created state; state 0 is the root.
int cnt;
// id[ch]: letter code of character ch. The table is subscripted by character
// code, so it has one slot per possible unsigned char value instead of being
// tied to the number of automaton states.
int id[256];
// Buffer holding the pattern currently being read and inserted.
char str[maxm];
// Square matrix of 64-bit entries with room for maxm x maxm cells. The
// routines in this file that operate on it only touch the top-left
// (cnt + 1) x (cnt + 1) corner.
struct node
{
	ll c[maxm][maxm];
};
// File-scope matrix instance.
node f;
void init()
{
	memset(last, 0, sizeof(last));
	memset(tr, 0, sizeof(tr));
	memset(fail, 0, sizeof(fail));
	cnt = 0;
}
// Returns the matrix product a * b, reduced modulo `mod`, over the
// (cnt + 1) x (cnt + 1) corner that is in use. Cells outside that corner of
// the result are zero.
//
// Precondition: the entries of a and b are already in [0, mod). Each product
// is then below mod * mod = 1e10 and a cell sums at most maxm = 125 of them,
// so the running total stays far below the 64-bit limit and a single
// reduction per cell is enough.
//
// The operands are taken by const reference to avoid copying two full
// matrices on every call. The result is built in a separate local, so passing
// the same matrix for both operands, or assigning the result back to an
// operand, is safe.
node multi(const node& a, const node& b)
{
	node x = { 0 };
	for (int i = 0; i <= cnt; i++)
	{
		// i-k-j order walks rows of b and x contiguously.
		for (int k = 0; k <= cnt; k++)
		{
			const ll aik = a.c[i][k];
			if (aik == 0)
				continue; // nothing to add for this k
			for (int j = 0; j <= cnt; j++)
				x.c[i][j] += aik * b.c[k][j];
		}
		for (int j = 0; j <= cnt; j++)
			x.c[i][j] %= mod;
	}
	return x;
}
// Fast exponentiation: returns rev raised to the k-th power (entries modulo
// `mod`, via multi) over the (cnt + 1) x (cnt + 1) corner in use.
// For k <= 0 the identity matrix is returned.
//
// rev is taken by value because it is repeatedly squared in place.
node ksm(node rev, int k)
{
	node x = { 0 };
	for (int i = 0; i <= cnt; i++)
		x.c[i][i] = 1;
	while (k > 0)
	{
		if (k & 1)
			x = multi(x, rev);
		k >>= 1;
		// Square only while exponent bits remain; the square computed after
		// the last bit would never be used.
		if (k > 0)
			rev = multi(rev, rev);
	}
	return x;
}
void insert()
{
	int len = strlen(str);
	int now = 0;
	for (int i = 0;i < len;i++)
	{
		int num = id[str[i]];
		if (!tr[now][num])
			tr[now][num] = ++cnt;
		now = tr[now][num];
	}
	last[now] = 1;
}
// Breadth-first pass over the trie that
//   * sets the failure link of every state,
//   * fills each missing transition with the transition of the failure state,
//     so that tr[s][c] is defined for every state s and letter code c, and
//   * marks a state as terminal when the state its failure link points to is
//     terminal.
// The root's children keep failure link 0 and are the starting points.
void find_fail()
{
	queue<int> pending;
	for (int c = 0; c < 4; ++c)
	{
		const int child = tr[0][c];
		if (child != 0)
			pending.push(child);
	}
	while (!pending.empty())
	{
		const int cur = pending.front();
		pending.pop();
		const int fallback = fail[cur];
		for (int c = 0; c < 4; ++c)
		{
			// Where the failure state goes on this letter.
			const int viaFail = tr[fallback][c];
			if (tr[cur][c] != 0)
			{
				const int child = tr[cur][c];
				fail[child] = viaFail;
				pending.push(child);
			}
			else
			{
				// No real edge: borrow the failure state's transition.
				tr[cur][c] = viaFail;
			}
			// Inherit the terminal mark along the failure link.
			if (last[viaFail])
				last[tr[cur][c]] = 1;
		}
	}
}
void work()
{
	node rev = { 0 };
	for (int i = 0;i <= cnt;i++)
	{
		if (last[i]) continue;
		for (int j = 0;j < 4;j++)
		{
			if (!last[tr[i][j]])
				rev.c[i][tr[i][j]]++;
		}
	}
	rev = ksm(rev, n);
	ll ans = 0;
	for (int i = 0;i <= cnt;i++)
		ans = (ans + rev.c[0][i]) % mod;
	printf("%lld\n", ans);
}
int main()
{
	int i, j, k, sum, m;
	id['A'] = 0, id['C'] = 1;
	id['T'] = 2, id['G'] = 3;
	while (scanf("%d%d", &m, &n) != EOF)
	{
		init();
		for (i = 1;i <= m;i++)
		{
			scanf("%s", str);
			insert();
		}
		find_fail();
		work();
	}
	return 0;
}

poj 2778 DNA Sequence AC自动机+矩阵快速幂

猜你喜欢