DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K | |
Total Submissions: 19923 | Accepted: 7585 |
Description
It's well known that a DNA sequence contains only the letters A, C, T and G, and it is very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to determine how many kinds of DNA sequences of a species do not contain any of those segments.
Suppose that the DNA sequences of a species consist of A, C, T and G, and that the length of each sequence is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
题意:给出m个DNA片段,问有多少个长为n且不包含其中任何一个片段的DNA序列。
n最大可达2e9,普通的逐位dp会超时,所以需要把dp的递推转化为矩阵乘法,再用快速幂求解。
矩阵元素c[i][j]表示从自动机状态i走一步到达状态j的方案数,那么长度为n的方案数只需要求该矩阵的n次幂。类似hdu2157的构建方式。
#include<stdio.h>
#include<algorithm>
#include<string.h>
#include<queue>
using namespace std;
// Shorthand for the integer type used for matrix entries and the answer.
#define ll long long
// Matrix entries and the final count are reduced modulo this value.
const int mod = 100000;
// Capacity of the automaton tables, the id table and the pattern buffer.
const int maxm = 125;
// Sequence length, read from input in main() and used as the exponent in work().
int n;
// tr[s][c] : transition of state s on letter code c (the code uses codes 0..3);
// fail[s]  : failure link of state s;
// last[s]  : non-zero when state s is marked as ending a pattern;
// cnt      : index of the most recently allocated state (state 0 is the root);
// id[ch]   : letter code of character ch, filled in main().
int tr[maxm][5], fail[maxm], last[maxm], cnt, id[maxm];
// Buffer holding the pattern that insert() adds to the trie.
char str[maxm];
// Square matrix of long long entries. multi(), ksm() and work() only touch the
// leading (cnt + 1) x (cnt + 1) part.
struct node
{
ll c[maxm][maxm];
// NOTE(review): the global instance `f` is not referenced in the visible code.
}f;
// Resets the automaton before a new test case: the node counter goes back to
// 0 (root only) and the failure links, transitions and pattern-end marks are
// zeroed.
void init()
{
    cnt = 0;
    memset(fail, 0, sizeof fail);
    memset(tr, 0, sizeof tr);
    memset(last, 0, sizeof last);
}
// Returns the matrix product a * b over the leading (cnt + 1) x (cnt + 1)
// block, reducing the running sum modulo `mod` after every term.
// Entries outside that block are zero in the result.
node multi(node a, node b)
{
    node prod = { 0 };
    const int dim = cnt + 1;
    for (int row = 0; row < dim; ++row)
    {
        for (int col = 0; col < dim; ++col)
        {
            ll acc = 0;
            for (int mid = 0; mid < dim; ++mid)
            {
                acc = (acc + a.c[row][mid] * b.c[mid][col]) % mod;
            }
            prod.c[row][col] = acc;
        }
    }
    return prod;
}
// Raises `rev` to the k-th power by repeated squaring, using multi() for each
// product. The accumulator starts as the identity on the leading
// (cnt + 1) x (cnt + 1) block, so k == 0 yields that identity.
node ksm(node rev, int k)
{
    node acc = { 0 };
    for (int d = 0; d <= cnt; ++d)
        acc.c[d][d] = 1;

    for (; k != 0; k /= 2)
    {
        // Fold the current power into the result when the low bit is set.
        if (k % 2 == 1)
            acc = multi(acc, rev);
        rev = multi(rev, rev);
    }
    return acc;
}
void insert()
{
int len = strlen(str);
int now = 0;
for (int i = 0;i < len;i++)
{
int num = id[str[i]];
if (!tr[now][num])
tr[now][num] = ++cnt;
now = tr[now][num];
}
last[now] = 1;
}
// Breadth-first pass over the trie that
//   * sets fail[] for every node below depth 1,
//   * replaces each missing transition tr[s][c] with the transition of s's
//     failure state, and
//   * marks a node in last[] when its failure target is already marked.
// The children of the root are only enqueued; their fail[] entries are not
// written here.
void find_fail()
{
    queue<int> pending;
    for (int c = 0; c < 4; ++c)
    {
        const int child = tr[0][c];
        if (child != 0)
            pending.push(child);
    }

    while (!pending.empty())
    {
        const int cur = pending.front();
        pending.pop();
        const int fb = fail[cur];
        for (int c = 0; c < 4; ++c)
        {
            const int via_fail = tr[fb][c];
            const int child = tr[cur][c];
            if (child == 0)
            {
                // No real child: borrow the failure state's transition.
                tr[cur][c] = via_fail;
                continue;
            }
            fail[child] = via_fail;
            pending.push(child);
            // A node whose failure target ends a pattern also ends one.
            if (last[via_fail])
                last[child] = 1;
        }
    }
}
void work()
{
node rev = { 0 };
for (int i = 0;i <= cnt;i++)
{
if (last[i]) continue;
for (int j = 0;j < 4;j++)
{
if (!last[tr[i][j]])
rev.c[i][tr[i][j]]++;
}
}
rev = ksm(rev, n);
ll ans = 0;
for (int i = 0;i <= cnt;i++)
ans = (ans + rev.c[0][i]) % mod;
printf("%lld\n", ans);
}
int main()
{
int i, j, k, sum, m;
id['A'] = 0, id['C'] = 1;
id['T'] = 2, id['G'] = 3;
while (scanf("%d%d", &m, &n) != EOF)
{
init();
for (i = 1;i <= m;i++)
{
scanf("%s", str);
insert();
}
find_fail();
work();
}
return 0;
}