CodeForces528D Fuzzy Search

S o u r c e : Codeforces Round #296 (Div. 1)
P r o b l e m : 有两个基因串S和T,他们只包含AGCT四种字符。现在你要找出T在S中出现了几次。 有一个门限值k≥0。T在S的第i(1≤i≤|S|-|T|+1)个位置中出现的条件如下:把T的开头和S的第i个字符对齐,然后T中的每一个字符能够在S中找到一样的,且位置偏差不超过k的,那么就认为T在S的第i个位置中出现。也就是说对于所有的 j (1≤j≤|T|),存在一个 p (1≤p≤|S|),使得|(i+j-1)-p|≤k 和[p]=T[j]都成立。 例如,根据这样的定义”ACAT”出现在”AGCAATTCAT”的第2,3和6的位置。


I d e a :
考虑 k = 0 时用FFT做字符串匹配,枚举“AGCT”,只有被枚举的字符所在的位置值为1,其他位置值为0。那么翻转T串,得到卷积 f ( i ) = k = 0 n 1 S ( i + k ) T ( n k 1 ) == n
那么 k > 0 时,同样枚举到’A’,对于S串中的位置i,如果它的左右k范围内存在S[j]=’A’,那么同样将它的值设为1,如果T[i]=’A’,就得到了T[i]的贡献。这部分随便差分一下就好了。
C o d e :

#include<bits/stdc++.h>
using namespace std;

#define lc o<<1
#define rc o<<1|1
#define fi first
#define se second
#define pb push_back
#define ALL(X) (X).begin(), (X).end()
#define bcnt(X) __builtin_popcountll(X)
#define CLR(A, X) memset(A, X, sizeof(A))
#pragma comment(linker, "/STACK:1024000000,1024000000")
#define DEBUG printf("Passing [%s] in Line %d\n",__FUNCTION__,__LINE__)

using DB = double;
using LL = long long;
using PII = pair<int, int>;
const int N = 8e5+10;
const LL MOD = 1e9+7;
const DB PI = acos(-1.0);

struct Complex {
    double x, y;
    Complex(double x = 0.0, double y = 0.0): x(x), y(y) { }
    Complex operator - (const Complex &b) const { return Complex(x-b.x, y-b.y); }
    Complex operator + (const Complex &b) const { return Complex(x+b.x, y+b.y); }
    Complex operator * (const Complex &b) const { return Complex(x*b.x-y*b.y, x*b.y+y*b.x); }
}a[4*N], b[4*N];

void Rader(Complex *y, int len) {
    for(int i = 1, j = len/2; i < len-1; i++) {
        if(i < j) swap(y[i], y[j]);
        int k = len/2;
        while(j >= k) j -= k, k /= 2;
        if(j < k) j += k;
    }
}

void FFT(Complex *y, int len, int op) {
    Rader(y, len);
    for(int h = 2; h <= len; h <<= 1) {
        Complex wn(cos(-op*2*PI/h), sin(-op*2*PI/h));
        for(int j = 0; j < len; j += h) {
            Complex w(1, 0);
            for(int k = j; k < j+h/2; k++) {
                Complex u = y[k];
                Complex t = w*y[k+h/2];
                y[k] = u+t;
                y[k+h/2] = u-t;
                w = w*wn;
            }
        }
    }
    if(op == -1) {
        for(int i = 0; i < len; i++)
            y[i].x /= len;
    }
}

int Conv(Complex *a, int la, Complex *b, int lb) {
    int len = 1;
    while(len < la+lb) len <<= 1;
    for(int i = la; i < len; i++) a[i].x = a[i].y = 0;
    for(int i = lb; i < len; i++) b[i].x = b[i].y = 0;
    FFT(a, len, 1); //DFT
    FFT(b, len, 1);
    for(int i = 0; i < len; i++) a[i] = a[i]*b[i];
    FFT(a, len, -1); //IDFT
    return len;
}

int K, n, m, c[N], ans[N];
bool vis[N];
char S[N], T[N];

void solve(char ch) {
    for(int i = 0; i < m; i++) a[i].x = a[i].y = 0, c[i] = 0;
    for(int i = 0; i < m; i++) {
        if(S[i] == ch) {
            int l = max(i-K, 0), r = min(i+K, m-1);
            c[l]++, c[r+1]--;
        }
    }
    for(int i = 1; i < m; i++) c[i] += c[i-1];
    for(int i = 0; i < m; i++) {
//        printf("%c %d\n", ch, c[i]);
        if(c[i]) a[i].x = 1;
    }
    for(int i = 0; i < n; i++) {
        b[i].x = b[i].y = 0;
        if(T[i] == ch) b[i].x = 1;
    }
    Conv(a, m, b, n);
    for(int i = 0; i < m; i++) {
        ans[i] += a[n+i-1].x+0.5;
//        printf("%.3f %.3f\n", a[i].x, a[n+i-1].x);
    }
}

int main() {
    scanf("%d%d%d", &m, &n, &K);
    scanf("%s%s", S, T);
    reverse(T, T+n);
    solve('A'); solve('G'); solve('C'); solve('T');
    int cnt = 0;
    for(int i = 0; i < m; i++) {
        if(ans[i] == n) cnt++;
//        printf("? = %d %d\n", ans[i], n);
    }
    printf("%d\n", cnt);
    return 0;
}

猜你喜欢

转载自blog.csdn.net/qq_32506797/article/details/81270814
今日推荐