BZOJ4259: mangled string

[Portal: BZOJ4259]


Brief title:

  Given two strings: the first has length m and the second has length n. A '*' character in either string is a wildcard, meaning that position can match any character.

  Find how many times the first string occurs in the second string, and output the starting positions of those occurrences in the second string in increasing order.


answer:

  FFT, wildcard matching

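  For two strings A and B of length i, define their similarity as $\sum_{j=0}^{i-1}(A[j]-B[j])^2$, where the sum is taken only over positions j with A[j] != '*' and B[j] != '*' (a value of 0 means a perfect match).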

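  Encode '*' as 0 (and every letter as a positive integer). The wildcard condition can then be folded into the sum itself: $\sum_{j=0}^{i-1}(A[j]-B[j])^2A[j]B[j]$, because any term that involves a '*' is multiplied by 0.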

  Every term is non-negative, so string A and string B match completely if and only if $\sum_{j=0}^{i-1}(A[j]-B[j])^2A[j]B[j]$ is 0.

  For this problem, let f(i) be the similarity between string A and the substring of B of length m that ends at position i.

  First decrement n and m (so that indices run from 0 to m and from 0 to n, which simplifies the formulas), and then pad A with zeros at the end.

  Obviously $f(i)=\sum_{j=0}^{m}(A[j]-B[i-m+j])^2A[j]B[i-m+j]$

  Reversing the array A (and using the zero padding to extend the sum up to i) gives $f(i)=\sum_{j=0}^{i}(A[j]-B[i-j])^2A[j]B[i-j]$

  Expanding this formula gives $f(i)=\sum_{j=0}^{i}A[j]^3B[i-j]-2\sum_{j=0}^{i}A[j]^2B[i-j]^2+\sum_{j=0}^{i}A[j]B[i-j]^3$

  Each of the three sums is a convolution, so each one can be computed separately with a single FFT-based multiplication.


Reference Code:

#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<algorithm>
#include<cmath>
using namespace std;
// 64-bit integer alias (not referenced by the code visible in this file).
typedef long long LL;
// The constant pi, obtained as acos(-1); used for the FFT twiddle factors.
const double PI=acos(-1.0);
// Minimal complex-number value type used by the FFT routines.
// Only addition, subtraction and multiplication are provided.
struct Complex
{
    double r,i; // real part, imaginary part
    // Default construction yields 0+0i, so a Complex declared without
    // arguments never carries indeterminate members.
    Complex():r(0.0),i(0.0){}
    // Builds the value _r + _i*sqrt(-1).
    Complex(double _r,double _i):r(_r),i(_i){}
    friend Complex operator + (const Complex &x,const Complex &y){return Complex(x.r+y.r,x.i+y.i);}
    friend Complex operator - (const Complex &x,const Complex &y){return Complex(x.r-y.r,x.i-y.i);}
    // (xr + xi*I)(yr + yi*I) = (xr*yr - xi*yi) + (xr*yi + xi*yr)*I
    friend Complex operator * (const Complex &x,const Complex &y){return Complex(x.r*y.r-x.i*y.i,x.r*y.i+x.i*y.r);}
}a[1300000],b[1300000]; // global work buffers for the two convolution operands
// Bit-reversal permutation table: calc() fills the first len entries and fft() uses them to reorder its input.
int R[1300000];
// In-place iterative radix-2 FFT over y[0..len-1].
//   y   : buffer holding len complex samples; overwritten with the transform.
//   len : transform length; the butterfly structure requires a power of two,
//         and R[0..len-1] must already hold the bit-reversal table for len.
//   on  : +1 for the forward transform, -1 for the inverse transform.
// For on == -1 only the real parts are divided by len; the imaginary parts
// are left unscaled (the callers in this file read only .r).
void fft(Complex *y,int len,int on)
{
    // Reorder the input into bit-reversed order; the i<R[i] test swaps each pair once.
    for(int i=0;i<len;i++) if(i<R[i]) swap(y[i],y[R[i]]);
    // half is the half-size of the butterflies merged in this pass.
    for(int half=1;half<len;half<<=1)
    {
        // Principal root of unity for blocks of size 2*half; the sign of the
        // imaginary part selects forward vs. inverse.
        Complex wn(cos(PI/half),sin(on*PI/half));
        for(int start=0;start<len;start+=(half<<1))
        {
            Complex w(1,0);
            for(int k=0;k<half;k++,w=w*wn)
            {
                Complex u=y[start+k];
                Complex v=w*y[start+k+half];
                y[start+k]=u+v;
                y[start+k+half]=u-v;
            }
        }
    }
    // Normalise the inverse transform. The bound is i<len: y[len] lies outside
    // the transformed range and must not be touched.
    if(on==-1) for(int i=0;i<len;i++) y[i].r/=len;
}
// Convolves the sequences stored in the global buffers a and b.
//   n, m : highest used index of a and of b respectively (so the product has
//          indices 0..n+m).
// On return a[k].r holds the k-th coefficient of the product (up to
// floating-point error); b is left holding its forward transform.
// Everything in a and b beyond the used indices must be zero on entry, and the
// padded length must fit in the global arrays a, b and R.
void calc(int n,int m)
{
    // Smallest power of two strictly greater than n+m, and its bit count L.
    int L=0;m+=n;
    for(n=1;n<=m;n<<=1) L++;
    // Build the bit-reversal table for length n. R[0] is always 0, and starting
    // at i=1 guarantees L>=1 whenever the loop body runs, so the shift count
    // L-1 is never negative. Every entry in [0,n) is rewritten here, so no
    // wholesale clearing of R is needed.
    R[0]=0;
    for(int i=1;i<n;i++) R[i]=(R[i>>1]>>1)|((i&1)<<(L-1));
    fft(a,n,1);fft(b,n,1);
    // Pointwise product over exactly the n transformed slots.
    for(int i=0;i<n;i++) a[i]=a[i]*b[i];
    fft(a,n,-1);
}
// Raw input strings as read by main(): s1 is the pattern, s2 is the text.
char s1[310000],s2[310000];
// Numeric encodings built in main(): '*' becomes 0, any other character c becomes c-'a'+1.
// A holds s1 in reversed order, B holds s2 in its original order.
int A[310000],B[310000];
// q[1..cnt]: 0-based start positions of the matches found by main().
int q[310000];
// f[i]: accumulated mismatch sum for the alignment whose last pattern character sits at text index i.
double f[310000];
// Adds weight * conv(A^pa, B^pb) to f[0..m], where A^pa / B^pb are the
// element-wise pa-th / pb-th powers of A[0..n] and B[0..m].
static void add_term(int n,int m,int pa,int pb,double weight)
{
    // calc() leaves transformed data in both buffers, so start from zeros.
    memset(static_cast<void*>(a),0,sizeof(a));
    memset(static_cast<void*>(b),0,sizeof(b));
    for(int i=0;i<=n;i++)
    {
        double v=1.0;
        for(int k=0;k<pa;k++) v*=A[i];
        a[i].r=v;
    }
    for(int i=0;i<=m;i++)
    {
        double v=1.0;
        for(int k=0;k<pb;k++) v*=B[i];
        b[i].r=v;
    }
    calc(n,m);
    for(int i=0;i<=m;i++) f[i]+=weight*a[i].r;
}
// Reads the pattern length, the text length, the pattern and the text, then
// prints the number of wildcard matches followed by their 1-based start
// positions in increasing order (the second line is omitted when there are none).
// Returns 1 without printing anything if the input is malformed.
int main()
{
    int n,m;
    if(scanf("%d%d",&n,&m)!=2) return 1;
    // Lengths must be positive and leave room for the terminating NUL.
    if(n<1||m<1||n>=static_cast<int>(sizeof(s1))||m>=static_cast<int>(sizeof(s2))) return 1;
    // The field width is sizeof(s1)-1 == sizeof(s2)-1, so an over-long token cannot overflow.
    if(scanf("%309999s%309999s",s1,s2)!=2) return 1;
    // Reject strings shorter than announced; otherwise bytes past the NUL would be encoded.
    if(strlen(s1)<static_cast<size_t>(n)||strlen(s2)<static_cast<size_t>(m)) return 1;
    // From here on n and m are the last valid indices rather than the lengths.
    n--;m--;
    // Pattern is stored reversed so that the alignment sums become convolutions.
    for(int i=0;i<=n;i++)
    {
        if(s1[n-i]=='*') A[i]=0;
        else A[i]=s1[n-i]-'a'+1;
    }
    for(int i=0;i<=m;i++)
    {
        if(s2[i]=='*') B[i]=0;
        else B[i]=s2[i]-'a'+1;
    }
    // f has static storage and therefore starts at zero.
    // f[i] = sum A^3*B - 2*sum A^2*B^2 + sum A*B^3, i.e. sum (A-B)^2*A*B.
    add_term(n,m,3,1,1.0);
    add_term(n,m,2,2,-2.0);
    add_term(n,m,1,3,1.0);
    // An alignment ending at text index i matches iff its sum is 0; 0.5 absorbs
    // floating-point error because the exact value is an integer.
    int cnt=0;
    for(int i=n;i<=m;i++) if(f[i]<0.5) q[++cnt]=i-n;
    printf("%d\n",cnt);
    if(cnt>0)
    {
        for(int i=1;i<cnt;i++) printf("%d ",q[i]+1);
        printf("%d\n",q[cnt]+1);
    }
    return 0;
}

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325222427&siteId=291194637