Recent notes on constant-factor optimization of the FFT

Optimizing with raw pointers brings no real benefit; it only makes the code harder to read.

Besides optimizing the memory access order of the original loops, the precomputed complex roots of unity can be stored contiguously to make addressing faster; see the code below for details.

The code given here is for the FFT; the same approach applies to the NTT.

#include<bits/stdc++.h>
// Loop shorthands. Each expands to a for-loop header that declares `int i`
// starting at x and caches the bound y once in a local named `B`, so `B`
// shadows any outer identifier of that name inside the loop.
// fo: ascending, i = x .. y inclusive.
#define fo(i, x, y) for(int i = x, B = y; i <= B; i ++)
// ff: ascending, i = x .. y-1 (half-open range).
#define ff(i, x, y) for(int i = x, B = y; i <  B; i ++)
// fd: descending, i = x down to y inclusive.
#define fd(i, x, y) for(int i = x, B = y; i >= B; i --)
// Type shorthands.
#define ll long long
#define db double
// printf shorthand.
#define pp printf
// Prints a single newline via printf.
#define hh pp("\n")
using namespace std;

// Minimal complex number used by the transform: x is the real part and
// y the imaginary part (see the multiplication operator for the convention).
struct P {
    db x, y;
    // Builds x + y*i; both components default to zero.
    P(db _x = 0, db _y = 0) : x(_x), y(_y) {}
};

// Component-wise sum of two complex values.
P operator + (P a, P b) {
    a.x += b.x;
    a.y += b.y;
    return a;
}
// Component-wise difference a - b of two complex values.
P operator - (P a, P b) {
    a.x -= b.x;
    a.y -= b.y;
    return a;
}
// Complex product: (a.x + a.y*i) * (b.x + b.y*i).
P operator * (P a, P b) {
    const db re = a.x * b.x - a.y * b.y;
    const db im = a.x * b.y + a.y * b.x;
    return P(re, im);
}

// Pi, obtained as acos(-1).
const db pi = acos(-1.0);

// Capacity of every global buffer below: 2^21 elements.
const int nm = 1 << 21;

// Bit-reversal index table; dft() rewrites entries [0, n) on every call.
int r[nm];
// Working buffers for the two sequences multiplied in main().
P a[nm];
P b[nm];
// Twiddle-factor table; main() fills W[i + j] for each power of two i and 0 <= j < i.
P W[nm];

// In-place iterative radix-2 transform of a[0 .. n).
//
// Parameters:
//   a - buffer of n complex values, overwritten with the result. This
//       parameter shadows the global array `a` inside the function.
//   n - transform length. The indexing assumes n is a power of two and
//       n <= nm (the size of the global tables r and W).
//   f - direction flag: -1 selects the inverse transform; any other value
//       leaves the forward result as computed.
//
// Relies on the global twiddle table W already holding, for every power of
// two i < n and 0 <= k < i, the factor used at butterfly width i (main()
// fills it before the first call). Also overwrites r[0 .. n).
void dft(P *a, int n, int f) {
    // Build the bit-reversal permutation for this n and apply it in the same
    // pass. r[i / 2] is already final when r[i] is computed, and r[0] stays 0.
    // The i < r[i] test makes sure each pair is swapped exactly once.
    for (int i = 0; i < n; i++) {
        r[i] = r[i / 2] / 2 + (i & 1) * (n / 2);
        if (i < r[i]) swap(a[i], a[r[i]]);
    }

    // Butterfly passes: `half` is the half-width of the blocks being merged.
    // The factors for width `half` sit contiguously at W[half .. 2*half).
    for (int half = 1; half < n; half *= 2) {
        for (int base = 0; base < n; base += 2 * half) {
            for (int k = 0; k < half; k++) {
                const int lo = base + k;
                const int hi = lo + half;
                const P t = W[half + k] * a[hi];
                a[hi] = a[lo] - t;
                a[lo] = a[lo] + t;
            }
        }
    }

    if (f == -1) {
        // Reversing a[1 .. n) turns the forward result into the conjugate-root
        // transform; dividing by n completes the inverse.
        reverse(a + 1, a + n);
        for (int i = 0; i < n; i++) {
            a[i].x /= n;
            // Scale the imaginary part as well, so the output is a true
            // inverse DFT and not only correct in its real component.
            a[i].y /= n;
        }
    }
}
// Multiplies two length-n sequences through the transform domain.
// Both a and b are forward-transformed in place, multiplied element by
// element into a, and a is then inverse-transformed. On return a holds the
// result and b holds its own forward transform.
void fft(P *a, P *b, int n) {
    dft(a, n, 1);
    dft(b, n, 1);
    for (int i = 0; i < n; ++i) {
        a[i] = a[i] * b[i];
    }
    dft(a, n, -1);
}

int main() {
    for(int i = 1; i < nm; i *= 2) ff(j, 0, i)
        W[i + j] = P(cos(pi * j / i), sin(pi * j / i));
    ff(i, 0, 1 << 20) a[i].x = b[i].x = i;
    fft(a, b, 1 << 21);
}

Guess you like

Origin www.cnblogs.com/coldchair/p/11122757.html
FFT