快速乘 + Montgomery modular multiplication

O l o g n : O(logn):

ll mult(ll a, ll b, ll p) {
    a %= p;
	b %= p;
    ll ans = 0;
    while(b) {
        if( b&1 ) ans = (ans + a) % p;
        a = (a + a) % p;
        b >>= 1;
    }
    return ans;
}

O 1 : O(1):

出自:
在这里插入图片描述

ll fast_mult(ll a, ll b) {
    return (a*b - (ll)((long double)a/mod*b)*mod+mod)%mod;
}

P S PS:
     ①、double可能会挂,最好long double
     ②、 u , v > = p u,v>=p 可能会挂,必要时先% p p
     ③、用浮点数算出 u v / p u*v/p 的值时事实上允许了 ± 1 ±1 的误差,因此可能出现负数,所以必须 + p +p 再% p p 。因此理论上不需要+ e p s eps

另一种模板:

ll pmod(ll a, ll b) {
	ll d=(ll)floor(a*(long double)b/mod+0.5);
	ll ret=a*b-d*mod;
	if (ret<0) ret+=mod;
	return ret;
}

也称 O ( 1 ) O(1) 快速乘


但还有一种黑科技,根据牛客的一道题:电音之王

O ( 1 ) O(1) 快速乘可以勉强卡过这道题
但根据出题人DLS的一句话题解,正确做法为:Montgomery modular multiplication

暂时没人写详细的博客
这里附上DLS用Montgomery modular multiplication的解题代码:

#include <bits/stdc++.h>
using namespace std;
#define rep(i,a,n) for (int i=a;i<n;i++)
#define per(i,a,n) for (int i=n-1;i>=a;i--)
#define pb push_back
#define mp make_pair
#define all(x) (x).begin(),(x).end()
#define fi first
#define se second
#define SZ(x) ((int)(x).size())
typedef vector<int> VI;
typedef long long ll;
typedef pair<int,int> PII;
const ll mod=1000000007;
ll powmod(ll a,ll b) {ll res=1;a%=mod; assert(b>=0); for(;b;b>>=1){if(b&1)res=res*a%mod;a=a*a%mod;}return res;}
ll gcd(ll a,ll b) { return b?gcd(b,a%b):a;}
// head
 
typedef unsigned long long u64;
typedef __int128_t i128;
typedef __uint128_t u128;
int _,k;
u64 A0,A1,M0,M1,C,M;
 
struct Mod64 {
	Mod64():n_(0) {}
	Mod64(u64 n):n_(init(n)) {}
	static u64 init(u64 w) { return reduce(u128(w) * r2); }
	static void set_mod(u64 m) {
		mod=m; assert(mod&1);
		inv=m; rep(i,0,5) inv*=2-inv*m;
		r2=-u128(m)%m;
	}
	static u64 reduce(u128 x) {
		u64 y=u64(x>>64)-u64((u128(u64(x)*inv)*mod)>>64);
		return ll(y)<0?y+mod:y;
	}
	Mod64& operator += (Mod64 rhs) { n_+=rhs.n_-mod; if (ll(n_)<0) n_+=mod; return *this; }
	Mod64 operator + (Mod64 rhs) const { return Mod64(*this)+=rhs; }
	Mod64& operator -= (Mod64 rhs) { n_-=rhs.n_; if (ll(n_)<0) n_+=mod; return *this; }
	Mod64 operator - (Mod64 rhs) const { return Mod64(*this)-=rhs; }
	Mod64& operator *= (Mod64 rhs) { n_=reduce(u128(n_)*rhs.n_); return *this; }
	Mod64 operator * (Mod64 rhs) const { return Mod64(*this)*=rhs; }
	u64 get() const { return reduce(n_); }
	static u64 mod,inv,r2;
	u64 n_;
};
u64 Mod64::mod,Mod64::inv,Mod64::r2;
 
u64 pmod(u64 a,u64 b,u64 p) {
	u64 d=(u64)floor(a*(long double)b/p+0.5);
	ll ret=a*b-d*p;
	if (ret<0) ret+=p;
	return ret;
}
 
 
void bruteforce() {
	u64 ans=1;
	for (int i=0;i<=k;i++) {
		ans=pmod(ans,A0,M);
		u64 A2=pmod(M0,A1,M)+pmod(M1,A0,M)+C;
		while (A2>=M) A2-=M;
		A0=A1; A1=A2;
	}
	printf("%llu\n",ans);
}
 
int main() {
	for (scanf("%d",&_);_;_--) {
		scanf("%llu%llu%llu%llu%llu%llu%d",&A0,&A1,&M0,&M1,&C,&M,&k);
		Mod64::set_mod(M);
		Mod64 a0(A0),a1(A1),m0(M0),m1(M1),c(C),ans(1),a2(0);
		for (int i=0;i<=k;i++) {
			ans=ans*a0;
			a2=m0*a1+m1*a0+c;
			a0=a1; a1=a2;
		}
		printf("%llu\n",ans.get());
	}
}

(比 O ( 1 ) O(1) 快速乘快了 1 1 秒多)

猜你喜欢

转载自blog.csdn.net/Scar_Halo/article/details/82946977