Problem: two players play n rounds of rock-paper-scissors. If A wins a rounds and B wins b rounds, the score is gcd(a, b). Find the expected score multiplied by \(3^{2n}\).
Solution: from the problem statement it is clear that \(ans = 3^{n} \sum_{a=0}^{n} \sum_{b=0}^{n-a} \gcd(a,b)\, C(n,a)\, C(n-a,b)\).
\ (ANS = \ sum_ {D =. 1} ^ Nd \ sum_ {A = 0} ^ n-\ sum_ {B = 0} ^ {Na} [GCD ( a, b) == d] C
(n, a) C (na, b) \) is assumed \ (f (d) = \ sum_ {a = 0} ^ n \ sum_ {b = 0} ^ {na} [GCD (A, B) == D] C (n-, A) C (Na, B) \) , \ (F. (D) = \ sum_ = {0} ^ n-A \ B = {0} sum_ ^ {Na} [D | GCD (A, B)] C (n-, A) C (Na, B) \) ,
then the \ (F. (D) = \ sum_ {D | X} F (X) \) , \ (F (D) = \ sum_ {D | X} \ MU (\ FRAC {X} {D}) F. (X) \) .
\(ans = 3^{n} \sum_{d=1}^{n} d \sum_{d \mid x} \mu\left(\frac{x}{d}\right) F(x)\)
\(ans = 3^{n} \sum_{x=1}^{n} F(x) \sum_{d \mid x} d\, \mu\left(\frac{x}{d}\right)\)
\(ans = 3^{n} \sum_{x=1}^{n} F(x)\, \varphi(x)\)
\(F(d) = \sum_{a=0}^{n} \sum_{b=0}^{n-a} [d \mid \gcd(a,b)]\, C(n,a)\, C(n-a,b)\)
\(F(d) = \sum_{a=0}^{\lfloor n/d \rfloor} \sum_{b=0}^{\lfloor n/d \rfloor - a} C(n, d a)\, C(n - d a, d b) - 1\) (the \(-1\) removes the term \(a=b=0\), whose gcd is 0)
\(F(d) = \sum_{a=0}^{\lfloor n/d \rfloor} \sum_{b=0}^{\lfloor n/d \rfloor - a} C(n, a d + b d)\, C(a d + b d, b d) - 1\)
\(F(d) = \sum_{a=0}^{\lfloor n/d \rfloor} \sum_{b=0}^{a} C(n, a d)\, C(a d, b d) - 1\) (re-indexing \(a \leftarrow a + b\))
\(F(d) = n! \sum_{a=0}^{\lfloor n/d \rfloor} \frac{1}{(n - a d)!} \sum_{b=0}^{a} \frac{1}{(b d)!} \cdot \frac{1}{(a d - b d)!} - 1\). The inner sum is a convolution, so it can be computed with a coefficient-splitting FFT (needed because the modulus is arbitrary). Enumerating d and computing F(d) for each one gives a total complexity of \(\sum_{d=1}^{n} \frac{n}{d} \log\left(\frac{n}{d}\right)\), which is \(O(n \log^2 n)\).
//#pragma GCC optimize(2)
//#pragma GCC optimize(3)
//#pragma GCC optimize(4)
//#pragma GCC optimize("unroll-loops")
//#pragma comment(linker, "/stack:200000000")
//#pragma GCC optimize("Ofast,no-stack-protector")
//#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native")
#include<bits/stdc++.h>
//#include <bits/extc++.h>
// ---- Shorthand macros (competitive-programming template) ----
// Pair member accessors.
#define fi first
#define se second
// Abbreviation for double.
#define db double
// Abbreviations for standard helper / member function names.
#define mp make_pair
#define pb push_back
#define mt make_tuple
//#define pi acos(-1.0)
// Abbreviations for integer / container / floating-point types.
#define ll long long
#define vi vector<int>
// Default modulus used by sub(), add() and the two-argument qp().
#define mod 1000000007
#define ld long double
//#define C 0.5772156649
// Expand to the argument lists "l,m,rt<<1" and "m+1,r,rt<<1|1"
// (presumably for segment-tree style recursion; not used in the visible code).
#define ls l,m,rt<<1
#define rs m+1,r,rt<<1|1
// Square of x; note that x is evaluated twice.
#define sqr(x) ((x)*(x))
// Abbreviations for common pair types.
#define pll pair<ll,ll>
#define pil pair<int,ll>
#define pli pair<ll,int>
#define pii pair<int,int>
#define ull unsigned long long
// GCC builtin population count (takes an unsigned int argument).
#define bpc __builtin_popcount
// The literal 10^18 as a long long.
#define base 1000000000000000000ll
// Redirect stdin / stdout to the file a.txt.
#define fin freopen("a.txt","r",stdin)
#define fout freopen("a.txt","w",stdout)
// Detach iostreams from C stdio and untie cin from cout.
#define fio ios::sync_with_stdio(false);cin.tie(0)
// Declares an mt19937 named rng, seeded from the steady clock.
#define mr mt19937 rng(chrono::steady_clock::now().time_since_epoch().count())
// Greatest common divisor via the iterative Euclidean algorithm.
// Returns a unchanged when b is 0 (so gcd(0,0) is 0).
inline ll gcd(ll a,ll b)
{
    while(b)
    {
        ll rest=a%b;
        a=b;
        b=rest;
    }
    return a;
}
// In-place modular subtraction: a becomes a-b, wrapped once by mod if negative.
// Only a single correction is applied, so both operands are expected in [0, mod).
inline void sub(ll &a,ll b)
{
    const ll diff=a-b;
    a=(diff<0)?diff+mod:diff;
}
// In-place modular addition: a becomes a+b, reduced once by mod if it reached mod.
// Only a single correction is applied, so both operands are expected in [0, mod).
inline void add(ll &a,ll b)
{
    const ll sum=a+b;
    a=(sum>=mod)?sum-mod:sum;
}
// Returns a reference to the larger argument; on a tie the second argument is returned.
template<typename T>
inline T const& MAX(T const &a,T const &b)
{
    if(a>b)
    {
        return a;
    }
    return b;
}
// Returns a reference to the smaller argument; on a tie the second argument is returned.
template<typename T>
inline T const& MIN(T const &a,T const &b)
{
    if(a<b)
    {
        return a;
    }
    return b;
}
// Fast exponentiation: returns a^b modulo the file-wide `mod`.
//   a - base; any value, it is first normalised into [0, mod) so that the
//       squaring step cannot overflow a signed 64-bit integer
//   b - exponent; must be non-negative
// The result is always in [0, mod).
inline ll qp(ll a,ll b)
{
    a%=mod;
    if(a<0)a+=mod; // keep the base non-negative so every product stays in range
    ll ans=1;
    while(b)
    {
        if(b&1)ans=ans*a%mod; // multiply in the current binary digit of b
        a=a*a%mod;
        b>>=1;
    }
    return ans;
}
// Fast exponentiation with an explicit modulus: returns a^b modulo c.
//   a - base; any value, it is first normalised into [0, c) so that the
//       squaring step cannot overflow for large or negative inputs
//   b - exponent; must be non-negative
//   c - modulus; must be positive and small enough that (c-1)^2 fits in a
//       signed 64-bit integer
// The result is always in [0, c); in particular it is 0 when c == 1.
inline long long qp(long long a,long long b,long long c)
{
    a%=c;
    if(a<0)a+=c;
    long long ans=1%c; // 1 % c is 0 for c == 1, so x^0 mod 1 is reported correctly
    while(b)
    {
        if(b&1)ans=ans*a%c; // multiply in the current binary digit of b
        a=a*a%c;
        b>>=1;
    }
    return ans;
}
using namespace std;
//using namespace __gnu_pbds;
// pi evaluated with the long double overload of acos. Passing the int literal -1
// selects the double overload, which would leave the long double FFT twiddle
// factors with only double precision.
const ld pi=std::acos(static_cast<ld>(-1));
// Hash base, epsilon and "infinity" sentinels from the template; none of them
// is referenced in the visible code.
const ull ba=233;
const db eps=1e-5;
const ll INF=0x3f3f3f3f3f3f3f3f;
// N bounds the sieve and sizes the global arrays (FFT buffers use 3*N);
// maxn and inf are not referenced in the visible code.
const int N=100000+10,maxn=2000000+10,inf=0x3f3f3f3f;
struct cd{
ld x,y;
cd(ld _x=0.0,ld _y=0.0):x(_x),y(_y){}
cd operator +(const cd &b)const{
return cd(x+b.x,y+b.y);
}
cd operator -(const cd &b)const{
return cd(x-b.x,y-b.y);
}
cd operator *(const cd &b)const{
return cd(x*b.x - y*b.y,x*b.y + y*b.x);
}
cd operator /(const db &b)const{
return cd(x/b,y/b);
}
}a[N*3],b[N*3],dfta[N*3],dftb[N*3],dftc[N*3],dftd[N*3];
// Complex conjugate: same real part, negated imaginary part.
cd conj(cd a)
{
    a.y=-a.y;
    return a;
}
// Bit-reversal permutation table, filled by getrev() and read by fft().
int rev[N*3];
// Input coefficient arrays read by mtt().
int A[N],B[N];
// Output coefficients written by mtt().
int C[N*3];
// Fills rev[0 .. 2^bit - 1] with the bit-reversal of each index over `bit` bits.
// Each entry is derived from the entry of idx>>1, so the table is built in
// increasing index order.
void getrev(int bit)
{
    const int total=1<<bit;
    const int top=bit-1;
    for(int idx=0;idx<total;++idx)
    {
        const int shifted=rev[idx>>1]>>1; // reversal of the upper bits
        const int high=(idx&1)<<top;      // lowest bit moves to the top position
        rev[idx]=shifted|high;
    }
}
// In-place iterative radix-2 FFT over a[0 .. n-1].
//   a   - buffer to transform
//   n   - transform length; rev[] must already hold the bit-reversal table
//         for this length (see getrev)
//   dft - +1 or -1: sign of the twiddle angle; with -1 the result is also
//         divided by n
void fft(cd *a,int n,int dft)
{
    // Reorder into bit-reversed order; swap each pair only once.
    for(int i=0;i<n;++i)
    {
        const int r=rev[i];
        if(i<r)swap(a[i],a[r]);
    }
    // Butterfly passes with doubling half-width.
    for(int half=1;half<n;half*=2)
    {
        const ld theta=dft*pi/half;
        const cd root(cos(theta),sin(theta));
        const int span=half<<1;
        for(int start=0;start<n;start+=span)
        {
            cd w(1,0);
            for(int off=0;off<half;++off)
            {
                cd &lo=a[start+off];
                cd &hi=a[start+off+half];
                const cd even=lo;
                const cd odd=w*hi;
                lo=even+odd;
                hi=even-odd;
                w=w*root;
            }
        }
    }
    if(dft==-1)
    {
        for(int i=0;i<n;++i)a[i]=a[i]/n;
    }
}
// Polynomial multiplication modulo p: C = A * B, where A has indices 0..n and
// B has indices 0..m (all three are the global arrays).
// Small inputs use the schoolbook product; larger ones use an FFT in which every
// coefficient is split into a low 15-bit part and a high part, so the
// floating-point products stay small enough to be rounded back exactly.
//   n, m - highest index used in A and B
//   p    - modulus
// The schoolbook path writes C[0..n+m]; the FFT path writes C[0..len-1], where
// len is the power of two greater than n+m.
void mtt(int n,int m,int p) {
// Schoolbook path: both inputs short, or one of them very short.
if(n<100&&m<100||min(n,m)<=5)
{
for(int i=0;i<=n+m;i++)C[i]=0;
for(int i=0;i<=n;i++)for(int j=0;j<=m;j++)
{
// NOTE(review): the int sum can reach 2p-2 before the correction below, which
// would overflow int if p is above 2^30 - confirm the bound on p.
C[i+j]+=1ll*A[i]*B[j]%p;
if(C[i+j]>=p)C[i+j]-=p;
}
return ;
}
// Smallest power of two strictly greater than n+m, and its bit-reversal table.
int sz=0;
while((1<<sz)<=n+m)sz++;getrev(sz);
int len=1<<sz;
// Pack each coefficient as (low 15 bits) + i*(remaining high bits); indices
// beyond the input length are zero-padded.
for(int i=0;i<len;i++)
{
int x=(i>n?0:A[i]%p),y=(i>m?0:B[i]%p);
a[i]=cd(x&0x7fff,x>>15);
b[i]=cd(y&0x7fff,y>>15);
}
fft(a,len,1);fft(b,len,1);
// Separate the transforms of the low and high parts using the value at the
// mirrored index j, then form the four pairwise products. The products are
// stored at index j (not i) so that the forward fft below, followed by the
// division by len, acts as the inverse transform.
for(int i=0;i<len;i++)
{
int j=(len-i)&(len-1);
cd aa,bb,cc,dd;
// aa / bb: transforms of the low / high parts of A; cc / dd: the same for B.
aa = (a[i] + conj(a[j])) * cd(0.5, 0);
bb = (a[i] - conj(a[j])) * cd(0, -0.5);
cc = (b[i] + conj(b[j])) * cd(0.5, 0);
dd = (b[i] - conj(b[j])) * cd(0, -0.5);
dfta[j] = aa * cc;dftb[j] = aa * dd;
dftc[j] = bb * cc;dftd[j] = bb * dd;
}
// Pack two products per buffer (one in the real part, one in the imaginary
// part) so that only two more transforms are needed.
for(int i=0;i<len;i++)
{
a[i] = dfta[i] + dftb[i] * cd(0, 1);
b[i] = dftc[i] + dftd[i] * cd(0, 1);
}
fft(a,len,1);fft(b,len,1);
// Round each component to the nearest integer, reduce mod p and recombine:
// low*low + (low*high + high*low) * 2^15 + high*high * 2^30.
for(int i=0;i<len;i++)
{
int da = (ll)(a[i].x / len + 0.5) % p;
int bb = (ll)(a[i].y / len + 0.5) % p;
int dc = (ll)(b[i].x / len + 0.5) % p;
int dd = (ll)(b[i].y / len + 0.5) % p;
C[i] = (da + ((ll)(bb + dc) << 15) + ((ll)dd << 30)) % p;
// Map a possibly negative remainder back into [0, p).
C[i] = (C[i]+p)%p;
}
}
// prime[1..cnt] receives the primes below N during init(); main() later
// overwrites prime[0..n] with modular inverses and then inverse factorials.
int prime[N];
// Number of primes found by init().
int cnt;
// Euler's totient values, filled by init().
int phi[N];
// mark[i] is set by init() for every composite i below N.
bool mark[N];
// Linear sieve over [2, N): collects primes into prime[1..cnt], flags
// composites in mark[], and computes Euler's totient phi[] for every index.
void init()
{
    phi[1]=1;
    for(int i=2;i<N;++i)
    {
        if(!mark[i])
        {
            // i was never struck out, so it is prime.
            prime[++cnt]=i;
            phi[i]=i-1;
        }
        for(int j=1;j<=cnt;++j)
        {
            const int q=prime[j];
            if(i*q>=N)break;
            const int prod=i*q;
            mark[prod]=1;
            if(i%q==0)
            {
                // q already divides i: phi picks up a plain factor q, and the
                // loop stops so each composite is struck exactly once.
                phi[prod]=phi[i]*q;
                break;
            }
            // q does not divide i: phi is multiplicative here.
            phi[prod]=phi[i]*phi[q];
        }
    }
}
// Per-test state set in main(): n and p are read from input; fac is n! mod p
// and po is 3^n mod p.
int n;
int p;
int fac;
int po;
// Computes F(d) mod p for the current test case: the sum of
// n! / ((n-i*d)! * (j*d)! * ((i-j)*d)!) over 0 <= j <= i <= n/d, minus 1.
// Relies on main() having stored inverse factorials in prime[] and n! in fac.
int f(int d)
{
    const int top=n/d;
    // Both convolution inputs are the inverse factorials at multiples of d.
    for(int i=0;i<=top;++i)
    {
        const int inv=prime[i*d];
        A[i]=inv;
        B[i]=inv;
    }
    mtt(top,top,p);
    // Weight each convolution term by 1/(n-i*d)! and accumulate mod p.
    int acc=0;
    for(int i=0;i<=top;++i)
    {
        acc+=1ll*prime[n-i*d]*C[i]%p;
        if(acc>=p)acc-=p;
    }
    // Multiply by n!, then subtract 1 (adding p first keeps the value non-negative).
    return (1ll*acc*fac-1+p)%p;
}
// Entry point. Reads the number of test cases, then for each case reads n and
// p and prints 3^n * sum_{d=1..n} phi(d) * F(d) modulo p.
// NOTE(review): the inverse recurrence below presumably requires p to be a
// prime larger than n, and the global arrays require n < N - confirm against
// the problem constraints.
int main()
{
    // fin;
    init();
    int t;
    if(scanf("%d",&t)!=1)return 0; // no input at all
    while(t--)
    {
        if(scanf("%d%d",&n,&p)!=2)break; // truncated input: stop cleanly
        prime[0]=prime[1]=fac=po=1;
        // prime[i] = modular inverse of i, via inv[i] = -(p/i) * inv[p%i].
        for(int i=2;i<=n;i++)prime[i]=1ll*(p-p/i)*prime[p%i]%p;
        // Prefix products turn the inverses into inverse factorials; fac and po
        // accumulate n! and 3^n in the same pass.
        for(int i=1;i<=n;i++)
        {
            prime[i]=1ll*prime[i-1]*prime[i]%p;
            fac=1ll*fac*i%p;
            po=1ll*po*3%p;
        }
        int ans=0;
        for(int d=1;d<=n;d++)
        {
            ans+=1ll*phi[d]*f(d)%p;
            if(ans>=p)ans-=p;
        }
        // The product is a long long, so it must be printed with %lld
        // (passing it to %d is undefined behaviour).
        printf("%lld\n",1ll*ans*po%p);
    }
    return 0;
}
/********************
********************/