传送门
用杜教筛即可:令 f(n)=n^2·φ(n),取 g=Id^2(即 g(n)=n^2),则 (f*g)(n)=n^3,其前缀和为 (n(n+1)/2)^2。
#include<bits/stdc++.h>
// Array capacity. The sieve in prework() only covers 1 .. N-50 (= 10^7); the extra 50 slots are slack.
#define N 10000050
// Shorthand for the integer type used for all modular arithmetic.
#define LL long long
using namespace std;
// P: the modulus; n: the upper bound of the query. Both are read from stdin in main().
LL P,n;
// prim[1..tot]: primes found by the sieve in prework(), in increasing order (1-based).
// isp[i]:       set to 1 when i has been marked composite -- despite the name, 0 means "prime" for i >= 2.
// phi[i]:       first holds Euler's totient of i; prework() then overwrites it with the
//               prefix sum of phi(k) * k^2 (mod P) over k = 1..i.
int prim[N],isp[N],tot; LL phi[N];
// Memo table used by getf() for arguments above the sieve limit.
map<LL,LL> Phi;
// Set by prework() to power(2, P-2) and power(6, P-2); these are the inverses of 2 and 6
// modulo P provided P is a prime larger than 3 (Fermat's little theorem).
LL inv2, inv6;
// Modular exponentiation: returns a^b reduced modulo the global modulus P,
// computed by binary (square-and-multiply) exponentiation.
//
// a: base, any value; it is normalised into [0, P) before use.
// b: exponent; a non-positive exponent yields the empty product, 1 mod P.
//
// Requires P > 0. Intermediate products are bounded by (P-1)^2, so the result
// is exact as long as P*P fits in a long long.
LL power(LL a, LL b){
    // Reduce the base up front so the very first a*a cannot overflow for a large a.
    a %= P;
    if(a < 0) a += P;
    LL ans = 1 % P;  // handles P == 1, where every residue is 0
    // Loop on b > 0 rather than b != 0: right-shifting a negative exponent
    // does not reach 0 on the usual arithmetic-shift implementations, which would loop forever.
    for(; b > 0; b >>= 1){
        if(b & 1) ans = ans * a % P;
        a = a * a % P;
    }
    return ans;
}
void prework(){
inv2 = power(2, P-2);
inv6 = power(6, P-2);
phi[1] = 1;
for(int i=2;i<=N-50;i++){
if(!isp[i]) prim[++tot] = i, phi[i] = i-1;
for(int j=1;j<=tot;j++){
if(i * prim[j] > N - 50) break;
isp[i * prim[j]] = 1;
if(i % prim[j] == 0){
phi[i * prim[j]] = phi[i] * prim[j];
break;
}
phi[i * prim[j]] = phi[i] * (prim[j] - 1);
}
}
for(int i=1;i<=N-50;i++) phi[i] = phi[i] % P * i % P * i % P;
for(int i=2;i<=N-50;i++) phi[i] += phi[i-1], phi[i] %= P;
}
// Returns x*(x+1)/2 modulo P, i.e. 1 + 2 + ... + x (mod P).
// x is reduced mod P first; the division by 2 is a multiplication by inv2,
// which prework() is expected to have set to the inverse of 2 mod P.
LL Sum(LL x){
    const LL m = x % P;
    const LL product = m * (m + 1) % P;
    return product * inv2 % P;
}
// Returns x*(x+1)*(2x+1)/6 modulo P, i.e. 1^2 + 2^2 + ... + x^2 (mod P).
// x is reduced mod P first; the division by 6 is a multiplication by inv6,
// which prework() is expected to have set to the inverse of 6 mod P.
LL G(LL x){
    const LL m = x % P;
    LL acc = m * (m + 1) % P;
    acc = acc * (m * 2 + 1) % P;
    return acc * inv6 % P;
}
// Returns S(x) = sum over k = 1..x of k^2 * phi(k), modulo P.
//
// For x within the sieve range the value is read from the table built by prework().
// For larger x it uses the identity
//     sum_{d=1..x} d^2 * S(x / d) = (x(x+1)/2)^2
// so S(x) = Sum(x)^2 - sum_{d=2..x} d^2 * S(x / d), with d grouped into blocks [l, r]
// on which x / d is constant. Results for large x are memoised in Phi.
//
// x: non-negative argument. Requires prework() to have run.
LL getf(LL x){
    if(x <= N - 50) return phi[x];

    // Probe with find() rather than operator[]: operator[] inserts a placeholder on a
    // miss, and testing the value for truthiness would mistake a cached 0 for "absent".
    const std::map<LL, LL>::const_iterator hit = Phi.find(x);
    if(hit != Phi.end()) return hit->second;

    LL ans = Sum(x);
    ans = ans * ans % P;
    for(LL l = 2, r; l <= x; l = r + 1){
        const LL val = x / l;
        r = x / val;  // largest d with x / d == val
        // G(r) - G(l-1) is the sum of d^2 over the block; it may be negative before reduction.
        ans -= (G(r) - G(l - 1)) % P * getf(val) % P;
        ans = (ans + P) % P;  // bring the result back into [0, P)
    }
    return Phi[x] = ans;
}
int main(){
scanf("%lld%lld",&P,&n); prework();
LL ans = 0;
for(LL l=1,r;l<=n;l=r+1){
LL val = n/l; r = n/val;
LL tmp = Sum(val); tmp = (tmp * tmp) % P;
ans += tmp * (getf(r) - getf(l-1)) % P;
ans = (ans + P) % P;
} printf("%lld",ans);
}