\(\text{Problem}\)

JZOJ上,求

 

\[\sum_{i=1}^n \sum_{j=1}^m \gcd(i,j)^k \]

 

对 \(10^9+7\) 取模
\(n,m,k \le 5 \times 10^6\)

LG 上,是一个加强版,有 \(T(T\le 2 \times 10^3)\) 组数据

\(\text{Analysis}\)

依照套路的方法,我们可以推出

 

\[Ans = \sum_{p=1} p^k \sum_{d=1} \mu(d) \lfloor \frac{n}{pd} \rfloor \lfloor \frac{m}{pd} \rfloor \]

 

若只有一组数据,那么
数论分块套数论分块 \(O(n^{\frac{3}{4}})\) 即可
加上线筛 \(O(n)\)

\(\text{Code}\)
#include<cstdio>
#include<iostream>
#define LL long long
#define re register
using namespace std;

const int N = 5e6, P = 1e9 + 7;
int n, m, k, totp, pr[N], vis[N + 5], sum[N + 5], pk[N + 5];

inline int fpow(LL x, LL y)
{
	LL res = 1;
	for(; y; y >>= 1)
	{
		if (y & 1) res = res * x % P;
		x = x * x % P;
	}
	return res;
}

inline void Euler()
{
	vis[1] = sum[1] = pk[1] = 1;
	for(re int i = 2; i <= N; i++)
	{
		if (!vis[i]) pr[++totp] = i, sum[i] = -1, pk[i] = fpow(i, k);
		for(re int j = 1; j <= totp && i * pr[j] <= N; j++)
		{
			vis[i * pr[j]] = 1, pk[i * pr[j]] = (LL)pk[i] * pk[pr[j]] % P;
			if (!(i % pr[j])) break;
			sum[i * pr[j]] = -sum[i];
		}
	}
	for(re int i = 1; i <= N; i++) sum[i] += sum[i - 1], pk[i] = (pk[i] + pk[i - 1]) % P;
}

inline int F(int n, int m)
{
	LL res = 0;
	for(re int l = 1, r; l <= min(n, m); l = r + 1)
	{
		r = min(n / (n / l), m / (m / l));
		res = (res + (LL)(sum[r] - sum[l - 1] + P) * (n / l) % P * (m / l)) % P;
	}
	return res;
}

int main()
{
	scanf("%d%d%d", &n, &m, &k);
	Euler();
	LL ans = 0;
	for(re int l = 1, r; l <= min(n, m); l = r + 1)
	{
		r = min(n / (n / l), m / (m / l));
		ans = (ans + (LL)(pk[r] - pk[l - 1] + P) * F(n / l, m / l) % P) % P;
	}
	printf("%lld\n", ans);
}

但LG上有多组数据,显然太慢了

同样套路地 \(T=pd\)
然后这个式子成了

 

\[\sum_{T=1} \lfloor \frac{n}{T} \rfloor \lfloor \frac{m}{T} \rfloor \sum_{d|T} d^k \mu(\frac{T}{d}) \]

 

\(g(d)=d^k\) 显然是个积性函数,然后 \(G=g * mu\) 也是个积性函数
于是我们考虑线筛预处理 \(G\),然后数论分快做到单次 \(O(\sqrt n)\)

根据积性函数性质有 \(G(d) = \prod_{i=1} G({p_i}^{c_i})\)

然后我们思考什么样的数有贡献

 

\[G(n) = \prod_{i=1} \sum_{j=0}^{c_i} \mu({p_i}^{j}) {p_i}^{(c_i-j)k} \]

 

因为 \(\mu\) 的性质,我们知道,只有当 \(j=0\) 或 \(j=1\) 时有贡献,于是有

 

\[\begin{aligned} G(n) &= \prod_{i=1} \mu(1) {p_i}^{c_i k} + \mu(p_i) {p_i}^{(c_i-1)k} \\ &= \prod_{i=1} {p_i}^{c_i k} - {p_i}^{(c_i-1)k} \\ &= \prod_{i=1} {p_i}^{(c_i-1) k}({p_i}^k-1) \end{aligned} \]

 

当 \(c_i = 1\) 的时候,就是质数的时候,\(G(p)=p^k-1\)
因为 \(G\) 是积性函数,所以 \(G(ab)=G(a)G(b)(\gcd(a,b)=1)\)
若 \(a,b\) 不互质,因为在线筛时枚举质数,所以 \(b\in \mathbb P\),设 \(a = a' p^c(\gcd(a,a')=1)\)
那么 \(G(ab)=G(a')G(p^{c+1})=G(a')p^{ck}(p^k-1)\)
线筛过程中 \(p^{(c-1)k}(p^k-1)\) 已计入 \(G(ab)\) 中,所以本次再乘上 \(p^k\) 即可
综上

 

\[G(ab)= \begin{cases} G(a)G(b) & \gcd(a,b)=1 \\ G(a)b^k & \gcd(a,b)>1 \end{cases} \]

 

线筛即可完美处理

\(\text{Code}\)
#include<cstdio>
#include<iostream>
#define LL long long
#define re register
using namespace std;

const int N = 5e6, P = 1e9 + 7;
int n, m, k, totp, pr[N], vis[N + 5], pk[N + 5];
LL sum[N + 5];

inline int fpow(LL x, LL y)
{
	LL res = 1;
	for(; y; y >>= 1)
	{
		if (y & 1) res = res * x % P;
		x = x * x % P;
	}
	return res;
}

inline void Euler()
{
	vis[1] = sum[1] = pk[1] = 1;
	for(re int i = 2; i <= N; i++)
	{
		if (!vis[i]) pr[++totp] = i, pk[i] = fpow(i, k), sum[i] = (pk[i] - 1 + P) % P;
		for(re int j = 1; j <= totp && i * pr[j] <= N; j++)
		{
			vis[i * pr[j]] = 1, pk[i * pr[j]] = (LL)pk[i] * pk[pr[j]] % P;
			if (i % pr[j]) sum[i * pr[j]] = sum[i] * sum[pr[j]] % P;
			else{sum[i * pr[j]] = sum[i] * pk[pr[j]] % P; break;}
		}
	}
	for(re int i = 1; i <= N; i++) sum[i] = (sum[i] + sum[i - 1]) % P;
}

int main()
{
	int T; scanf("%d%d", &T, &k);
	Euler();
	for(; T; T--)
	{
		scanf("%d%d", &n, &m);
		LL ans = 0;
		for(re int l = 1, r; l <= min(n, m); l = r + 1)
		{
			r = min(n / (n / l), m / (m / l));
			ans = (ans + (sum[r] - sum[l - 1] + P) * (n / l) % P * (m / l)) % P;
		}
		printf("%lld\n", ans);
	}
}