Description

给定 \(N,k\),求:


\[\sum_{i=1}^N\sum_{j=1}^N (f(\gcd(i,j)))^k\]


其中 \(f(x)\) 表示 \(x\) 质因子分解结果中次大的质因子,重复的质因数计算多次。

Constraints

\(1\le N,k\le 2\cdot 10^9\)

Solution

记 \(f_k(x)=(f(x))^k\)。推式子:


\[\begin{aligned}\sum_{i}^N\sum_j^N f_k(\gcd(i,j)) &= \sum_g^N f_k(g)\sum_{i}^{N/g}\sum_{j}^{N/g} [\gcd(i,j)=1] \\ &= \sum_g^N f_k(g)\sum_d^{N/g} \mu(d)\left\lfloor\frac{N}{dg}\right\rfloor^2 \\ &= \sum_g^N f_k(g)G\left(\left\lfloor\frac{N}{g}\right\rfloor\right) \\ G(N) &= \sum_{d}^N \mu(d)\lfloor N/d\rfloor^2 \end{aligned} \]


随便什么筛出 \(\mu\),min-25 用可以用 UOJ Sanrd 的套路筛出 \(f_k\)​。

整除分块套整除分块复杂度 \(O\left( \int_1^\sqrt{N}\sqrt i\right)+O\left( \int_1^\sqrt{N}\sqrt \frac{N}{i} \right) = O(N^{3/4})\)。题中 \(N\le 2\cdot 10^9\)​​,算比较小的,可以直接过。

#include <algorithm>
#include <cmath>
#include <cstdio>
using uint = unsigned int;
using ull = unsigned long long;

inline uint fastpow(uint a, uint b) {
uint r = 1;
for (; b; b >>= 1, a *= a)
if (b & 1) r *= a;
return r;
}

const int N = 1e5;
uint n, K;

namespace min_25 {
uint n, m, tot, cnt, K;
uint D[N], p[N], pK[N];
bool vis[N];

inline uint id(uint x) {
return x <= m ? cnt - x + 1 : n / x;
}

void sieve(uint n) {
vis[1] = 1;
for (uint i = 1; i <= n; i++) {
if (!vis[i]) p[++tot] = i;
for (uint j = 1; j <= tot && i * p[j] <= n; j++) {
vis[i * p[j]] = 1;
if (i % p[j] == 0) break;
}
}
for (uint i = 1; i <= tot; i++)
pK[i] = fastpow(p[i], K);
}

uint s[N];
void init_s() {
for (uint i = 1; i <= cnt; i++)
s[i] = D[i] - 1;
for (uint j = 1; j <= tot; j++)
for (uint i = 1; i <= cnt && D[i] >= (ull)p[j] * p[j]; i++)
s[i] -= s[id(D[i] / p[j])] - (j - 1);
}

namespace fk {
uint g[N];
void init_g() {
for (uint i = 1; i <= cnt; i++)
g[i] = s[i];
for (uint j = tot; j; --j)
for (uint i = 1; i <= cnt && D[i] >= (ull)p[j] * p[j]; i++)
for (uint pk = p[j]; (ull)pk * p[j] <= D[i]; pk *= p[j])
g[i] += g[id(D[i] / pk)] - s[id(D[i] / pk)]
+ pK[j] * (s[id(D[i] / pk)] - j + 1);
}
inline uint get(uint x) {
if (!x) return 0u;
return g[id(x)];
}
}

namespace mu {
uint g[N];
void init_g() {
for (uint i = 1; i <= cnt; i++)
g[i] = -s[i];
for (uint j = tot; j; --j)
for (uint i = 1; i <= cnt && D[i] >= (ull)p[j] * p[j]; i++)
g[i] -= g[id(D[i] / p[j])] + j;
}
inline uint get(uint x) {
if (!x) return 0u;
return g[id(x)] + 1;
}
}

void load(uint _n, uint _K) {
n = _n, K = _K, m = sqrt(n);
for (uint x = 1; x <= n; x = n / (n / x) + 1)
D[++cnt] = n / x;
sieve(m);

init_s();
mu::init_g();
fk::init_g();
}
}

signed main() {
scanf("%u%u", &n, &K);
min_25::load(n, K);

auto calc_S = [&](uint n) {
using min_25::mu::get;
uint res = 0;
for (uint l = 1, r; l <= n; l = r + 1) {
r = n / (n / l);
res += (get(r) - get(l - 1)) * (n / l) * (n / l);
}
return res;
};

auto calc_F = [&](uint n) {
using min_25::fk::get;
uint res = 0;
for (uint l = 1, r; l <= n; l = r + 1) {
r = n / (n / l);
res += calc_S(n / l) * (get(r) - get(l - 1));
}
return res;
};

printf("%u\n", calc_F(n));
return 0;
}


Solution Plus 1

做完发现有更优做法!考虑迪利克雷卷积:\(h=f_k*\mu\)。那么答案成了:


\[\sum_i^N h(i)\left\lfloor\frac{N}{i}\right\rfloor^2\]


式子非常简洁,尝试直接求 \(h\) 的前缀和。由于不是积性函数,而且要比 \(f_k\) 的魔改方法复杂得多,min-25 筛直接求 \(h\) 有些困难。而 \(h\) 和迪利克雷卷积关系较大,由人类智慧可得:


\[h*I = f_k*\mu *I=f_k*\varepsilon=f_k\]


凑出了卷积形式,考虑杜教筛。记 \(F, H\) 分别为 \(f_k,h\) 的前缀和。那么:


\[F(N)=\sum_{i=1}^N H(\lfloor N/i\rfloor) \Leftrightarrow H(N)=F(N)-\sum_{i=2}^N H(\lfloor N/i\rfloor)\]


杜教筛复杂度 \(O(N^{2/3})\),总复杂度 \(O\left(\frac{N^{3/4}}{\log N}\right)\),瓶颈在 min-25 求 \(F\)。

Solution Plus 2

经过 Mr_Spade 指导,可以不要杜教筛!

考虑目标在于快速求 \(H\)​​​。这里有一个船新科技:设阈值 \(n^{2/3}\)​,并以此分治:

  • 对于 \(\le n^{2/3}\)​​ 的点值,线性筛出 \(f_k,\mu\)​ 的点值,暴力迪利克雷卷积求出 \(h\)​ 在 \(n^{2/3}\)​​ 内所有的点值,最后前缀和求出 \(H\)。复杂度 \(O(n^{2/3}\log n)\)​。
  • 对于 \(>n^{2/3}\)​​ 的点值​,使用之前整除分块的做法算出 \(H\)​ 在较大整除点的点值。复杂度 \(O\left(\int_1^\sqrt[3]N \sqrt\frac{N}{i}\right)=O(n^{2/3})\)。

最后一次整除分块就做完了。复杂度仍然是 \(O\left(\frac{N^{3/4}}{\log N}\right)\)。常数比杜教筛小很多!

#include <algorithm>
#include <cmath>
#include <cstdio>
using uint = unsigned int;
using ull = unsigned long long;

inline uint fastpow(uint a, uint b) {
uint r = 1;
for (; b; b >>= 1, a *= a)
if (b & 1) r *= a;
return r;
}

const int N = 1e5;
const int L = 1e7;
uint n, K, gap;
uint sfk[L], smu[L], sh[L];

namespace min_25 {
uint n, m, tot, cnt, K;
uint D[N], p[L / 10], pK[L / 10];
bool vis[L];

inline uint id(uint x) {
return x <= m ? cnt - x + 1 : n / x;
}

void sieve(uint n) {
vis[1] = 1;
sfk[1] = 0, smu[1] = 1;
for (uint i = 1; i <= n; i++) {
if (!vis[i]) {
p[++tot] = i;
sfk[i] = 1, smu[i] = -1;
pK[tot] = fastpow(i, K);
}
for (uint j = 1; j <= tot && i * p[j] <= n; j++) {
vis[i * p[j]] = 1;
if (vis[i]) sfk[i * p[j]] = sfk[i];
else
sfk[i * p[j]] = pK[j];
if (i % p[j] == 0) break;
smu[i * p[j]] = -smu[i];
}
}
while (p[tot] > m) --tot;
}

uint s[N];
void init_s() {
for (uint i = 1; i <= cnt; i++)
s[i] = D[i] - 1;
for (uint j = 1; j <= tot; j++)
for (uint i = 1; i <= cnt && D[i] >= (ull)p[j] * p[j]; i++)
s[i] -= s[id(D[i] / p[j])] - (j - 1);
}

namespace fk {
uint g[N];
void init_g() {
for (uint i = 1; i <= cnt; i++)
g[i] = s[i];
for (uint j = tot; j; --j)
for (uint i = 1; i <= cnt && D[i] >= (ull)p[j] * p[j]; i++)
for (uint pk = p[j]; (ull)pk * p[j] <= D[i]; pk *= p[j])
g[i] += g[id(D[i] / pk)] - s[id(D[i] / pk)]
+ pK[j] * (s[id(D[i] / pk)] - j + 1);
}
inline uint get(uint x) {
if (!x) return 0u;
return g[id(x)];
}
}

namespace mu {
uint g[N];
void init_g() {
for (uint i = 1; i <= cnt; i++)
g[i] = -s[i];
for (uint j = tot; j; --j)
for (uint i = 1; i <= cnt && D[i] >= (ull)p[j] * p[j]; i++)
g[i] -= g[id(D[i] / p[j])] + j;
}
inline uint get(uint x) {
if (!x) return 0u;
return g[id(x)] + 1;
}
}

void load(uint _n, uint _K) {
n = _n, K = _K, m = sqrt(n);
for (uint x = 1; x <= n; x = n / (n / x) + 1)
D[++cnt] = n / x;
sieve(std::max(gap, m));

init_s();
mu::init_g();
fk::init_g();
}
}

signed main() {
scanf("%u%u", &n, &K);
gap = pow(n, 2.0 / 3);
min_25::load(n, K);

for (uint i = 1; i <= gap; i++)
for (uint j = 1; i * j <= gap; j++)
sh[i * j] += sfk[i] * smu[j];
for (uint i = 2; i <= gap; i++)
sh[i] += sh[i - 1];

auto calc_H = [&](uint x) {
auto mu = min_25::mu::get;
auto fk = min_25::fk::get;
if (x <= gap) return sh[x];
uint ret = 0;
for (uint l = 1, r; l <= x; l = r + 1) {
r = x / (x / l);
ret += (mu(r) - mu(l - 1)) * fk(x / l);
}
return ret;
};

uint ans = 0, lst = 0;
for (uint l = 1, r; l <= n; l = r + 1) {
r = n / (n / l);
uint cur = calc_H(r);
ans += (cur - lst) * (n / l) * (n / l);
lst = cur;
}

printf("%u\n", ans);
return 0;
}