#3328. PYXFIB
$$
\sum_{i = 0}^{\lfloor \frac{n}{k} \rfloor} C_{n}^{i \times k} \times F_{i \times k}
= \sum_{i = 0}^{n} C_n^{i} \times F_i \times [i \equiv 0 \pmod k]
$$

对 $[i \equiv 0 \pmod k]$ 做单位根反演,有

$$
[i \equiv 0 \pmod k] = \frac{1}{k} \sum_{j = 0}^{k - 1} w_k^{ij}
$$

于是原式等于

$$
\frac{1}{k} \sum_{i = 0}^{n} C_n^{i} \times F_i \sum_{j = 0}^{k - 1} w_k^{ij}
$$
由于 $F_i$ 为斐波那契数列的第 $i$ 项,构造矩阵 $A = \begin{bmatrix} 1 & 1 \\ 1 & 0 \end{bmatrix}$,则有 $A^i[0][0]$ 为 $F_i$(按 $F_0 = F_1 = 1$ 的约定)。由二项式定理,$\sum\limits_{i = 0}^{n} C_n^i \times A^i = (I + A)^n$,其 $[0][0]$ 元即为 $\sum\limits_{i = 0}^{n} C_n^i \times F_i$。
把 $F_i$ 换成 $A^i$ 并交换求和顺序,对每个 $j$ 使用二项式定理:

$$
\frac{1}{k} \sum_{i = 0}^{n} \sum_{j = 0}^{k - 1} C_{n}^{i} \times A^{i} \times w_{k}^{ij}
= \frac{1}{k} \sum_{j = 0}^{k - 1} \left(A \times w_k^{j} + I\right)^{n}
$$
#include <bits/stdc++.h>
#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4.1,sse4.2,avx,avx2,popcnt,tune=native")
using namespace std;
// Problem parameters, filled in by main() from standard input.
// `mod` is the modulus shared by every modular helper in this file.
int n, k, mod;

// Modular subtraction: returns (a - b) modulo `mod`.
// Both arguments are expected to already lie in [0, mod).
int dec(int a, int b) {
    if (a < b) {
        // Borrow one multiple of the modulus so the result stays non-negative.
        return a + mod - b;
    }
    return a - b;
}

// Modular addition: returns (a + b) modulo `mod`.
// Both arguments are expected to already lie in [0, mod).
int add(int a, int b) {
    const int total = a + b;
    return total < mod ? total : total - mod;
}
// Min_25-style sieve over the distinct values of n / l, where `n` and `mod`
// are the file-level globals.
//
// Usage, as driven by main(): call pre() once after `n` is known, then for
// each weight set `wn` and call init(); afterwards solve(x) can be queried for
// any x of the form n / l (and for x == 0).
//
// NOTE(review): the recurrence in init() matches the usual second phase of the
// Min_25 sieve for the completely multiplicative function f(p^e) = wn^e, so
// s[i] should be the sum of f(x) over 2 <= x <= a[i] — confirm against the
// problem statement.
namespace min_25 {
// Capacity of every table below.
const int N = 1e6 + 10;

// a[1..m]       : distinct quotients n / l, in decreasing order.
// id1 / id2     : index of a value v inside a[], keyed by v when v <= T and
//                 by n / v otherwise.
// prime[1..cnt] : primes not exceeding T.
// sum1[j]       : number of primes among prime[1..j].
// g1[i]         : starts as a[i] - 1 and is sieved in place by pre().
// s[i], sum[j]  : rebuilt by init() for the current weight wn.
int a[N], id1[N], id2[N], prime[N], g1[N], s[N], sum1[N], sum[N];
int m, cnt, T, wn;
// st[v] is set once v is known to be composite (v <= T).
bool st[N];

// Maps a quotient value x to its position in a[].
inline int ID(int x) {
    if (x <= T) {
        return id1[x];
    }
    return id2[n / x];
}

// One-time precomputation that depends only on n: primes up to sqrt(n), the
// list of distinct quotients, and the sieved table g1.
void pre() {
    T = sqrt(n + 0.5);

    // Linear sieve for the primes in [2, T].
    for (int i = 2; i <= T; i++) {
        if (!st[i]) {
            ++cnt;
            prime[cnt] = i;
            sum1[cnt] = sum1[cnt - 1] + 1;
        }
        for (int j = 1; j <= cnt; j++) {
            const long long composite = 1ll * i * prime[j];
            if (composite > T) {
                break;
            }
            st[composite] = 1;
            if (i % prime[j] == 0) {
                break;
            }
        }
    }

    // Enumerate the blocks [l, r] on which n / l is constant.
    for (int l = 1, r; l <= n; l = r + 1) {
        const int quotient = n / l;
        r = n / quotient;
        ++m;
        a[m] = quotient;
        if (quotient <= T) {
            id1[quotient] = m;
        } else {
            id2[n / quotient] = m;
        }
        // Count of the integers 2..quotient before any sieving.
        g1[m] = quotient - 1;
    }

    // Sieve g1 prime by prime. a[] is decreasing, so the inner scan can stop
    // at the first quotient smaller than p * p; entries read on the right-hand
    // side sit at larger indices and still hold the previous round's value.
    for (int j = 1; j <= cnt; j++) {
        const int p = prime[j];
        const long long p_squared = 1ll * p * p;
        if (p_squared > n) {
            break;
        }
        for (int i = 1; i <= m; i++) {
            if (p_squared > a[i]) {
                break;
            }
            const int removed = dec(g1[ID(a[i] / p)], sum1[j - 1]);
            g1[i] = dec(g1[i], removed);
        }
    }
}

// Rebuilds sum[] and s[] for the current value of wn. Requires pre().
void init() {
    // sum[j] = j * wn (mod `mod`): total weight of the first j primes.
    for (int j = 1; j <= cnt; j++) {
        sum[j] = add(sum[j - 1], wn);
    }
    // Seed s[] from the sieved table: every counted entry contributes wn.
    for (int i = 1; i <= m; i++) {
        s[i] = 1ll * g1[i] * wn % mod;
    }
    // Fold the primes in from largest to smallest.
    for (int j = cnt; j >= 1; j--) {
        const int p = prime[j];
        const long long p_squared = 1ll * p * p;
        for (int i = 1; i <= m; i++) {
            if (p_squared > a[i]) {
                break;
            }
            int power = p;    // p^e
            int weight = wn;  // wn^e
            while (1ll * power * p <= a[i]) {
                const int tail = dec(s[ID(a[i] / power)], sum[j]);
                // Contribution of p^e times the tail values.
                const int with_tail = 1ll * tail * weight % mod;
                // Contribution of the pure prime power p^(e+1).
                const int next_power = 1ll * wn * weight % mod;
                s[i] = add(s[i], add(with_tail, next_power));
                power *= p;
                weight = 1ll * weight * wn % mod;
            }
        }
    }
}

// Returns 0 for n == 0, otherwise s[ID(n)] + 1. The result is not reduced
// modulo `mod`, so it may equal `mod` itself.
int solve(int n) {
    if (n == 0) {
        return 0;
    }
    return s[ID(n)] + 1;
}
}
// Binary exponentiation: returns a^n modulo the global `mod`.
// The exponent n is consumed bit by bit from the least significant end.
int quick_pow(int a, int n) {
    int result = 1;
    for (; n; n >>= 1) {
        if (n & 1) {
            result = 1ll * result * a % mod;
        }
        // Square the base for the next bit.
        a = 1ll * a * a % mod;
    }
    return result;
}
// Searches for the smallest positive g, coprime to `mod`, such that
// g^((mod - 1) / q) != 1 (mod `mod`) for every distinct prime factor q of
// mod - 1. Returns 1 directly when mod == 2.
// NOTE(review): this is the usual primitive-root test and presumably relies on
// `mod` being prime — confirm against the input constraints. The search does
// not terminate if no such g exists.
int get_g() {
    if (mod == 2) {
        return 1;
    }

    // Collect the distinct prime factors of mod - 1 by trial division.
    std::vector<int> prime_factors;
    int remaining = mod - 1;
    for (int d = 2; 1ll * d * d <= remaining; d++) {
        if (remaining % d != 0) {
            continue;
        }
        prime_factors.push_back(d);
        do {
            remaining /= d;
        } while (remaining % d == 0);
    }
    if (remaining != 1) {
        // Whatever is left is a single prime factor above the square root.
        prime_factors.push_back(remaining);
    }

    for (int candidate = 1;; candidate++) {
        if (__gcd(candidate, mod) != 1) {
            continue;
        }
        bool passes = true;
        for (int &q : prime_factors) {
            if (quick_pow(candidate, (mod - 1) / q) == 1) {
                passes = false;
                break;
            }
        }
        if (passes) {
            return candidate;
        }
    }
}
// Reads n, k and mod, then prints k values separated by spaces.
//
// For each j in [0, k) the sieve is rebuilt with weight w[j] = wn^j, where wn
// is g^((mod - 1) / k) for the g returned by get_g(). res_j accumulates
// (n / l) * (solve(r) - solve(l - 1)) over the blocks [l, r] on which n / l is
// constant. Output r is (1 / k) * sum_j w[k - (j * r mod k)] * res_j.
// NOTE(review): this has the shape of a roots-of-unity filter and presumably
// requires mod to be prime with k dividing mod - 1 — confirm against the
// problem constraints.
//
// Returns 1 without printing anything when the input is malformed or when
// k <= 0 or mod < 2; those values previously led to division by zero or a
// non-terminating exponentiation.
int main() {
    // freopen("in.txt", "r", stdin);
    // freopen("out.txt", "w", stdout);
    if (scanf("%d %d %d", &n, &k, &mod) != 3 || k <= 0 || mod < 2) {
        return 1;
    }

    const int wn = quick_pow(get_g(), (mod - 1) / k);

    // w[i] = wn^i for i in [0, k]. Slot k is needed because the index
    // k - (j * r mod k) used below ranges over [1, k]. Sized from k instead of
    // a fixed 20-element array so larger k cannot overrun the buffer.
    std::vector<int> w(k + 1, 1);
    for (int i = 1; i <= k; i++) {
        w[i] = 1ll * w[i - 1] * wn % mod;
    }

    std::vector<int> ans(k, 0);
    min_25::pre();
    for (int j = 0; j < k; j++) {
        min_25::wn = w[j];
        min_25::init();

        int res = 0;
        for (int l = 1, r; l <= n; l = r + 1) {
            r = n / (n / l);
            // solve() may return an unreduced value equal to mod; the
            // multiplication below reduces it again.
            int cur = dec(min_25::solve(r), min_25::solve(l - 1));
            res = add(res, 1ll * (n / l) * cur % mod);
        }

        for (int r = 0; r < k; r++) {
            // 64-bit product: j * r can exceed the int range for large k.
            const int shift = static_cast<int>(1ll * j * r % k);
            ans[r] = add(ans[r], 1ll * w[k - shift] * res % mod);
        }
    }

    // Division by k via quick_pow(k, mod - 2).
    int inv_k = quick_pow(k, mod - 2);
    for (int i = 0; i < k; i++) {
        printf("%lld ", 1ll * ans[i] * inv_k % mod);
    }
    return 0;
}