#3328. PYXFIB

$$
\sum_{i = 0}^{\lfloor n / k \rfloor} C_n^{i \times k} \times F_{i \times k}
= \sum_{i = 0}^{n} C_n^{i} \times F_i \times [i \equiv 0 \pmod k]
$$

对于 $[i \equiv 0 \pmod k]$，单位根反演有 $[i \equiv 0 \pmod k] = \frac{1}{k} \sum_{j = 0}^{k - 1} w_k^{ij}$，代入得

$$
\frac{1}{k} \sum_{i = 0}^{n} C_n^{i} \times F_i \sum_{j = 0}^{k - 1} w_k^{ij}
$$
由于 $F_i$ 为斐波那契数列的第 $i$ 项，构造矩阵 $A = \begin{bmatrix} 1 & 1 \\ 1 & 0 \end{bmatrix}$，则有 $A^i[0][0] = F_i$，于是 $\sum\limits_{i = 0}^{n} C_n^i \times F_i$ 即为 $(I + A)^n$ 的 $[0][0]$ 元素，记作 $\sum\limits_{i = 0}^{n} C_n^i \times F_i = (I + A)^n$。
把 $F_i$ 换成 $A^i$，交换求和顺序并对内层使用二项式定理：

$$
\frac{1}{k} \sum_{i = 0}^{n} \sum_{j = 0}^{k - 1} C_n^i \times A^i \times w_k^{ij}
= \frac{1}{k} \sum_{j = 0}^{k - 1} (A \times w_k^j + I)^n
$$

#include <bits/stdc++.h>
#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4.1,sse4.2,avx,avx2,popcnt,tune=native")

using namespace std;

// Program-wide inputs, filled by scanf in main():
//   n   - upper bound of the summation range; also drives the min_25 tables.
//   k   - number of roots of unity / number of values printed by main().
//   mod - modulus used by add(), dec() and quick_pow().
int n, k, mod;

// Modular subtraction helper: yields a - b, adding the global `mod` once when
// a is smaller than b. For operands in [0, mod) the result is again in [0, mod).
int dec(int a, int b) {
  if (a < b) {
    // Borrow one modulus so the difference does not go negative.
    return a + mod - b;
  }
  return a - b;
}

// Modular addition helper: yields a + b, subtracting the global `mod` once when
// the sum reaches it. For operands in [0, mod) the result is again in [0, mod).
int add(int a, int b) {
  const int total = a + b;
  if (total >= mod) {
    return total - mod;
  }
  return total;
}

// Sieve tables and routines in the style of a Min_25 sieve, driven by the
// global `n` and `mod`. Usage in this file: pre() once, then for each weight
// set `wn`, call init(), and query prefix values through solve().
namespace min_25 {
  // Capacity of every table below.
  const int N = 1e6 + 10;

  // a[1..m]      : the distinct values of n / l, stored in decreasing order.
  // id1 / id2    : index of a value v inside a[]; id1[v] when v <= T, id2[n / v] otherwise.
  // prime[1..cnt]: primes found by the sieve up to T.
  // sum1[c]      : running count over the first c primes (each prime adds 1).
  // g1[i]        : starts as a[i] - 1 and is refined in pre(); this matches the usual
  //                prime-counting stage, leaving the number of primes <= a[i].
  // sum[c]       : running sum of wn over the first c primes (rebuilt by init()).
  // s[i]         : per-a[i] accumulated value built by init() and read by solve().
  // T            : integer square root bound computed from n.
  // wn           : weight applied per prime factor; assigned by the caller before init().
  int a[N], id1[N], id2[N], prime[N], g1[N], s[N], sum1[N], sum[N], m, cnt, T, wn;

  // st[x] != 0 marks x as composite in the sieve.
  bool st[N];

  // Maps a value x (expected to be one of the stored n / l values) to its index in a[].
  inline int ID(int x) {
    return x <= T ? id1[x] : id2[n / x];
  }

  // One-time precomputation that depends only on n: sieves primes up to T,
  // enumerates the distinct quotients n / l, and refines g1[].
  void pre() {
    T = sqrt(n + 0.5);
    // Linear sieve: every composite is marked once, by its smallest prime factor.
    for (int i = 2; i <= T; i++) {
      if (!st[i]) {
        prime[++cnt] = i;
        sum1[cnt] = sum1[cnt - 1] + 1;
      }
      for (int j = 1; j <= cnt && 1ll * i * prime[j] <= T; j++) {
        st[i * prime[j]] = 1;
        if (i % prime[j] == 0) {
          break;
        }
      }
    }
    // Walk the blocks [l, r] on which n / l is constant and record each quotient.
    for (int l = 1, r; l <= n; l = r + 1) {
      r = n / (n / l);
      a[++m] = n / l;
      // Conditional expression used only for its side effect: store the index m
      // in id1 (small values) or id2 (large values).
      a[m] <= T ? id1[a[m]] = m : id2[n / a[m]] = m;
      // Initial value: how many integers lie in [2, a[m]].
      g1[m] = a[m] - 1;
    }
    // For each prime in increasing order, subtract the contribution routed through
    // a[i] / prime[j]. Because a[] is decreasing, the entry read has a larger index
    // than i and has not yet been updated in this pass.
    for (int j = 1; j <= cnt && 1ll * prime[j] * prime[j] <= n; j++) {
      for (int i = 1; i <= m && 1ll * prime[j] * prime[j] <= a[i]; i++) {
        g1[i] = dec(g1[i], dec(g1[ID(a[i] / prime[j])], sum1[j - 1]));
      }
    }
  }

  // Rebuilds sum[] and s[] for the current value of wn; safe to call repeatedly
  // because both tables are fully overwritten before being accumulated into.
  // NOTE(review): the recurrence looks like the second Min_25 stage for the
  // function with f(p^e) = wn^e - confirm against the problem statement.
  void init() {
    // sum[i] = i * wn (mod mod), built incrementally.
    for (int i = 1; i <= cnt; i++) {
      sum[i] = add(sum[i - 1], wn);
    }
    // Seed s[i] with g1[i] * wn.
    for (int i = 1; i <= m; i++) {
      s[i] = 1ll * g1[i] * wn % mod;
    }
    // Process primes from largest to smallest; for each power cur = prime[j]^e
    // with weight w = wn^e, add (s[a[i] / cur] - sum[j]) * w + wn * w.
    for (int j = cnt; j >= 1; j--) {
      for (int i = 1; i <= m && 1ll * prime[j] * prime[j] <= a[i]; i++) {
        for (int cur = prime[j], w = wn; 1ll * cur * prime[j] <= a[i]; cur *= prime[j], w = 1ll * w * wn % mod) {
          s[i] = add(s[i], add(1ll * dec(s[ID(a[i] / cur)], sum[j]) * w % mod, 1ll * wn * w % mod));
        }
      }
    }
  }

  // Returns 0 when the argument is 0, otherwise s[ID(n)] + 1.
  // The parameter shadows the global n; ID() still divides by the global n.
  // The "+ 1" is not reduced modulo mod here (presumably the term for 1 - confirm).
  int solve(int n) {
    return n ? s[ID(n)] + 1 : 0;
  }
}

// Binary exponentiation: returns a raised to the n-th power, reduced modulo the
// global `mod`. Products are formed in 64-bit arithmetic before reduction.
int quick_pow(int a, int n) {
  int result = 1;
  for (int base = a, e = n; e != 0; e >>= 1) {
    // Multiply in the current square whenever the lowest exponent bit is set.
    if ((e & 1) != 0) {
      result = 1ll * result * base % mod;
    }
    base = 1ll * base * base % mod;
  }
  return result;
}

// Returns the smallest positive integer g that is coprime to the global `mod`
// and satisfies g^((mod - 1) / q) != 1 (mod mod) for every distinct prime
// divisor q of mod - 1. Returns 1 directly when mod == 2.
// NOTE(review): this is the least primitive root when mod is prime; primality
// is not checked here.
int get_g() {
  if (mod == 2) {
    return 1;
  }

  // Trial division: gather the distinct prime divisors of mod - 1.
  std::vector<int> prime_divisors;
  int rest = mod - 1;
  for (int d = 2; 1ll * d * d <= rest; ++d) {
    if (rest % d != 0) {
      continue;
    }
    prime_divisors.push_back(d);
    do {
      rest /= d;
    } while (rest % d == 0);
  }
  // Whatever remains (other than 1) is a single leftover divisor.
  if (rest != 1) {
    prime_divisors.push_back(rest);
  }

  // Try candidates 1, 2, 3, ... until one passes every order test.
  int candidate = 1;
  while (true) {
    if (__gcd(candidate, mod) == 1) {
      bool passes_all = true;
      for (const int q : prime_divisors) {
        if (quick_pow(candidate, (mod - 1) / q) == 1) {
          passes_all = false;
          break;
        }
      }
      if (passes_all) {
        return candidate;
      }
    }
    ++candidate;
  }
}

// Entry point. Reads n, k and mod from stdin and prints k space-separated
// values to stdout.
//
// Steps:
//   1. wn = g^((mod - 1) / k) for g = get_g(); w[i] holds wn^i for i in [0, k].
//   2. For every j in [0, k): run the sieve with weight w[j] and form
//      res = sum over blocks [l, r] of (n / l) * (solve(r) - solve(l - 1)).
//   3. ans[r] accumulates w[k - (j * r mod k)] * res; each ans[r] is finally
//      multiplied by k^(mod - 2) before printing.
// NOTE(review): step 3 looks like an inverse roots-of-unity transform, which
// needs mod prime and k dividing mod - 1 - neither is checked here.
//
// Returns 1 without output if the input is malformed (fewer than three
// integers, negative n, k < 1 or mod < 2); returns 0 otherwise.
int main() {
  // freopen("in.txt", "r", stdin);
  // freopen("out.txt", "w", stdout);
  if (scanf("%d %d %d", &n, &k, &mod) != 3 || n < 0 || k < 1 || mod < 2) {
    // Continuing would divide by zero ((mod - 1) / k) or index empty tables.
    return 1;
  }

  const int wn = quick_pow(get_g(), (mod - 1) / k);

  // Tables are sized from k instead of a fixed 20 so that any k is in bounds.
  std::vector<int> w(k + 1, 1);
  for (int i = 1; i <= k; i++) {
    w[i] = 1ll * w[i - 1] * wn % mod;
  }
  std::vector<int> ans(k, 0);

  min_25::pre();
  for (int j = 0; j < k; j++) {
    min_25::wn = w[j];
    min_25::init();
    int res = 0;
    // Block decomposition: n / l is constant for l in [l, r].
    for (int l = 1, r; l <= n; l = r + 1) {
      r = n / (n / l);
      const int cur = dec(min_25::solve(r), min_25::solve(l - 1));
      res = add(res, 1ll * (n / l) * cur % mod);
    }
    for (int r = 0; r < k; r++) {
      // 64-bit product keeps j * r from overflowing for large k; the index is in [1, k].
      const int inv_idx = k - static_cast<int>(1ll * j * r % k);
      ans[r] = add(ans[r], 1ll * w[inv_idx] * res % mod);
    }
  }

  const int inv_k = quick_pow(k, mod - 2);
  for (int i = 0; i < k; i++) {
    printf("%lld ", 1ll * ans[i] * inv_k % mod);
  }
  return 0;
}