传送门 首先可以写成数位 DP 的形式

设 $f_{i,j,S}$ 表示长度为 $i$、模 $k$ 余 $j$、三进制状态为 $S$ 的方案数
$S$ 的转移为三进制不进位加法
考虑倍增,将两个序列拼接起来

$$f_{2i,\;(j_1\cdot 10^{i}+j_2)\bmod k,\;S_1\oplus S_2}\;\mathrel{+}=\;f_{i,j_1,S_1}\cdot f_{i,j_2,S_2}$$
其中 $\oplus$ 表示三进制不进位加法

不进位加法可以用三进制 FWT 优化
然后还有一个限制就是下标模 $k$ 意义下的循环卷积

我们将 $f$ 看做一个矩阵(行为余数 $j$,列为状态 $S$),那么我们先将每一行做 FWT
然后对每一列做一个模 $k$ 的循环卷积;乘上 10 的次幂只需先把下标 $j$ 变换为 $j\cdot 10^{i} \bmod k$ 即可

复杂度 $O(3^{|s|}\,k\log k\log n)$


#include<bits/stdc++.h>
// Shorthand used throughout this file: `cs` expands to `const`.
#define cs const
using namespace std;
// Modulus for every modular-arithmetic helper below (998244353 is prime).
cs int Mod = 998244353;
// 64-bit integer alias; used for the exponent n and in operator^.
typedef long long ll;
// Returns a + b with a single conditional subtraction of Mod.
// The result is the residue (a + b) mod Mod when both inputs are already in [0, Mod).
int add(int a, int b){
    int sum = a + b;
    if(sum >= Mod) sum -= Mod;
    return sum;
}
// Returns a * b reduced modulo Mod; the product is formed in 64 bits so it cannot
// overflow for 32-bit operands.
int mul(int a, int b){
    long long product = 1ll * a * b;
    return product % Mod;
}
// Binary exponentiation: returns a^b modulo Mod, built from mul().
// An exponent of 0 yields 1. The loop consumes b one bit at a time (lowest bit first).
int ksm(int a, int b){
    int result = 1;
    while(b){
        if(b & 1) result = mul(result, a);
        a = mul(a, a);   // a now holds the base raised to the next power of two
        b >>= 1;
    }
    return result;
}
// Returns a - b with a single conditional addition of Mod.
// The result is the residue (a - b) mod Mod when both inputs are already in [0, Mod).
int dec(int a, int b){
    int diff = a - b;
    if(diff < 0) diff += Mod;
    return diff;
}
void Add(int &a, int b){ a = add(a, b); }
// N: number of rows of the global table f (declared as `poly f[N]`).
// M: not referenced anywhere in the visible code.
cs int N = 255, M = 730;
// Value of the form x + y*w with coefficients kept modulo Mod, where the product
// rule below corresponds to w*w = -1 - w (i.e. w^2 + w + 1 = 0, so w behaves as a
// cube root of unity). Addition and subtraction act component-wise.
//
// The arithmetic operators are const-qualified so they can be used on const
// operands and through const references as well as on mutable values.
//
// The objects declared after the struct (w0, w1, w2 and the tables W1 / W2) are
// default-constructed to (0, 0) here and filled in by NTT_init().
struct cp{
    int x, y;   // coefficient of 1 and coefficient of w

    cp(int _x = 0, int _y = 0) : x(_x), y(_y) {}

    cp operator + (const cp &a) const { return cp(add(x, a.x), add(y, a.y)); }
    cp operator - (const cp &a) const { return cp(dec(x, a.x), dec(y, a.y)); }

    // (x + y w)(a.x + a.y w) = x*a.x + (x*a.y + y*a.x) w + y*a.y w^2,
    // and substituting w^2 = -1 - w moves -y*a.y into both components.
    cp operator * (const cp &a) const {
        int yy = mul(y, a.y);
        int constPart = dec(mul(x, a.x), yy);
        int wPart = dec(add(mul(x, a.y), mul(y, a.x)), yy);
        return cp(constPart, wPart);
    }
} w0, w1, w2, W1[21], W2[21];
// `poly` is a macro alias for a vector of cp values.
#define poly vector<cp>
// f: table of N rows; main indexes it as f[residue][state] and resizes the first k rows.
// A: scratch vector; main copies one column of f into it before exponentiation.
poly f[N], A;
// n: read from input, used as the exponent passed to operator^ in main.
// k: read from input, modulus applied to vector indices in operator*, Mul and operator^.
// pw: pw[i] = 3^i for i in 0..9, filled in main.
ll n; int k, pw[10];
// up / bit: current transform length (a power of two) and its base-2 logarithm, set by init().
// rev: index permutation of length up used by NTT(), set by init().
int up, bit; vector<int> rev;
//void debug(poly f){
// for(int i = 0; i < f.size(); i++) cout << f[i].x << " ";
// cout << endl;
//}
// Fills the global constants used by FWT() and NTT().
//   w0 = 1, w1 = w, w2 = -1 - w   (stored as (Mod-1, Mod-1)).
//   W1[l] = 3^((Mod-1) >> (l+1)) for l = 20 .. 0, obtained by repeated squaring
//           starting from W1[20]; NTT() uses W1[l] as the step root at level l.
//   W2[l] = modular inverse of W1[l] (via the exponent Mod-2), used for the inverse transform.
// All W1 / W2 entries have a zero w-component.
void NTT_init(){
    w0 = cp(1, 0);
    w1 = cp(0, 1);
    w2 = cp(Mod - 1, Mod - 1);

    int forwardRoot = ksm(3, (Mod - 1) >> 21);
    int inverseRoot = ksm(forwardRoot, Mod - 2);
    for(int level = 20; level >= 0; level--){
        W1[level] = cp(forwardRoot, 0);
        W2[level] = cp(inverseRoot, 0);
        // Squaring moves to the root for the next smaller transform size.
        forwardRoot = mul(forwardRoot, forwardRoot);
        inverseRoot = mul(inverseRoot, inverseRoot);
    }
}
// Prepares the globals used by NTT() for a transform that must hold `deg` coefficients.
//   up  : smallest power of two that is >= deg (1 when deg <= 1)
//   bit : log2(up)
//   rev : rev[i] is i with its lowest `bit` bits reversed
//
// The recurrence rev[i] = (rev[i>>1] >> 1) | ((i&1) << (bit-1)) is only evaluated
// for i >= 1. That guarantees up >= 2 and therefore bit >= 1 whenever the shift is
// executed, so a length-1 transform (deg <= 1) never performs a shift by -1, which
// would be undefined behaviour. rev[0] is always 0.
void init(int deg){
    up = 1; bit = 0;
    while(up < deg){ up <<= 1; ++bit; }
    rev.assign(up, 0);
    for(int i = 1; i < up; i++)
        rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1));
}
// In-place radix-3 butterfly transform of a[0 .. deg-1], using the global roots w1 and w2.
//   a   : data, modified in place
//   deg : number of entries to transform; the stride triples each pass
//         (main passes pw[len], a power of 3)
//   typ : -1 selects the inverse: the roles of w1 and w2 are exchanged in every
//         butterfly and the result is finally multiplied by the modular inverse of deg.
//         Any other value performs the forward transform.
void FWT(poly &a, int deg, int typ){
    const bool inverse = (typ == -1);
    for(int span = 1; span < deg; span *= 3){
        for(int base = 0; base < deg; base += span * 3){
            for(int off = 0; off < span; off++){
                const int p0 = base + off;
                const int p1 = p0 + span;
                const int p2 = p1 + span;
                cp x = a[p0], y = a[p1], z = a[p2];
                cp viaW1 = x + y * w1 + z * w2;
                cp viaW2 = x + y * w2 + z * w1;
                a[p0] = x + y + z;
                // The inverse butterfly is the forward one with the two outputs exchanged.
                if(inverse){
                    a[p1] = viaW2;
                    a[p2] = viaW1;
                } else {
                    a[p1] = viaW1;
                    a[p2] = viaW2;
                }
            }
        }
    }
    if(inverse){
        cp scale(ksm(deg, Mod - 2), 0);
        for(int idx = 0; idx < deg; idx++) a[idx] = a[idx] * scale;
    }
}
// In-place iterative number-theoretic transform of a[0 .. up-1].
// Relies on the globals prepared by init() (up, rev) and NTT_init() (W1, W2).
//   typ == 1  : forward transform, step roots taken from W1
//   otherwise : step roots taken from W2; when typ == -1 the result is additionally
//               multiplied by the modular inverse of up.
void NTT(poly &a, int typ){
    // Reorder the input according to rev; each pair is exchanged once.
    for(int i = 0; i < up; i++){
        int partner = rev[i];
        if(i < partner) swap(a[i], a[partner]);
    }

    cp *roots = (typ == 1) ? W1 : W2;
    int level = 0;
    for(int half = 1; half < up; half <<= 1, ++level){
        cp step = roots[level];
        for(int start = 0; start < up; start += (half << 1)){
            cp cur(1, 0);
            for(int t = 0; t < half; t++){
                cp lo = a[start + t];
                cp hi = a[start + t + half] * cur;
                a[start + t] = lo + hi;
                a[start + t + half] = lo - hi;
                cur = cur * step;
            }
        }
    }

    if(typ == -1){
        cp scale(ksm(up, Mod - 2), 0);
        for(int i = 0; i < up; i++) a[i] = a[i] * scale;
    }
}
// Product of two coefficient vectors with indices wrapped modulo the global k.
// Both operands are taken by value because they are padded and transformed in place.
// Steps: linear convolution via NTT (length chosen by init), then every coefficient at
// index i >= k is added onto index i % k, and the result is truncated to k entries.
poly operator * (poly a, poly b){
    int deg = a.size() + b.size() - 1;
    init(deg);
    a.resize(up);
    b.resize(up);

    NTT(a, 1);
    NTT(b, 1);
    for(int i = 0; i < up; i++) a[i] = a[i] * b[i];
    NTT(a, -1);

    // Fold the upper part back so that indices are taken modulo k.
    for(int hi = k; hi < up; hi++){
        cp &slot = a[hi % k];
        slot = slot + a[hi];
    }
    a.resize(k);
    return a;
}
// Re-indexes a by sending index i to (i * coef) % k (entries that collide are summed),
// then multiplies the re-indexed vector with b using operator* (indices modulo k).
// Reads the first k entries of a; the temporary has the same size as a.
poly Mul(poly a, poly b, int coef){
    poly shifted;
    shifted.resize(a.size());
    for(int i = 0; i < k; i++){
        cp &dst = shifted[i * coef % k];
        dst = dst + a[i];
    }
    return shifted * b;
}
// Repeated-doubling "power" of a with exponent b, combining with Mul().
// The accumulator starts as the vector with a 1 at index 0 and zeros elsewhere.
// `shift` starts at 10 % k and is squared modulo k after each step; it is the
// index multiplier handed to Mul() both when a set bit of b folds the current
// power into the accumulator and when the current power is combined with itself.
poly operator ^ (poly a, ll b){
    poly result;
    result.resize(k);
    result[0] = cp(1, 0);

    int shift = 10 % k;
    while(b){
        if(b & 1) result = Mul(result, a, shift);
        b >>= 1;
        a = Mul(a, a, shift);
        shift = shift * shift % k;
    }
    return result;
}
// Program entry point.
// Reads n, k and a digit string s from standard input, then:
//   1. builds pw[i] = 3^i and vs[d] = 1 + (last position of digit d in s), 0 if absent;
//   2. for every digit d in 0..9 adds one to f[d % k][state], where state is
//      3^(position of d in s) when d occurs in s and 0 otherwise;
//   3. applies the forward FWT to each non-empty row of f;
//   4. for every transformed column, raises it to the n-th power with operator^ and
//      keeps the entry at index 0;
//   5. applies the inverse FWT to those values and prints the x component of entry 0.
// NOTE(review): presumably this counts length-n digit strings divisible by k whose
// ternary state is 0 — confirm against the problem statement.
int main(){
    NTT_init();

    string s;
    cin >> n >> k >> s;
    int len = s.length();

    pw[0] = 1;
    for(int i = 1; i < 10; i++) pw[i] = 3 * pw[i - 1];

    static int vs[10];
    for(int pos = 0; pos < len; pos++) vs[s[pos] - '0'] = pos + 1;

    const int states = pw[len];
    for(int r = 0; r < k; r++) f[r].resize(states);
    // pw[vs[d]] / 3 is 3^(vs[d]-1) for digits present in s and 0 for the others.
    for(int d = 0; d < 10; d++) ++f[d % k][pw[vs[d]] / 3].x;
    // Only rows below min(10, k) can hold non-zero entries.
    for(int r = 0; r < min(10, k); r++) FWT(f[r], states, 1);

    A.resize(k);
    poly ans;
    ans.resize(states);
    for(int st = 0; st < states; st++){
        for(int r = 0; r < k; r++) A[r] = f[r][st];
        ans[st] = (A ^ n)[0];
    }
    FWT(ans, states, -1);

    cout << ans[0].x;
    return 0;
}