任意 mod 畳み込み
(convolution/intmod.hpp)
- View this file on GitHub
- Last update: 2025-10-21 21:13:36+09:00
- Include:
#include "convolution/intmod.hpp"
必ずしも NTT friendly でない一般の (32 bit の) mod において $O(N\log N)$ 時間で畳み込みが計算できる.
3 つの NTT friendly な素数について畳み込み,それらの結果から本来の mod での値を復元する.
Depends on
Verified with
Code
#pragma once
#include "../modint/modint.hpp"
#include "../fft/ntt.hpp"
namespace ConvolutionIntMod {
using ll = long long;
const ll Mod1 = 754974721;
const ll Mod2 = 167772161;
const ll Mod3 = 469762049;
const ll M1invM2 = 95869806;
const ll M12invM3 = 187290749;
using M1 = ModInt<Mod1>;
using M2 = ModInt<Mod2>;
using M3 = ModInt<Mod3>;
NTT<M1> ntt1;
NTT<M2> ntt2;
NTT<M3> ntt3;
template <class mint>
vector<mint> multiply(const vector<mint>& a, const vector<mint>& b) {
if (a.empty() || b.empty()) return {};
int mod = mint::get_mod();
ll M12mod = Mod1 * Mod2 % mod;
vector<unsigned int> a0(a.size()), b0(b.size());
for (int i = 0; i < a.size(); i++) a0[i] = a[i].val();
for (int i = 0; i < b.size(); i++) b0[i] = b[i].val();
vector<M1> a1(a0.begin(), a0.end()), b1(b0.begin(), b0.end()), c1 = ntt1.multiply(a1, b1);
vector<M2> a2(a0.begin(), a0.end()), b2(b0.begin(), b0.end()), c2 = ntt2.multiply(a2, b2);
vector<M3> a3(a0.begin(), a0.end()), b3(b0.begin(), b0.end()), c3 = ntt3.multiply(a3, b3);
vector<mint> c(a.size() + b.size() - 1, 0);
for (int i = 0; i < c.size(); i++) {
ll v1 = ((ll)c2[i].val() - (ll)c1[i].val()) * M1invM2 % Mod2;
if (v1 < 0) v1 += Mod2;
ll v2 = ((ll)c3[i].val() - ((ll)c1[i].val() + Mod1 * v1) % Mod3) * M12invM3 % Mod3;
if (v2 < 0) v2 += Mod3;
ll v3 = ((ll)c1[i].val() + Mod1 * v1 + M12mod * v2) % mod;
if (v3 < 0) v3 += mod;
c[i] = v3;
}
return c;
}
template <class mint>
vector<mint> middle_product(const vector<mint>& a, const vector<mint>& b) {
if (b.empty() || a.size() > b.size()) return {};
int mod = mint::get_mod();
ll M12mod = Mod1 * Mod2 % mod;
vector<unsigned int> a0(a.size()), b0(b.size());
for (int i = 0; i < a.size(); i++) a0[i] = a[i].val();
for (int i = 0; i < b.size(); i++) b0[i] = b[i].val();
vector<M1> a1(a0.begin(), a0.end()), b1(b0.begin(), b0.end()), c1 = ntt1.middle_product(a1, b1);
vector<M2> a2(a0.begin(), a0.end()), b2(b0.begin(), b0.end()), c2 = ntt2.middle_product(a2, b2);
vector<M3> a3(a0.begin(), a0.end()), b3(b0.begin(), b0.end()), c3 = ntt3.middle_product(a3, b3);
vector<mint> c(c1.size(), 0);
for (int i = 0; i < c.size(); i++) {
ll v1 = ((ll)c2[i].val() - (ll)c1[i].val()) * M1invM2 % Mod2;
if (v1 < 0) v1 += Mod2;
ll v2 = ((ll)c3[i].val() - ((ll)c1[i].val() + Mod1 * v1) % Mod3) * M12invM3 % Mod3;
if (v2 < 0) v2 += Mod3;
ll v3 = ((ll)c1[i].val() + Mod1 * v1 + M12mod * v2) % mod;
if (v3 < 0) v3 += mod;
c[i] = v3;
}
return c;
}
}; // namespace ConvolutionIntMod
/**
* @brief 任意 mod 畳み込み
* @docs docs/convolution/intmod.md
*/
#line 2 "convolution/intmod.hpp"
#line 2 "modint/modint.hpp"
/**
 * Integer modulo the compile-time constant `m`, stored as a residue in [0, m).
 *
 * Addition, subtraction, increment and decrement are correct for every 32-bit
 * modulus. inv() raises to the power m - 2, so it is only a true inverse when
 * `m` is prime.
 */
template <unsigned int m = 998244353>
struct ModInt {
    using mint = ModInt;
    unsigned int _v;  // residue; every operation below keeps it in [0, m)

    static constexpr unsigned int get_mod() { return m; }

    // Stores v as-is, without reduction; the caller must pass 0 <= v < m.
    static mint raw(int v) {
        mint x;
        x._v = v;
        return x;
    }

    ModInt() : _v(0) {}

    // Reduces any signed 64-bit value, negative ones included, into [0, m).
    ModInt(int64_t v) {
        long long x = (long long)(v % (long long)(umod()));
        if (x < 0) x += umod();
        _v = (unsigned int)(x);
    }

    unsigned int val() const { return _v; }

    mint &operator++() {
        _v++;
        if (_v == umod()) _v = 0;
        return *this;
    }
    mint &operator--() {
        if (_v == 0) _v = umod();
        _v--;
        return *this;
    }
    mint operator++(int) {
        mint result = *this;
        ++*this;
        return result;
    }
    mint operator--(int) {
        mint result = *this;
        --*this;
        return result;
    }

    mint &operator+=(const mint &rhs) {
        // Compare against the distance to m instead of adding first: for
        // m > 2^31 the sum _v + rhs._v can wrap around 2^32, and the wrapped
        // value would wrongly skip the reduction.
        const unsigned int gap = umod() - rhs._v;  // in [1, m]
        if (_v >= gap) {
            _v -= gap;
        } else {
            _v += rhs._v;
        }
        return *this;
    }
    mint &operator-=(const mint &rhs) {
        // Same reasoning as operator+=: never rely on the wrapped difference.
        if (_v >= rhs._v) {
            _v -= rhs._v;
        } else {
            _v += umod() - rhs._v;
        }
        return *this;
    }
    mint &operator*=(const mint &rhs) {
        unsigned long long z = _v;
        z *= rhs._v;
        _v = (unsigned int)(z % umod());
        return *this;
    }
    mint &operator/=(const mint &rhs) { return *this = *this * rhs.inv(); }

    mint operator+() const { return *this; }
    mint operator-() const { return mint() - *this; }

    // Binary exponentiation; n must be non-negative.
    mint pow(long long n) const {
        assert(0 <= n);
        mint x = *this, r = 1;
        while (n) {
            if (n & 1) r *= x;
            x *= x;
            n >>= 1;
        }
        return r;
    }
    // Inverse computed as this^(m - 2); requires a non-zero value and a prime m.
    mint inv() const {
        assert(_v);
        return pow(umod() - 2);
    }

    friend mint operator+(const mint &lhs, const mint &rhs) {
        return mint(lhs) += rhs;
    }
    friend mint operator-(const mint &lhs, const mint &rhs) {
        return mint(lhs) -= rhs;
    }
    friend mint operator*(const mint &lhs, const mint &rhs) {
        return mint(lhs) *= rhs;
    }
    friend mint operator/(const mint &lhs, const mint &rhs) {
        return mint(lhs) /= rhs;
    }
    friend bool operator==(const mint &lhs, const mint &rhs) {
        return lhs._v == rhs._v;
    }
    friend bool operator!=(const mint &lhs, const mint &rhs) {
        return lhs._v != rhs._v;
    }

    // Reads a signed integer and reduces it, so negative input or input that
    // is >= m still yields a valid residue instead of a raw out-of-range _v.
    friend std::istream &operator>>(std::istream &is, mint &x) {
        long long t = 0;
        is >> t;
        x = mint(t);
        return is;
    }
    friend std::ostream &operator<<(std::ostream &os, const mint &x) {
        return os << x.val();
    }

private:
    static constexpr unsigned int umod() { return m; }
};
#line 2 "fft/ntt.hpp"
// Number-theoretic transform over the residues of `mint`, plus convolution helpers built on it.
// `mint` must provide a constexpr static get_mod() and the operations used below
// (construction from integers, val(), pow(), inv(), +, -, *, *=, +=, ==).
// NOTE(review): presumably get_mod() must be a prime p for which p - 1 is divisible by a large
// power of two — confirm against the instantiations in the rest of the library.
template <class mint>
struct NTT {
static constexpr unsigned int mod = mint::get_mod();
// Computes x^n mod m by binary exponentiation; usable in constant expressions.
static constexpr unsigned long long pow_constexpr(unsigned long long x, unsigned long long n, unsigned long long m) {
unsigned long long y = 1;
while (n) {
if (n & 1) y = y * x % m;
x = x * x % m;
n >>= 1;
}
return y;
}
// Returns the smallest x >= 2 with x^((mod - 1) / 2) != 1 (mod mod).
// NOTE(review): for a prime modulus that is a quadratic non-residue, not necessarily a primitive
// root; the constructor only relies on the power-of-two part of its order — confirm.
static constexpr unsigned int get_g() {
unsigned long long x = 2;
while (pow_constexpr(x, (mod - 1) >> 1, mod) == 1) x += 1;
return x;
}
static constexpr unsigned int g = get_g();
// Number of trailing zero bits of mod - 1.
static constexpr int rank2 = __builtin_ctzll(mod - 1);
// root[rank2] = g^((mod - 1) >> rank2) and root[i] = root[i + 1]^2; iroot[i] is built the same way
// from the inverse of root[rank2].
array<mint, rank2 + 1> root;
array<mint, rank2 + 1> iroot;
// Multipliers used to advance the running twiddle from block s to block s + 1:
// rate2 / irate2 in the radix-2 passes, rate3 / irate3 in the radix-4 passes.
array<mint, max(0, rank2 - 2 + 1)> rate2;
array<mint, max(0, rank2 - 2 + 1)> irate2;
array<mint, max(0, rank2 - 3 + 1)> rate3;
array<mint, max(0, rank2 - 3 + 1)> irate3;
// Fills the root / iroot tables and the rate tables derived from them.
NTT() {
root[rank2] = mint(g).pow((mod - 1) >> rank2);
iroot[rank2] = root[rank2].inv();
for (int i = rank2 - 1; i >= 0; i--) {
root[i] = root[i + 1] * root[i + 1];
iroot[i] = iroot[i + 1] * iroot[i + 1];
}
{
// prod accumulates iroot[2..i+1] and iprod accumulates root[2..i+1].
mint prod = 1, iprod = 1;
for (int i = 0; i <= rank2 - 2; i++) {
rate2[i] = root[i + 2] * prod;
irate2[i] = iroot[i + 2] * iprod;
prod *= iroot[i + 2];
iprod *= root[i + 2];
}
}
{
// Same construction shifted by one level, for the radix-4 passes.
mint prod = 1, iprod = 1;
for (int i = 0; i <= rank2 - 3; i++) {
rate3[i] = root[i + 3] * prod;
irate3[i] = iroot[i + 3] * iprod;
prod *= iroot[i + 3];
iprod *= root[i + 3];
}
}
}
// In-place forward transform of a.
// h is the number of trailing zero bits of the size, and a is resized to 1 << h, which is a
// no-op when the size is a power of two.
// NOTE(review): a size that is not a power of two would be truncated to its lowest set bit, and
// an empty vector reaches __builtin_ctzll(0), whose result is undefined — callers are expected
// to pass a non-empty power-of-two size.
// NOTE(review): no reordering pass is performed, so the output is presumably in bit-reversed
// order, matched by intt() — confirm.
void ntt(vector<mint>& a) {
int n = int(a.size());
int h = __builtin_ctzll((unsigned int)n);
a.resize(1 << h);
int len = 0; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
while (len < h) {
if (h - len == 1) {
// Exactly one level left (only happens when h is odd): plain radix-2 butterflies.
int p = 1 << (h - len - 1);
mint rot = 1;
for (int s = 0; s < (1 << len); s++) {
int offset = s << (h - len);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p] * rot;
a[i + offset] = l + r;
a[i + offset + p] = l - r;
}
// Advance the twiddle; the table index is the number of trailing one bits of s.
if (s + 1 != (1 << len)) rot *= rate2[__builtin_ctzll(~(unsigned int)(s))];
}
len++;
} else {
// Radix-4 step: processes two levels at once.
int p = 1 << (h - len - 2);
mint rot = 1, imag = root[2];
for (int s = 0; s < (1 << len); s++) {
mint rot2 = rot * rot;
mint rot3 = rot2 * rot;
int offset = s << (h - len);
for (int i = 0; i < p; i++) {
// mod2 = mod^2 is added before each subtraction so the unsigned 64-bit
// intermediates never go below zero; reduction happens when the sums are
// converted back to mint.
// NOTE(review): the sums reach roughly 3 * mod^2; if mint only accepts a signed
// 64-bit argument this needs 3 * mod^2 < 2^63 — verify for moduli above ~1.7e9.
auto mod2 = 1ULL * mint::get_mod() * mint::get_mod();
auto a0 = 1ULL * a[i + offset].val();
auto a1 = 1ULL * a[i + offset + p].val() * rot.val();
auto a2 = 1ULL * a[i + offset + 2 * p].val() * rot2.val();
auto a3 = 1ULL * a[i + offset + 3 * p].val() * rot3.val();
auto a1na3imag = 1ULL * mint(a1 + mod2 - a3).val() * imag.val();
auto na2 = mod2 - a2;
a[i + offset] = a0 + a2 + a1 + a3;
a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3));
a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag);
}
if (s + 1 != (1 << len)) rot *= rate3[__builtin_ctzll(~(unsigned int)(s))];
}
len += 2;
}
}
}
// In-place inverse transform of a, including the final multiplication by the inverse of n.
// The size handling is the same as in ntt(): a is resized to 1 << h with h = ctz(size).
// Levels are undone from len = h down to 0, two at a time, with a single radix-2 step when
// only one level remains.
void intt(vector<mint>& a) {
int n = int(a.size());
int h = __builtin_ctzll((unsigned int)n);
a.resize(1 << h);
int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
while (len) {
if (len == 1) {
// Last remaining level: radix-2 butterflies.
int p = 1 << (h - len);
mint irot = 1;
for (int s = 0; s < (1 << (len - 1)); s++) {
int offset = s << (h - len + 1);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p];
a[i + offset] = l + r;
// mod is added before subtracting so the difference stays non-negative.
a[i + offset + p] = (unsigned long long)(mint::get_mod() + l.val() - r.val()) * irot.val();
}
if (s + 1 != (1 << (len - 1))) irot *= irate2[__builtin_ctzll(~(unsigned int)(s))];
}
len--;
} else {
// Radix-4 step: undoes two levels at once.
int p = 1 << (h - len);
mint irot = 1, iimag = iroot[2];
for (int s = 0; s < (1 << (len - 2)); s++) {
mint irot2 = irot * irot;
mint irot3 = irot2 * irot;
int offset = s << (h - len + 2);
for (int i = 0; i < p; i++) {
// All four inputs are reduced residues; get_mod() is added wherever a value is
// subtracted so the unsigned intermediates stay non-negative.
auto a0 = 1ULL * a[i + offset + 0 * p].val();
auto a1 = 1ULL * a[i + offset + 1 * p].val();
auto a2 = 1ULL * a[i + offset + 2 * p].val();
auto a3 = 1ULL * a[i + offset + 3 * p].val();
auto a2na3iimag = 1ULL * mint((mint::get_mod() + a2 - a3) * iimag.val()).val();
a[i + offset] = a0 + a1 + a2 + a3;
a[i + offset + 1 * p] = (a0 + (mint::get_mod() - a1) + a2na3iimag) * irot.val();
a[i + offset + 2 * p] = (a0 + a1 + (mint::get_mod() - a2) + (mint::get_mod() - a3)) * irot2.val();
a[i + offset + 3 * p] = (a0 + (mint::get_mod() - a1) + (mint::get_mod() - a2na3iimag)) * irot3.val();
}
if (s + 1 != (1 << (len - 2))) irot *= irate3[__builtin_ctzll(~(unsigned int)(s))];
}
len -= 2;
}
}
// Scale every entry by n^{-1}.
mint e = mint(n).inv();
for (auto& x : a) x *= e;
}
// Convolution of a and b: returns a.size() + b.size() - 1 coefficients, or an empty vector
// when either input is empty.
vector<mint> multiply(const vector<mint>& a, const vector<mint>& b) {
if (a.empty() || b.empty()) return vector<mint>();
int n = a.size(), m = b.size();
int sz = n + m - 1;
// If either operand has at most 30 entries, use the schoolbook double loop, with the
// operands swapped if needed so that the outer loop runs over the short one.
if (n <= 30 || m <= 30) {
if (n > 30) return multiply(b, a);
vector<mint> res(sz);
for (int i = 0; i < n; i++)
for (int j = 0; j < m; j++) res[i + j] += a[i] * b[j];
return res;
}
// Smallest power of two that holds the full product.
int sz1 = 1;
while (sz1 < sz) sz1 <<= 1;
vector<mint> res(sz1);
for (int i = 0; i < n; i++) res[i] = a[i];
ntt(res);
// Squaring: equal inputs need only one forward transform.
if (a == b)
for (int i = 0; i < sz1; i++) res[i] *= res[i];
else {
vector<mint> c(sz1);
for (int i = 0; i < m; i++) c[i] = b[i];
ntt(c);
for (int i = 0; i < sz1; i++) res[i] *= c[i];
}
intt(res);
res.resize(sz);
return res;
}
// Middle product: c[i] = sum over j of a[j] * b[i + j], for i in [0, b.size() - a.size()].
// Returns an empty vector when b is empty or a is longer than b.
vector<mint> middle_product(const vector<mint>& a, const vector<mint>& b) {
if (b.empty() || a.size() > b.size()) return {};
int n = a.size(), m = b.size();
int sz = m - n + 1;
// Direct evaluation when a or the result has at most 30 entries.
if (n <= 30 || sz <= 30) {
vector<mint> res(sz);
for (int i = 0; i < sz; i++)
for (int j = 0; j < n; j++) res[i] += a[j] * b[i + j];
return res;
}
// Multiply reversed a with b through transforms of size sz1 >= m, then keep only the
// entries at indices n - 1 .. m - 1 of the product.
int sz1 = 1;
while (sz1 < m) sz1 <<= 1;
vector<mint> res(sz1), b2(sz1);
reverse_copy(a.begin(), a.end(), res.begin());
copy(b.begin(), b.end(), b2.begin());
ntt(res);
ntt(b2);
for (int i = 0; i < res.size(); i++) res[i] *= b2[i];
intt(res);
res.resize(m);
res.erase(res.begin(), res.begin() + n - 1);
return res;
}
// Appends n further values to a (n = a.size() on entry): a copy of a is inverse-transformed,
// its i-th entry is multiplied by zeta^i with zeta = g^((mod - 1) / (2n)), it is transformed
// forward again, and the result is appended.
// NOTE(review): presumably this extends a size-n transform of a polynomial to its size-2n
// transform — confirm against callers.
void ntt_doubling(vector<mint>& a) {
int n = (int)a.size();
auto b = a;
intt(b);
mint r = 1, zeta = mint(g).pow((mint::get_mod() - 1) / (n << 1));
for (int i = 0; i < n; i++) b[i] *= r, r *= zeta;
ntt(b);
copy(b.begin(), b.end(), back_inserter(a));
}
};
/**
* @brief NTT (数論変換)
* @docs docs/fft/ntt.md
*/
#line 5 "convolution/intmod.hpp"
namespace ConvolutionIntMod {
using ll = long long;
const ll Mod1 = 754974721;
const ll Mod2 = 167772161;
const ll Mod3 = 469762049;
const ll M1invM2 = 95869806;
const ll M12invM3 = 187290749;
using M1 = ModInt<Mod1>;
using M2 = ModInt<Mod2>;
using M3 = ModInt<Mod3>;
NTT<M1> ntt1;
NTT<M2> ntt2;
NTT<M3> ntt3;
template <class mint>
vector<mint> multiply(const vector<mint>& a, const vector<mint>& b) {
if (a.empty() || b.empty()) return {};
int mod = mint::get_mod();
ll M12mod = Mod1 * Mod2 % mod;
vector<unsigned int> a0(a.size()), b0(b.size());
for (int i = 0; i < a.size(); i++) a0[i] = a[i].val();
for (int i = 0; i < b.size(); i++) b0[i] = b[i].val();
vector<M1> a1(a0.begin(), a0.end()), b1(b0.begin(), b0.end()), c1 = ntt1.multiply(a1, b1);
vector<M2> a2(a0.begin(), a0.end()), b2(b0.begin(), b0.end()), c2 = ntt2.multiply(a2, b2);
vector<M3> a3(a0.begin(), a0.end()), b3(b0.begin(), b0.end()), c3 = ntt3.multiply(a3, b3);
vector<mint> c(a.size() + b.size() - 1, 0);
for (int i = 0; i < c.size(); i++) {
ll v1 = ((ll)c2[i].val() - (ll)c1[i].val()) * M1invM2 % Mod2;
if (v1 < 0) v1 += Mod2;
ll v2 = ((ll)c3[i].val() - ((ll)c1[i].val() + Mod1 * v1) % Mod3) * M12invM3 % Mod3;
if (v2 < 0) v2 += Mod3;
ll v3 = ((ll)c1[i].val() + Mod1 * v1 + M12mod * v2) % mod;
if (v3 < 0) v3 += mod;
c[i] = v3;
}
return c;
}
template <class mint>
vector<mint> middle_product(const vector<mint>& a, const vector<mint>& b) {
if (b.empty() || a.size() > b.size()) return {};
int mod = mint::get_mod();
ll M12mod = Mod1 * Mod2 % mod;
vector<unsigned int> a0(a.size()), b0(b.size());
for (int i = 0; i < a.size(); i++) a0[i] = a[i].val();
for (int i = 0; i < b.size(); i++) b0[i] = b[i].val();
vector<M1> a1(a0.begin(), a0.end()), b1(b0.begin(), b0.end()), c1 = ntt1.middle_product(a1, b1);
vector<M2> a2(a0.begin(), a0.end()), b2(b0.begin(), b0.end()), c2 = ntt2.middle_product(a2, b2);
vector<M3> a3(a0.begin(), a0.end()), b3(b0.begin(), b0.end()), c3 = ntt3.middle_product(a3, b3);
vector<mint> c(c1.size(), 0);
for (int i = 0; i < c.size(); i++) {
ll v1 = ((ll)c2[i].val() - (ll)c1[i].val()) * M1invM2 % Mod2;
if (v1 < 0) v1 += Mod2;
ll v2 = ((ll)c3[i].val() - ((ll)c1[i].val() + Mod1 * v1) % Mod3) * M12invM3 % Mod3;
if (v2 < 0) v2 += Mod3;
ll v3 = ((ll)c1[i].val() + Mod1 * v1 + M12mod * v2) % mod;
if (v3 < 0) v3 += mod;
c[i] = v3;
}
return c;
}
}; // namespace ConvolutionIntMod
/**
* @brief 任意 mod 畳み込み
* @docs docs/convolution/intmod.md
*/