畳み込み mod 2^64
(convolution/mod2_64.hpp)
Depends on
Verified with
Code
#pragma once
#include "modint/modint.hpp"
#include "fft/ntt.hpp"
#include "math/util.hpp"
// Convolution and middle product of unsigned 64-bit sequences, with every
// output coefficient reduced modulo 2^64.
//
// The product is computed independently modulo five NTT moduli and the exact
// coefficient is then rebuilt digit by digit (Garner's algorithm) in wrapping
// 64-bit arithmetic.  The reconstruction is exact only while the true integer
// coefficient is below M1*M2*M3*M4*M5 (about 2^148); with arbitrary 64-bit
// inputs that bounds the shorter operand to roughly 2^20 elements.
namespace ConvolutionMod2_64 {
using ull = unsigned long long;

// The five moduli, in increasing order (M1 < M2 < M3 < M4 < M5).
static constexpr ull M1 = 645922817;
static constexpr ull M2 = 754974721;
static constexpr ull M3 = 880803841;
static constexpr ull M4 = 897581057;
static constexpr ull M5 = 998244353;

// Partial products of the moduli reduced modulo a later modulus; needed while
// peeling off the fourth and fifth mixed-radix digits.
static constexpr ull M12M4 = M1 * M2 % M4;
static constexpr ull M12M5 = M1 * M2 % M5;
static constexpr ull M123M5 = M12M5 * M3 % M5;

// Partial products in plain unsigned 64-bit arithmetic.  M123 and M1234 exceed
// 64 bits, so they are stored wrapped, i.e. already reduced modulo 2^64.
static constexpr ull M12 = M1 * M2;
static constexpr ull M123 = M12 * M3;
static constexpr ull M1234 = M123 * M4;

// Ik is the inverse of M1 * ... * M(k-1) modulo Mk.
static constexpr ull I2 = Math::inv_mod(M1, M2);
static constexpr ull I3 = Math::inv_mod(M1 * M2 % M3, M3);
static constexpr ull I4 = Math::inv_mod(M1 * M2 % M4 * M3 % M4, M4);
static constexpr ull I5 = Math::inv_mod(M1 * M2 % M5 * M3 % M5 * M4 % M5, M5);

using mint1 = ModInt<M1>;
using mint2 = ModInt<M2>;
using mint3 = ModInt<M3>;
using mint4 = ModInt<M4>;
using mint5 = ModInt<M5>;

// One transform object per modulus; its tables are built by the NTT constructor.
NTT<mint1> ntt1;
NTT<mint2> ntt2;
NTT<mint3> ntt3;
NTT<mint4> ntt4;
NTT<mint5> ntt5;

// Reduces every element of v modulo mint's modulus.
template <class mint>
vector<mint> to_residues(const vector<ull>& v) {
    constexpr unsigned int mod = mint::get_mod();
    vector<mint> out(v.size());
    for (size_t i = 0; i < v.size(); i++) out[i] = v[i] % mod;
    return out;
}

// Rebuilds one coefficient from its residues rk (each already in [0, Mk)).
// y1..y5 are the mixed-radix digits of the exact value; the final sum is
// evaluated in wrapping 64-bit arithmetic, which yields the value modulo 2^64.
// Adding Mk before each subtraction keeps the unsigned intermediate from
// wrapping below zero.
inline ull garner(ull r1, ull r2, ull r3, ull r4, ull r5) {
    const ull y1 = r1;
    const ull y2 = (r2 + M2 - y1) * I2 % M2;  // y1 < M1 < M2, so no underflow
    const ull y3 = (r3 + M3 - (y1 + y2 * M1) % M3) * I3 % M3;
    const ull y4 = (r4 + M4 - (y1 + y2 * M1 + y3 * M12M4) % M4) * I4 % M4;
    const ull y5 = (r5 + M5 - (y1 + y2 * M1 + y3 * M12M5 + y4 * M123M5) % M5) * I5 % M5;
    return y1 + y2 * M1 + y3 * M12 + y4 * M123 + y5 * M1234;
}

// Convolution of a and b modulo mint's modulus, using the supplied transform.
template <class mint>
vector<mint> inner_mult(const vector<ull>& a, const vector<ull>& b, NTT<mint>& ntt) {
    // The product is computed exactly once (the earlier version multiplied
    // twice and threw the first result away).
    return ntt.multiply(to_residues<mint>(a), to_residues<mint>(b));
}

// Middle product of a and b modulo mint's modulus, using the supplied transform.
template <class mint>
vector<mint> inner_middle_prod(const vector<ull>& a, const vector<ull>& b, NTT<mint>& ntt) {
    return ntt.middle_product(to_residues<mint>(a), to_residues<mint>(b));
}

// Returns c with c[k] = sum over i + j == k of a[i] * b[j], modulo 2^64.
// The result has a.size() + b.size() - 1 entries; it is empty if either input is.
vector<ull> multiply(const vector<ull>& a, const vector<ull>& b) {
    if (a.empty() || b.empty()) return {};
    const auto c1 = inner_mult(a, b, ntt1);
    const auto c2 = inner_mult(a, b, ntt2);
    const auto c3 = inner_mult(a, b, ntt3);
    const auto c4 = inner_mult(a, b, ntt4);
    const auto c5 = inner_mult(a, b, ntt5);
    vector<ull> c(a.size() + b.size() - 1, 0);
    for (size_t i = 0; i < c.size(); i++) {
        c[i] = garner(c1[i].val(), c2[i].val(), c3[i].val(), c4[i].val(), c5[i].val());
    }
    return c;
}

// Returns c with c[i] = sum over j of a[j] * b[i + j], modulo 2^64.
// The result is empty when b is empty or a is longer than b; otherwise it has
// as many entries as NTT::middle_product produces.
vector<ull> middle_product(const vector<ull>& a, const vector<ull>& b) {
    if (b.empty() || a.size() > b.size()) return {};
    const auto c1 = inner_middle_prod(a, b, ntt1);
    const auto c2 = inner_middle_prod(a, b, ntt2);
    const auto c3 = inner_middle_prod(a, b, ntt3);
    const auto c4 = inner_middle_prod(a, b, ntt4);
    const auto c5 = inner_middle_prod(a, b, ntt5);
    vector<ull> c(c1.size(), 0);
    for (size_t i = 0; i < c.size(); i++) {
        c[i] = garner(c1[i].val(), c2[i].val(), c3[i].val(), c4[i].val(), c5[i].val());
    }
    return c;
}
}  // namespace ConvolutionMod2_64
/**
* @brief 畳み込み mod 2^64
*/
#line 2 "convolution/mod2_64.hpp"
#line 2 "math/util.hpp"
// Small integer-arithmetic helpers: sign-safe division and remainder, integer
// square root, extended gcd, modular inverse/power and a constexpr primality test.
namespace Math {
// Remainder of a by b normalised into [0, |b|).  b must be non-zero.
// NOTE(review): for b < 0 both operands are negated first, so the value
// returned is (-a) mod |b| rather than a mod |b| - confirm callers expect that.
template <class T>
T safe_mod(T a, T b) {
    assert(b != 0);
    if (b < 0) a = -a, b = -b;
    a %= b;
    return a >= 0 ? a : a + b;
}

// Quotient a / b rounded towards negative infinity.  b must be non-zero.
template <class T>
T floor(T a, T b) {
    assert(b != 0);
    if (b < 0) a = -a, b = -b;
    return a >= 0 ? a / b : (a + 1) / b - 1;
}

// Quotient a / b rounded towards positive infinity.  b must be non-zero.
template <class T>
T ceil(T a, T b) {
    assert(b != 0);
    if (b < 0) a = -a, b = -b;
    return a > 0 ? (a - 1) / b + 1 : a / b;
}

// Largest x with x * x <= n; returns 0 for n <= 0.
long long isqrt(long long n) {
    if (n <= 0) return 0;
    // The floating-point estimate may be off by a little; the loops correct it.
    long long x = sqrt(n);
    // Comparisons are done by division: (x + 1) * (x + 1) overflows signed
    // 64-bit arithmetic when n is close to LLONG_MAX.
    while (x + 1 <= n / (x + 1)) x++;  // (x + 1)^2 <= n
    while (x > n / x) x--;             // x^2 > n
    return x;
}

// return g=gcd(a,b)
// a*x+b*y=g
// - b!=0 -> 0<=x<|b|/g
// - b=0 -> ax=g
template <class T>
T ext_gcd(T a, T b, T& x, T& y) {
    T a0 = a, b0 = b;
    bool sgn_a = a < 0, sgn_b = b < 0;
    // Work on absolute values; the signs are re-applied to x and y afterwards.
    if (sgn_a) a = -a;
    if (sgn_b) b = -b;
    if (b == 0) {
        x = sgn_a ? -1 : 1;
        y = 0;
        return a;
    }
    // Coefficient pairs carried along the Euclidean remainder sequence.
    T x00 = 1, x01 = 0, x10 = 0, x11 = 1;
    while (b != 0) {
        T q = a / b, r = a - b * q;
        x00 -= q * x01;
        x10 -= q * x11;
        swap(x00, x01);
        swap(x10, x11);
        a = b, b = r;
    }
    x = x00, y = x10;
    if (sgn_a) x = -x;
    if (sgn_b) y = -y;
    if (b0 != 0) {
        // Shift (x, y) along the solution line so that x lands in [0, |b| / g).
        a0 /= a, b0 /= a;
        if (b0 < 0) a0 = -a0, b0 = -b0;
        T q = x >= 0 ? x / b0 : (x + 1) / b0 - 1;
        x -= b0 * q;
        y += a0 * q;
    }
    return a;
}

// Extended Euclid on (m, x).  With g = gcd(x, m) the result y lies in
// [0, m / g) and satisfies x * y == g (mod m); for coprime inputs it is the
// modular inverse of x.  m must be positive.
// The pair updates are written out by hand instead of calling std::swap,
// which is only constexpr from C++20 on; this keeps the function usable in
// constant expressions under C++14/17 as well.
constexpr long long inv_mod(long long x, long long m) {
    x %= m;
    if (x < 0) x += m;
    long long a = m, b = x;
    long long y0 = 0, y1 = 1;
    while (b > 0) {
        const long long q = a / b;
        const long long r = a - q * b;
        a = b;
        b = r;
        const long long t = y0 - q * y1;
        y0 = y1;
        y1 = t;
    }
    if (y0 < 0) y0 += m / a;
    return y0;
}

// x^n modulo m for n >= 0 and m >= 1; the result is in [0, m).
// Returns 0 when m == 1 (matching pow_mod_constexpr).
long long pow_mod(long long x, long long n, long long m) {
    x %= m;
    if (x < 0) x += m;
    // (m - 1)^2 fits in a signed 64-bit integer only up to this bound; larger
    // moduli go through a 128-bit product (compiler extension __int128).
    const bool fits64 = m <= 3037000500LL;
    auto mul = [m, fits64](long long p, long long q) -> long long {
        if (fits64) return p * q % m;
        return (long long)((unsigned __int128)p * (unsigned __int128)q % (unsigned __int128)m);
    };
    long long y = 1 % m;
    while (n) {
        if (n & 1) y = mul(y, x);
        x = mul(x, x);
        n >>= 1;
    }
    return y;
}

// constexpr x^n modulo m for n >= 0 and an int-sized modulus; returns 0 when m == 1.
constexpr long long pow_mod_constexpr(long long x, long long n, int m) {
    if (m == 1) return 0;
    unsigned int _m = (unsigned int)(m);
    unsigned long long r = 1;
    // Bring the base into [0, m) before switching to unsigned arithmetic.
    long long rem = x % m;
    if (rem < 0) rem += m;
    unsigned long long y = (unsigned long long)rem;
    while (n) {
        if (n & 1) r = (r * y) % _m;
        y = (y * y) % _m;
        n >>= 1;
    }
    return r;
}

// Miller-Rabin test with the fixed bases 2, 7 and 61, a base set known to be
// sufficient for 32-bit inputs.
constexpr bool is_prime_constexpr(int n) {
    if (n <= 1) return false;
    if (n == 2 || n == 7 || n == 61) return true;
    if (n % 2 == 0) return false;
    // n - 1 = d * 2^s with d odd.
    long long d = n - 1;
    while (d % 2 == 0) d /= 2;
    constexpr long long bases[3] = {2, 7, 61};
    for (long long a : bases) {
        long long t = d;
        long long y = pow_mod_constexpr(a, t, n);
        // Square until the exponent reaches n - 1 or y becomes +-1.
        while (t != n - 1 && y != 1 && y != n - 1) {
            y = y * y % n;
            t <<= 1;
        }
        if (y != n - 1 && t % 2 == 0) {
            return false;
        }
    }
    return true;
}

// Compile-time primality flag for the constant n.
template <int n>
constexpr bool is_prime = is_prime_constexpr(n);
}  // namespace Math
#line 3 "modint/modint.hpp"
// Integer modulo the compile-time constant m.  The stored representative is
// always kept in [0, m).
template <unsigned int m = 998244353>
struct ModInt {
    using mint = ModInt;

    // The modulus of this type.
    static constexpr unsigned int get_mod() { return m; }

    // Wraps v as-is, without any reduction; v is expected to be in [0, m) already.
    static mint raw(int v) {
        mint wrapped;
        wrapped._v = v;
        return wrapped;
    }

    ModInt() : _v(0) {}

    // Reduces any signed 64-bit value (negative ones included) into [0, m).
    ModInt(int64_t v) {
        long long reduced = (long long)(v % (long long)(umod()));
        if (reduced < 0) reduced += umod();
        _v = (unsigned int)(reduced);
    }

    // The stored representative.
    unsigned int val() const { return _v; }

    mint& operator++() {
        if (++_v == umod()) _v = 0;
        return *this;
    }
    mint& operator--() {
        _v = (_v == 0 ? umod() : _v) - 1;
        return *this;
    }
    mint operator++(int) {
        mint before = *this;
        ++*this;
        return before;
    }
    mint operator--(int) {
        mint before = *this;
        --*this;
        return before;
    }

    mint& operator+=(const mint& rhs) {
        const unsigned int sum = _v + rhs._v;
        _v = sum >= umod() ? sum - umod() : sum;
        return *this;
    }
    mint& operator-=(const mint& rhs) {
        // An unsigned wrap-around below zero shows up as a value >= m.
        const unsigned int diff = _v - rhs._v;
        _v = diff >= umod() ? diff + umod() : diff;
        return *this;
    }
    mint& operator*=(const mint& rhs) {
        _v = (unsigned int)((unsigned long long)_v * rhs._v % umod());
        return *this;
    }
    mint& operator/=(const mint& rhs) { return *this *= rhs.inv(); }

    mint operator+() const { return *this; }
    mint operator-() const { return mint() - *this; }

    // this^n by repeated squaring; n must be non-negative.
    mint pow(long long n) const {
        assert(0 <= n);
        mint base = *this, acc = 1;
        for (; n; n >>= 1) {
            if (n & 1) acc *= base;
            base *= base;
        }
        return acc;
    }

    // Multiplicative inverse: a power with exponent m - 2 when m is prime,
    // Math::inv_mod otherwise.
    mint inv() const {
        if (!is_prime) {
            auto inv = Math::inv_mod(_v, umod());
            return raw(inv);
        }
        assert(_v);
        return pow(umod() - 2);
    }

    friend mint operator+(const mint& lhs, const mint& rhs) {
        mint out = lhs;
        out += rhs;
        return out;
    }
    friend mint operator-(const mint& lhs, const mint& rhs) {
        mint out = lhs;
        out -= rhs;
        return out;
    }
    friend mint operator*(const mint& lhs, const mint& rhs) {
        mint out = lhs;
        out *= rhs;
        return out;
    }
    friend mint operator/(const mint& lhs, const mint& rhs) {
        mint out = lhs;
        out /= rhs;
        return out;
    }
    friend bool operator==(const mint& lhs, const mint& rhs) { return lhs._v == rhs._v; }
    friend bool operator!=(const mint& lhs, const mint& rhs) { return !(lhs._v == rhs._v); }

    // Reads a signed 64-bit integer and reduces it.
    friend istream& operator>>(istream& is, mint& x) {
        int64_t parsed;
        is >> parsed;
        x = mint(parsed);
        return is;
    }
    // Writes the stored representative.
    friend ostream& operator<<(ostream& os, const mint& x) { return os << x.val(); }

   private:
    unsigned int _v;
    static constexpr unsigned int umod() { return m; }
    static constexpr bool is_prime = Math::is_prime<m>;
};
#line 2 "fft/ntt.hpp"
// Number-theoretic transform over the modular integer type `mint`.
// Offers in-place forward/inverse transforms (ntt / intt), convolution
// (multiply), the middle product and a transform-doubling helper.
// All twiddle tables are filled in by the constructor.
template <class mint>
struct NTT {
static constexpr unsigned int mod = mint::get_mod();
// x^n modulo m by repeated squaring (usable in constant expressions).
static constexpr unsigned long long pow_constexpr(unsigned long long x, unsigned long long n, unsigned long long m) {
unsigned long long y = 1;
while (n) {
if (n & 1) y = y * x % m;
x = x * x % m;
n >>= 1;
}
return y;
}
// Smallest x >= 2 whose (mod - 1) / 2-th power is not 1 modulo `mod`.
static constexpr unsigned int get_g() {
unsigned long long x = 2;
while (pow_constexpr(x, (mod - 1) >> 1, mod) == 1) x += 1;
return x;
}
static constexpr unsigned int g = get_g();
// Number of trailing zero bits of mod - 1.
static constexpr int rank2 = __builtin_ctzll(mod - 1);
// root[rank2] = g^((mod - 1) >> rank2) and root[i] = root[i + 1]^2;
// iroot holds the corresponding inverses.
array<mint, rank2 + 1> root;
array<mint, rank2 + 1> iroot;
// Multipliers used to advance the running twiddle factor from one block to
// the next in the radix-2 (rate2) and radix-4 (rate3) passes.
array<mint, max(0, rank2 - 2 + 1)> rate2;
array<mint, max(0, rank2 - 2 + 1)> irate2;
array<mint, max(0, rank2 - 3 + 1)> rate3;
array<mint, max(0, rank2 - 3 + 1)> irate3;
// Fills root/iroot and the rate tables.
NTT() {
root[rank2] = mint(g).pow((mod - 1) >> rank2);
iroot[rank2] = root[rank2].inv();
for (int i = rank2 - 1; i >= 0; i--) {
root[i] = root[i + 1] * root[i + 1];
iroot[i] = iroot[i + 1] * iroot[i + 1];
}
{
// prod / iprod accumulate the product of the inverse / forward roots seen so far.
mint prod = 1, iprod = 1;
for (int i = 0; i <= rank2 - 2; i++) {
rate2[i] = root[i + 2] * prod;
irate2[i] = iroot[i + 2] * iprod;
prod *= iroot[i + 2];
iprod *= root[i + 2];
}
}
{
mint prod = 1, iprod = 1;
for (int i = 0; i <= rank2 - 3; i++) {
rate3[i] = root[i + 3] * prod;
irate3[i] = iroot[i + 3] * iprod;
prod *= iroot[i + 3];
iprod *= root[i + 3];
}
}
}
// In-place forward transform of a.
// h is the index of the lowest set bit of a.size() and a is resized to 1 << h,
// so the length is expected to be a power of two (any other length gets
// truncated).  NOTE(review): an empty vector reaches __builtin_ctzll(0),
// whose result is undefined - callers in this file never pass one.
void ntt(vector<mint>& a) {
int n = int(a.size());
int h = __builtin_ctzll((unsigned int)n);
a.resize(1 << h);
int len = 0; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
while (len < h) {
// Radix-4 passes are used while at least two levels remain; a single
// radix-2 pass handles the last level when h is odd.
if (h - len == 1) {
int p = 1 << (h - len - 1);
mint rot = 1;
for (int s = 0; s < (1 << len); s++) {
int offset = s << (h - len);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p] * rot;
a[i + offset] = l + r;
a[i + offset + p] = l - r;
}
// Advance the twiddle factor for the next block.
if (s + 1 != (1 << len)) rot *= rate2[__builtin_ctzll(~(unsigned int)(s))];
}
len++;
} else {
// 4-base
int p = 1 << (h - len - 2);
mint rot = 1, imag = root[2];
for (int s = 0; s < (1 << len); s++) {
mint rot2 = rot * rot;
mint rot3 = rot2 * rot;
int offset = s << (h - len);
for (int i = 0; i < p; i++) {
// The products a1..a3 are kept as unreduced 64-bit values; mod2 = mod^2
// is added before every subtraction so the intermediate stays
// non-negative.  Assigning to a[...] reduces through the mint constructor.
auto mod2 = 1ULL * mint::get_mod() * mint::get_mod();
auto a0 = 1ULL * a[i + offset].val();
auto a1 = 1ULL * a[i + offset + p].val() * rot.val();
auto a2 = 1ULL * a[i + offset + 2 * p].val() * rot2.val();
auto a3 = 1ULL * a[i + offset + 3 * p].val() * rot3.val();
auto a1na3imag = 1ULL * mint(a1 + mod2 - a3).val() * imag.val();
auto na2 = mod2 - a2;
a[i + offset] = a0 + a2 + a1 + a3;
a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3));
a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag);
}
if (s + 1 != (1 << len)) rot *= rate3[__builtin_ctzll(~(unsigned int)(s))];
}
len += 2;
}
}
}
// In-place inverse transform of a, including the final multiplication by the
// inverse of n.  The same length convention as ntt() applies.
void intt(vector<mint>& a) {
int n = int(a.size());
int h = __builtin_ctzll((unsigned int)n);
a.resize(1 << h);
int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
while (len) {
if (len == 1) {
int p = 1 << (h - len);
mint irot = 1;
for (int s = 0; s < (1 << (len - 1)); s++) {
int offset = s << (h - len + 1);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p];
a[i + offset] = l + r;
// mod is added so that l - r cannot go negative before the multiplication.
a[i + offset + p] = (unsigned long long)(mint::get_mod() + l.val() - r.val()) * irot.val();
}
if (s + 1 != (1 << (len - 1))) irot *= irate2[__builtin_ctzll(~(unsigned int)(s))];
}
len--;
} else {
// 4-base
int p = 1 << (h - len);
mint irot = 1, iimag = iroot[2];
for (int s = 0; s < (1 << (len - 2)); s++) {
mint irot2 = irot * irot;
mint irot3 = irot2 * irot;
int offset = s << (h - len + 2);
for (int i = 0; i < p; i++) {
// Sums are formed on unreduced 64-bit values; "mod - x" stands in for -x.
auto a0 = 1ULL * a[i + offset + 0 * p].val();
auto a1 = 1ULL * a[i + offset + 1 * p].val();
auto a2 = 1ULL * a[i + offset + 2 * p].val();
auto a3 = 1ULL * a[i + offset + 3 * p].val();
auto a2na3iimag = 1ULL * mint((mint::get_mod() + a2 - a3) * iimag.val()).val();
a[i + offset] = a0 + a1 + a2 + a3;
a[i + offset + 1 * p] = (a0 + (mint::get_mod() - a1) + a2na3iimag) * irot.val();
a[i + offset + 2 * p] = (a0 + a1 + (mint::get_mod() - a2) + (mint::get_mod() - a3)) * irot2.val();
a[i + offset + 3 * p] = (a0 + (mint::get_mod() - a1) + (mint::get_mod() - a2na3iimag)) * irot3.val();
}
if (s + 1 != (1 << (len - 2))) irot *= irate3[__builtin_ctzll(~(unsigned int)(s))];
}
len -= 2;
}
}
// Scale by 1 / n to finish the inverse transform.
mint e = mint(n).inv();
for (auto& x : a) x *= e;
}
// Convolution of a and b: the result has a.size() + b.size() - 1 entries and
// is empty if either input is empty.
vector<mint> multiply(const vector<mint>& a, const vector<mint>& b) {
if (a.empty() || b.empty()) return vector<mint>();
int n = a.size(), m = b.size();
int sz = n + m - 1;
// Schoolbook multiplication when the shorter operand has at most 30 terms;
// the operands are swapped first so the outer loop runs over the short one.
if (n <= 30 || m <= 30) {
if (n > 30) return multiply(b, a);
vector<mint> res(sz);
for (int i = 0; i < n; i++)
for (int j = 0; j < m; j++) res[i + j] += a[i] * b[j];
return res;
}
// Smallest power of two that holds the full product.
int sz1 = 1;
while (sz1 < sz) sz1 <<= 1;
vector<mint> res(sz1);
for (int i = 0; i < n; i++) res[i] = a[i];
ntt(res);
// Squaring needs only one forward transform.
if (a == b)
for (int i = 0; i < sz1; i++) res[i] *= res[i];
else {
vector<mint> c(sz1);
for (int i = 0; i < m; i++) c[i] = b[i];
ntt(c);
for (int i = 0; i < sz1; i++) res[i] *= c[i];
}
intt(res);
res.resize(sz);
return res;
}
// c[i]=sum[j]a[j]b[i+j]
// The result has b.size() - a.size() + 1 entries; it is empty when b is empty
// or a is longer than b.
vector<mint> middle_product(const vector<mint>& a, const vector<mint>& b) {
if (b.empty() || a.size() > b.size()) return {};
int n = a.size(), m = b.size();
int sz = m - n + 1;
// Direct evaluation when a or the output is short.
if (n <= 30 || sz <= 30) {
vector<mint> res(sz);
for (int i = 0; i < sz; i++)
for (int j = 0; j < n; j++) res[i] += a[j] * b[i + j];
return res;
}
// Multiply reversed a by b with a transform of length sz1 >= m (so the
// product may wrap around cyclically), then keep positions n - 1 .. m - 1.
int sz1 = 1;
while (sz1 < m) sz1 <<= 1;
vector<mint> res(sz1), b2(sz1);
reverse_copy(a.begin(), a.end(), res.begin());
copy(b.begin(), b.end(), b2.begin());
ntt(res);
ntt(b2);
for (int i = 0; i < res.size(); i++) res[i] *= b2[i];
intt(res);
res.resize(m);
res.erase(res.begin(), res.begin() + n - 1);
return res;
}
// Appends n further values to a (n = a.size()): the inverse transform of a is
// multiplied element-wise by powers of zeta = g^((mod - 1) / (2n)),
// transformed again and concatenated.
// NOTE(review): presumably a holds a length-n transform on entry and its
// length-2n counterpart on exit - confirm against callers.
void ntt_doubling(vector<mint>& a) {
int n = (int)a.size();
auto b = a;
intt(b);
mint r = 1, zeta = mint(g).pow((mint::get_mod() - 1) / (n << 1));
for (int i = 0; i < n; i++) b[i] *= r, r *= zeta;
ntt(b);
copy(b.begin(), b.end(), back_inserter(a));
}
};
/**
* @brief NTT (数論変換)
* @docs docs/fft/ntt.md
*/
#line 6 "convolution/mod2_64.hpp"
// Convolution and middle product of unsigned 64-bit sequences, with every
// output coefficient reduced modulo 2^64.
//
// The product is computed independently modulo five NTT moduli and the exact
// coefficient is then rebuilt digit by digit (Garner's algorithm) in wrapping
// 64-bit arithmetic.  The reconstruction is exact only while the true integer
// coefficient is below M1*M2*M3*M4*M5 (about 2^148); with arbitrary 64-bit
// inputs that bounds the shorter operand to roughly 2^20 elements.
namespace ConvolutionMod2_64 {
using ull = unsigned long long;

// The five moduli, in increasing order (M1 < M2 < M3 < M4 < M5).
static constexpr ull M1 = 645922817;
static constexpr ull M2 = 754974721;
static constexpr ull M3 = 880803841;
static constexpr ull M4 = 897581057;
static constexpr ull M5 = 998244353;

// Partial products of the moduli reduced modulo a later modulus; needed while
// peeling off the fourth and fifth mixed-radix digits.
static constexpr ull M12M4 = M1 * M2 % M4;
static constexpr ull M12M5 = M1 * M2 % M5;
static constexpr ull M123M5 = M12M5 * M3 % M5;

// Partial products in plain unsigned 64-bit arithmetic.  M123 and M1234 exceed
// 64 bits, so they are stored wrapped, i.e. already reduced modulo 2^64.
static constexpr ull M12 = M1 * M2;
static constexpr ull M123 = M12 * M3;
static constexpr ull M1234 = M123 * M4;

// Ik is the inverse of M1 * ... * M(k-1) modulo Mk.
static constexpr ull I2 = Math::inv_mod(M1, M2);
static constexpr ull I3 = Math::inv_mod(M1 * M2 % M3, M3);
static constexpr ull I4 = Math::inv_mod(M1 * M2 % M4 * M3 % M4, M4);
static constexpr ull I5 = Math::inv_mod(M1 * M2 % M5 * M3 % M5 * M4 % M5, M5);

using mint1 = ModInt<M1>;
using mint2 = ModInt<M2>;
using mint3 = ModInt<M3>;
using mint4 = ModInt<M4>;
using mint5 = ModInt<M5>;

// One transform object per modulus; its tables are built by the NTT constructor.
NTT<mint1> ntt1;
NTT<mint2> ntt2;
NTT<mint3> ntt3;
NTT<mint4> ntt4;
NTT<mint5> ntt5;

// Reduces every element of v modulo mint's modulus.
template <class mint>
vector<mint> to_residues(const vector<ull>& v) {
    constexpr unsigned int mod = mint::get_mod();
    vector<mint> out(v.size());
    for (size_t i = 0; i < v.size(); i++) out[i] = v[i] % mod;
    return out;
}

// Rebuilds one coefficient from its residues rk (each already in [0, Mk)).
// y1..y5 are the mixed-radix digits of the exact value; the final sum is
// evaluated in wrapping 64-bit arithmetic, which yields the value modulo 2^64.
// Adding Mk before each subtraction keeps the unsigned intermediate from
// wrapping below zero.
inline ull garner(ull r1, ull r2, ull r3, ull r4, ull r5) {
    const ull y1 = r1;
    const ull y2 = (r2 + M2 - y1) * I2 % M2;  // y1 < M1 < M2, so no underflow
    const ull y3 = (r3 + M3 - (y1 + y2 * M1) % M3) * I3 % M3;
    const ull y4 = (r4 + M4 - (y1 + y2 * M1 + y3 * M12M4) % M4) * I4 % M4;
    const ull y5 = (r5 + M5 - (y1 + y2 * M1 + y3 * M12M5 + y4 * M123M5) % M5) * I5 % M5;
    return y1 + y2 * M1 + y3 * M12 + y4 * M123 + y5 * M1234;
}

// Convolution of a and b modulo mint's modulus, using the supplied transform.
template <class mint>
vector<mint> inner_mult(const vector<ull>& a, const vector<ull>& b, NTT<mint>& ntt) {
    // The product is computed exactly once (the earlier version multiplied
    // twice and threw the first result away).
    return ntt.multiply(to_residues<mint>(a), to_residues<mint>(b));
}

// Middle product of a and b modulo mint's modulus, using the supplied transform.
template <class mint>
vector<mint> inner_middle_prod(const vector<ull>& a, const vector<ull>& b, NTT<mint>& ntt) {
    return ntt.middle_product(to_residues<mint>(a), to_residues<mint>(b));
}

// Returns c with c[k] = sum over i + j == k of a[i] * b[j], modulo 2^64.
// The result has a.size() + b.size() - 1 entries; it is empty if either input is.
vector<ull> multiply(const vector<ull>& a, const vector<ull>& b) {
    if (a.empty() || b.empty()) return {};
    const auto c1 = inner_mult(a, b, ntt1);
    const auto c2 = inner_mult(a, b, ntt2);
    const auto c3 = inner_mult(a, b, ntt3);
    const auto c4 = inner_mult(a, b, ntt4);
    const auto c5 = inner_mult(a, b, ntt5);
    vector<ull> c(a.size() + b.size() - 1, 0);
    for (size_t i = 0; i < c.size(); i++) {
        c[i] = garner(c1[i].val(), c2[i].val(), c3[i].val(), c4[i].val(), c5[i].val());
    }
    return c;
}

// Returns c with c[i] = sum over j of a[j] * b[i + j], modulo 2^64.
// The result is empty when b is empty or a is longer than b; otherwise it has
// as many entries as NTT::middle_product produces.
vector<ull> middle_product(const vector<ull>& a, const vector<ull>& b) {
    if (b.empty() || a.size() > b.size()) return {};
    const auto c1 = inner_middle_prod(a, b, ntt1);
    const auto c2 = inner_middle_prod(a, b, ntt2);
    const auto c3 = inner_middle_prod(a, b, ntt3);
    const auto c4 = inner_middle_prod(a, b, ntt4);
    const auto c5 = inner_middle_prod(a, b, ntt5);
    vector<ull> c(c1.size(), 0);
    for (size_t i = 0; i < c.size(); i++) {
        c[i] = garner(c1[i].val(), c2[i].val(), c3[i].val(), c4[i].val(), c5[i].val());
    }
    return c;
}
}  // namespace ConvolutionMod2_64
/**
* @brief 畳み込み mod 2^64
*/
Back to top page