#line 1 "verify/fps/LC_sqrt_of_formal_power_series.relaxed.test.cpp"
#define PROBLEM "https://judge.yosupo.jp/problem/sqrt_of_formal_power_series"
#line 2 "template/template.hpp"
#include <bits/stdc++.h>
using namespace std;
#line 2 "template/macro.hpp"
#define rep(i, a, b) for (int i = (a); i < (int)(b); i++)
#define rrep(i, a, b) for (int i = (int)(b) - 1; i >= (a); i--)
#define ALL(v) (v).begin(), (v).end()
#define UNIQUE(v) sort(ALL(v)), (v).erase(unique(ALL(v)), (v).end())
#define SZ(v) (int)v.size()
#define MIN(v) *min_element(ALL(v))
#define MAX(v) *max_element(ALL(v))
#define LB(v, x) int(lower_bound(ALL(v), (x)) - (v).begin())
#define UB(v, x) int(upper_bound(ALL(v), (x)) - (v).begin())
#define YN(b) cout << ((b) ? "YES" : "NO") << "\n";
#define Yn(b) cout << ((b) ? "Yes" : "No") << "\n";
#define yn(b) cout << ((b) ? "yes" : "no") << "\n";
#line 6 "template/template.hpp"
#line 2 "template/util.hpp"
using uint = unsigned int;
using ll = long long int;
using ull = unsigned long long;
using i128 = __int128_t;
using u128 = __uint128_t;
template <class T, class S = T>
S SUM(const vector<T> &a) {
return accumulate(ALL(a), S(0));
}
template <class T>
inline bool chmin(T &a, T b) {
if (a > b) {
a = b;
return true;
}
return false;
}
template <class T>
inline bool chmax(T &a, T b) {
if (a < b) {
a = b;
return true;
}
return false;
}
template <class T>
int popcnt(T x) {
return __builtin_popcountll(x);
}
template <class T>
int topbit(T x) {
return (x == 0 ? -1 : 63 - __builtin_clzll(x));
}
template <class T>
int lowbit(T x) {
return (x == 0 ? -1 : __builtin_ctzll(x));
}
#line 8 "template/template.hpp"
#line 2 "template/inout.hpp"
struct Fast {
Fast() {
cin.tie(nullptr);
ios_base::sync_with_stdio(false);
cout << fixed << setprecision(15);
}
} fast;
template <class T1, class T2>
istream &operator>>(istream &is, pair<T1, T2> &p) {
return is >> p.first >> p.second;
}
template <class T1, class T2>
ostream &operator<<(ostream &os, const pair<T1, T2> &p) {
return os << p.first << " " << p.second;
}
template <class T>
istream &operator>>(istream &is, vector<T> &a) {
for (auto &v : a) is >> v;
return is;
}
template <class T>
ostream &operator<<(ostream &os, const vector<T> &a) {
for (auto it = a.begin(); it != a.end();) {
os << *it;
if (++it != a.end()) os << " ";
}
return os;
}
template <class T>
ostream &operator<<(ostream &os, const set<T> &st) {
os << "{";
for (auto it = st.begin(); it != st.end();) {
os << *it;
if (++it != st.end()) os << ",";
}
os << "}";
return os;
}
template <class T1, class T2>
ostream &operator<<(ostream &os, const map<T1, T2> &mp) {
os << "{";
for (auto it = mp.begin(); it != mp.end();) {
os << it->first << ":" << it->second;
if (++it != mp.end()) os << ",";
}
os << "}";
return os;
}
void in() {}
template <typename T, class... U>
void in(T &t, U &...u) {
cin >> t;
in(u...);
}
void out() { cout << "\n"; }
template <typename T, class... U, char sep = ' '>
void out(const T &t, const U &...u) {
cout << t;
if (sizeof...(u)) cout << sep;
out(u...);
}
#line 10 "template/template.hpp"
#line 2 "template/debug.hpp"
#ifdef LOCAL
#define debug 1
#define show(...) _show(0, #__VA_ARGS__, __VA_ARGS__)
#else
#define debug 0
#define show(...) true
#endif
template <class T>
void _show(int i, T name) {
cerr << '\n';
}
template <class T1, class T2, class... T3>
void _show(int i, const T1 &a, const T2 &b, const T3 &...c) {
for (; a[i] != ',' && a[i] != '\0'; i++) cerr << a[i];
cerr << ":" << b << " ";
_show(i + 1, a, c...);
}
#line 2 "modint/modint.hpp"
template <unsigned int m = 998244353>
struct ModInt {
using mint = ModInt;
unsigned int _v;
static constexpr unsigned int get_mod() { return m; }
static mint raw(int v) {
mint x;
x._v = v;
return x;
}
ModInt() : _v(0) {}
ModInt(int64_t v) {
long long x = (long long)(v % (long long)(umod()));
if (x < 0) x += umod();
_v = (unsigned int)(x);
}
unsigned int val() const { return _v; }
mint &operator++() {
_v++;
if (_v == umod()) _v = 0;
return *this;
}
mint &operator--() {
if (_v == 0) _v = umod();
_v--;
return *this;
}
mint operator++(int) {
mint result = *this;
++*this;
return result;
}
mint operator--(int) {
mint result = *this;
--*this;
return result;
}
mint &operator+=(const mint &rhs) {
_v += rhs._v;
if (_v >= umod()) _v -= umod();
return *this;
}
mint &operator-=(const mint &rhs) {
_v -= rhs._v;
if (_v >= umod()) _v += umod();
return *this;
}
mint &operator*=(const mint &rhs) {
unsigned long long z = _v;
z *= rhs._v;
_v = (unsigned int)(z % umod());
return *this;
}
mint &operator/=(const mint &rhs) { return *this = *this * rhs.inv(); }
mint operator+() const { return *this; }
mint operator-() const { return mint() - *this; }
mint pow(long long n) const {
assert(0 <= n);
mint x = *this, r = 1;
while (n) {
if (n & 1) r *= x;
x *= x;
n >>= 1;
}
return r;
}
mint inv() const {
assert(_v);
return pow(umod() - 2);
}
friend mint operator+(const mint &lhs, const mint &rhs) {
return mint(lhs) += rhs;
}
friend mint operator-(const mint &lhs, const mint &rhs) {
return mint(lhs) -= rhs;
}
friend mint operator*(const mint &lhs, const mint &rhs) {
return mint(lhs) *= rhs;
}
friend mint operator/(const mint &lhs, const mint &rhs) {
return mint(lhs) /= rhs;
}
friend bool operator==(const mint &lhs, const mint &rhs) {
return lhs._v == rhs._v;
}
friend bool operator!=(const mint &lhs, const mint &rhs) {
return lhs._v != rhs._v;
}
friend istream &operator>>(istream &is, mint &x) {
return is >> x._v;
}
friend ostream &operator<<(ostream &os, const mint &x) {
return os << x.val();
}
private:
static constexpr unsigned int umod() { return m; }
};
#line 5 "verify/fps/LC_sqrt_of_formal_power_series.relaxed.test.cpp"
using mint = ModInt<998244353>;
#line 2 "fps/fps-ntt-friendly.hpp"
#line 2 "fft/ntt.hpp"
template <class mint>
struct NTT {
static constexpr unsigned int mod = mint::get_mod();
static constexpr unsigned long long pow_constexpr(unsigned long long x, unsigned long long n, unsigned long long m) {
unsigned long long y = 1;
while (n) {
if (n & 1) y = y * x % m;
x = x * x % m;
n >>= 1;
}
return y;
}
static constexpr unsigned int get_g() {
unsigned long long x = 2;
while (pow_constexpr(x, (mod - 1) >> 1, mod) == 1) x += 1;
return x;
}
static constexpr unsigned int g = get_g();
static constexpr int rank2 = __builtin_ctzll(mod - 1);
array<mint, rank2 + 1> root;
array<mint, rank2 + 1> iroot;
array<mint, max(0, rank2 - 2 + 1)> rate2;
array<mint, max(0, rank2 - 2 + 1)> irate2;
array<mint, max(0, rank2 - 3 + 1)> rate3;
array<mint, max(0, rank2 - 3 + 1)> irate3;
NTT() {
root[rank2] = mint(g).pow((mod - 1) >> rank2);
iroot[rank2] = root[rank2].inv();
for (int i = rank2 - 1; i >= 0; i--) {
root[i] = root[i + 1] * root[i + 1];
iroot[i] = iroot[i + 1] * iroot[i + 1];
}
{
mint prod = 1, iprod = 1;
for (int i = 0; i <= rank2 - 2; i++) {
rate2[i] = root[i + 2] * prod;
irate2[i] = iroot[i + 2] * iprod;
prod *= iroot[i + 2];
iprod *= root[i + 2];
}
}
{
mint prod = 1, iprod = 1;
for (int i = 0; i <= rank2 - 3; i++) {
rate3[i] = root[i + 3] * prod;
irate3[i] = iroot[i + 3] * iprod;
prod *= iroot[i + 3];
iprod *= root[i + 3];
}
}
}
void ntt(vector<mint>& a) {
int n = int(a.size());
int h = __builtin_ctzll((unsigned int)n);
a.resize(1 << h);
int len = 0; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
while (len < h) {
if (h - len == 1) {
int p = 1 << (h - len - 1);
mint rot = 1;
for (int s = 0; s < (1 << len); s++) {
int offset = s << (h - len);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p] * rot;
a[i + offset] = l + r;
a[i + offset + p] = l - r;
}
if (s + 1 != (1 << len)) rot *= rate2[__builtin_ctzll(~(unsigned int)(s))];
}
len++;
} else {
// 4-base
int p = 1 << (h - len - 2);
mint rot = 1, imag = root[2];
for (int s = 0; s < (1 << len); s++) {
mint rot2 = rot * rot;
mint rot3 = rot2 * rot;
int offset = s << (h - len);
for (int i = 0; i < p; i++) {
auto mod2 = 1ULL * mint::get_mod() * mint::get_mod();
auto a0 = 1ULL * a[i + offset].val();
auto a1 = 1ULL * a[i + offset + p].val() * rot.val();
auto a2 = 1ULL * a[i + offset + 2 * p].val() * rot2.val();
auto a3 = 1ULL * a[i + offset + 3 * p].val() * rot3.val();
auto a1na3imag = 1ULL * mint(a1 + mod2 - a3).val() * imag.val();
auto na2 = mod2 - a2;
a[i + offset] = a0 + a2 + a1 + a3;
a[i + offset + 1 * p] = a0 + a2 + (2 * mod2 - (a1 + a3));
a[i + offset + 2 * p] = a0 + na2 + a1na3imag;
a[i + offset + 3 * p] = a0 + na2 + (mod2 - a1na3imag);
}
if (s + 1 != (1 << len)) rot *= rate3[__builtin_ctzll(~(unsigned int)(s))];
}
len += 2;
}
}
}
void intt(vector<mint>& a) {
int n = int(a.size());
int h = __builtin_ctzll((unsigned int)n);
a.resize(1 << h);
int len = h; // a[i, i+(n>>len), i+2*(n>>len), ..] is transformed
while (len) {
if (len == 1) {
int p = 1 << (h - len);
mint irot = 1;
for (int s = 0; s < (1 << (len - 1)); s++) {
int offset = s << (h - len + 1);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p];
a[i + offset] = l + r;
a[i + offset + p] = (unsigned long long)(mint::get_mod() + l.val() - r.val()) * irot.val();
}
if (s + 1 != (1 << (len - 1))) irot *= irate2[__builtin_ctzll(~(unsigned int)(s))];
}
len--;
} else {
// 4-base
int p = 1 << (h - len);
mint irot = 1, iimag = iroot[2];
for (int s = 0; s < (1 << (len - 2)); s++) {
mint irot2 = irot * irot;
mint irot3 = irot2 * irot;
int offset = s << (h - len + 2);
for (int i = 0; i < p; i++) {
auto a0 = 1ULL * a[i + offset + 0 * p].val();
auto a1 = 1ULL * a[i + offset + 1 * p].val();
auto a2 = 1ULL * a[i + offset + 2 * p].val();
auto a3 = 1ULL * a[i + offset + 3 * p].val();
auto a2na3iimag = 1ULL * mint((mint::get_mod() + a2 - a3) * iimag.val()).val();
a[i + offset] = a0 + a1 + a2 + a3;
a[i + offset + 1 * p] = (a0 + (mint::get_mod() - a1) + a2na3iimag) * irot.val();
a[i + offset + 2 * p] = (a0 + a1 + (mint::get_mod() - a2) + (mint::get_mod() - a3)) * irot2.val();
a[i + offset + 3 * p] = (a0 + (mint::get_mod() - a1) + (mint::get_mod() - a2na3iimag)) * irot3.val();
}
if (s + 1 != (1 << (len - 2))) irot *= irate3[__builtin_ctzll(~(unsigned int)(s))];
}
len -= 2;
}
}
mint e = mint(n).inv();
for (auto& x : a) x *= e;
}
vector<mint> multiply(const vector<mint>& a, const vector<mint>& b) {
if (a.empty() || b.empty()) return vector<mint>();
int n = a.size(), m = b.size();
int sz = n + m - 1;
if (n <= 30 || m <= 30) {
if (n > 30) return multiply(b, a);
vector<mint> res(sz);
for (int i = 0; i < n; i++)
for (int j = 0; j < m; j++) res[i + j] += a[i] * b[j];
return res;
}
int sz1 = 1;
while (sz1 < sz) sz1 <<= 1;
vector<mint> res(sz1);
for (int i = 0; i < n; i++) res[i] = a[i];
ntt(res);
if (a == b)
for (int i = 0; i < sz1; i++) res[i] *= res[i];
else {
vector<mint> c(sz1);
for (int i = 0; i < m; i++) c[i] = b[i];
ntt(c);
for (int i = 0; i < sz1; i++) res[i] *= c[i];
}
intt(res);
res.resize(sz);
return res;
}
// c[i]=sum[j]a[j]b[i+j]
vector<mint> middle_product(const vector<mint>& a, const vector<mint>& b) {
if (b.empty() || a.size() > b.size()) return {};
int n = a.size(), m = b.size();
int sz = m - n + 1;
if (n <= 30 || sz <= 30) {
vector<mint> res(sz);
for (int i = 0; i < sz; i++)
for (int j = 0; j < n; j++) res[i] += a[j] * b[i + j];
return res;
}
int sz1 = 1;
while (sz1 < m) sz1 <<= 1;
vector<mint> res(sz1), b2(sz1);
reverse_copy(a.begin(), a.end(), res.begin());
copy(b.begin(), b.end(), b2.begin());
ntt(res);
ntt(b2);
for (int i = 0; i < res.size(); i++) res[i] *= b2[i];
intt(res);
res.resize(m);
res.erase(res.begin(), res.begin() + n - 1);
return res;
}
void ntt_doubling(vector<mint>& a) {
int n = (int)a.size();
auto b = a;
intt(b);
mint r = 1, zeta = mint(g).pow((mint::get_mod() - 1) / (n << 1));
for (int i = 0; i < n; i++) b[i] *= r, r *= zeta;
ntt(b);
copy(b.begin(), b.end(), back_inserter(a));
}
};
/**
* @brief NTT (数論変換)
* @docs docs/fft/ntt.md
*/
#line 2 "fps/formal-power-series.hpp"
template <class mint>
struct FormalPowerSeries : vector<mint> {
using vector<mint>::vector;
using FPS = FormalPowerSeries;
FPS &operator+=(const FPS &r) {
if (r.size() > this->size()) this->resize(r.size());
for (int i = 0; i < (int)r.size(); i++) (*this)[i] += r[i];
return *this;
}
FPS &operator+=(const mint &r) {
if (this->empty()) this->resize(1);
(*this)[0] += r;
return *this;
}
FPS &operator-=(const FPS &r) {
if (r.size() > this->size()) this->resize(r.size());
for (int i = 0; i < (int)r.size(); i++) (*this)[i] -= r[i];
return *this;
}
FPS &operator-=(const mint &r) {
if (this->empty()) this->resize(1);
(*this)[0] -= r;
return *this;
}
FPS &operator*=(const mint &v) {
for (int k = 0; k < (int)this->size(); k++) (*this)[k] *= v;
return *this;
}
FPS &operator/=(const FPS &r) {
if (this->size() < r.size()) {
this->clear();
return *this;
}
int n = this->size() - r.size() + 1;
if ((int)r.size() <= 64) {
FPS f(*this), g(r);
g.shrink();
mint coeff = g.at(g.size() - 1).inv();
for (auto &x : g) x *= coeff;
int deg = (int)f.size() - (int)g.size() + 1;
int gs = g.size();
FPS quo(deg);
for (int i = deg - 1; i >= 0; i--) {
quo[i] = f[i + gs - 1];
for (int j = 0; j < gs; j++) f[i + j] -= quo[i] * g[j];
}
*this = quo * coeff;
this->resize(n, mint(0));
return *this;
}
return *this = ((*this).rev().pre(n) * r.rev().inv(n)).pre(n).rev();
}
FPS &operator%=(const FPS &r) {
*this -= *this / r * r;
shrink();
return *this;
}
FPS operator+(const FPS &r) const { return FPS(*this) += r; }
FPS operator+(const mint &v) const { return FPS(*this) += v; }
FPS operator-(const FPS &r) const { return FPS(*this) -= r; }
FPS operator-(const mint &v) const { return FPS(*this) -= v; }
FPS operator*(const FPS &r) const { return FPS(*this) *= r; }
FPS operator*(const mint &v) const { return FPS(*this) *= v; }
FPS operator/(const FPS &r) const { return FPS(*this) /= r; }
FPS operator%(const FPS &r) const { return FPS(*this) %= r; }
FPS operator-() const {
FPS ret(this->size());
for (int i = 0; i < (int)this->size(); i++) ret[i] = -(*this)[i];
return ret;
}
void shrink() {
while (this->size() && this->back() == mint(0)) this->pop_back();
}
FPS rev() const {
FPS ret(*this);
reverse(begin(ret), end(ret));
return ret;
}
FPS dot(FPS r) const {
FPS ret(min(this->size(), r.size()));
for (int i = 0; i < (int)ret.size(); i++) ret[i] = (*this)[i] * r[i];
return ret;
}
FPS pre(int sz) const {
return FPS(begin(*this), begin(*this) + min((int)this->size(), sz));
}
FPS operator>>=(int sz) {
assert(sz >= 0);
if ((int)this->size() <= sz) return {};
this->erase(this->begin(), this->begin() + sz);
return *this;
}
FPS operator>>(int sz) const {
if ((int)this->size() <= sz) return {};
FPS ret(*this);
ret.erase(ret.begin(), ret.begin() + sz);
return ret;
}
FPS operator<<=(int sz) {
assert(sz >= 0);
this->insert(this->begin(), sz, mint(0));
return *this;
}
FPS operator<<(int sz) const {
FPS ret(*this);
ret.insert(ret.begin(), sz, mint(0));
return ret;
}
FPS diff() const {
const int n = (int)this->size();
FPS ret(max(0, n - 1));
mint one(1), coeff(1);
for (int i = 1; i < n; i++) {
ret[i - 1] = (*this)[i] * coeff;
coeff += one;
}
return ret;
}
FPS integral() const {
const int n = (int)this->size();
FPS ret(n + 1);
ret[0] = mint(0);
if (n > 0) ret[1] = mint(1);
auto mod = mint::get_mod();
for (int i = 2; i <= n; i++) ret[i] = (-ret[mod % i]) * (mod / i);
for (int i = 0; i < n; i++) ret[i + 1] *= (*this)[i];
return ret;
}
mint eval(mint x) const {
mint r = 0, w = 1;
for (auto &v : *this) r += w * v, w *= x;
return r;
}
FPS log(int deg = -1) const {
assert((*this)[0] == mint(1));
if (deg == -1) deg = (int)this->size();
return (this->diff() * this->inv(deg)).pre(deg - 1).integral();
}
FPS pow(int64_t k, int deg = -1) const {
const int n = (int)this->size();
if (deg == -1) deg = n;
if (k == 0) {
FPS ret(deg);
if (deg) ret[0] = 1;
return ret;
}
for (int i = 0; i < n; i++) {
if ((*this)[i] != mint(0)) {
mint rev = mint(1) / (*this)[i];
FPS ret = (((*this * rev) >> i).log(deg) * k).exp(deg);
ret *= (*this)[i].pow(k);
ret = (ret << (i * k)).pre(deg);
if ((int)ret.size() < deg) ret.resize(deg, mint(0));
return ret;
}
if (__int128_t(i + 1) * k >= deg) return FPS(deg, mint(0));
}
return FPS(deg, mint(0));
}
static void *ntt_ptr;
static void set_ntt();
FPS &operator*=(const FPS &r);
FPS middle_product(const FPS &r) const;
void ntt();
void intt();
void ntt_doubling();
static int ntt_root();
FPS inv(int deg = -1) const;
FPS exp(int deg = -1) const;
};
template <typename mint>
void *FormalPowerSeries<mint>::ntt_ptr = nullptr;
#line 5 "fps/fps-ntt-friendly.hpp"
template <class mint>
void FormalPowerSeries<mint>::set_ntt() {
if (!ntt_ptr) ntt_ptr = new NTT<mint>;
}
template <class mint>
FormalPowerSeries<mint>& FormalPowerSeries<mint>::operator*=(const FormalPowerSeries<mint>& r) {
if (this->empty() || r.empty()) {
this->clear();
return *this;
}
set_ntt();
auto ret = static_cast<NTT<mint>*>(ntt_ptr)->multiply(*this, r);
return *this = FormalPowerSeries<mint>(ret.begin(), ret.end());
}
template <class mint>
FormalPowerSeries<mint> FormalPowerSeries<mint>::middle_product(const FormalPowerSeries<mint>& r) const {
set_ntt();
auto ret = static_cast<NTT<mint>*>(ntt_ptr)->middle_product(*this, r);
return FormalPowerSeries<mint>(ret.begin(), ret.end());
}
template <class mint>
void FormalPowerSeries<mint>::ntt() {
set_ntt();
static_cast<NTT<mint>*>(ntt_ptr)->ntt(*this);
}
template <class mint>
void FormalPowerSeries<mint>::intt() {
set_ntt();
static_cast<NTT<mint>*>(ntt_ptr)->intt(*this);
}
template <class mint>
void FormalPowerSeries<mint>::ntt_doubling() {
set_ntt();
static_cast<NTT<mint>*>(ntt_ptr)->ntt_doubling(*this);
}
template <typename mint>
int FormalPowerSeries<mint>::ntt_root() {
set_ntt();
return static_cast<NTT<mint>*>(ntt_ptr)->g;
}
template <typename mint>
FormalPowerSeries<mint> FormalPowerSeries<mint>::inv(int deg) const {
assert((*this)[0] != mint(0));
if (deg == -1) deg = (*this).size();
FPS ret{mint(1) / (*this)[0]};
for (int i = 1; i < deg; i <<= 1)
ret = (ret + ret - ret * ret * (*this).pre(i << 1)).pre(i << 1);
return ret.pre(deg);
}
template <typename mint>
FormalPowerSeries<mint> FormalPowerSeries<mint>::exp(int deg) const {
assert((*this)[0] == mint(0));
if (deg == -1) deg = (*this).size();
FPS ret{mint(1)};
for (int i = 1; i < deg; i <<= 1)
ret = (ret * ((*this).pre(i << 1) - ret.log(i << 1) + 1)).pre(i << 1);
return ret.pre(deg);
}
#line 7 "verify/fps/LC_sqrt_of_formal_power_series.relaxed.test.cpp"
using fps = FormalPowerSeries<mint>;
#line 2 "fps/fps-sqrt.hpp"
#line 2 "modint/mod-sqrt.hpp"
#line 2 "modint/mod-pow.hpp"
unsigned int ModPow(unsigned int a, unsigned long long n, unsigned int m) {
unsigned long long x = a, y = 1;
while (n) {
if (n & 1) y = y * x % m;
x = x * x % m;
n >>= 1;
}
return y;
}
#line 4 "modint/mod-sqrt.hpp"
long long ModSqrt(long long a, long long p) {
if (a >= p) a %= p;
if (p == 2) return a & 1;
if (a == 0) return 0;
if (ModPow(a, (p - 1) / 2, p) != 1) return -1;
if (p % 4 == 3) return ModPow(a, (3 * p - 1) / 4, p);
unsigned int z = 2, q = p - 1;
while (ModPow(z, (p - 1) / 2, p) == 1) z++;
int s = 0;
while (!(q & 1)) {
s++;
q >>= 1;
}
int m = s;
unsigned int c = ModPow(z, q, p);
unsigned int t = ModPow(a, q, p);
unsigned int r = ModPow(a, (q + 1) / 2, p);
while (true) {
if (t == 1) return r;
unsigned int pow = t;
int j = 1;
for (; j < m; j++) {
pow = 1ll * pow * pow % p;
if (pow == 1) break;
}
unsigned int b = c;
for (int i = 0; i < m - j - 1; i++) b = 1ll * b * b % p;
m = j;
c = 1ll * b * b % p;
t = 1ll * t * c % p;
r = 1ll * r * b % p;
}
}
#line 5 "fps/fps-sqrt.hpp"
template <typename mint>
FormalPowerSeries<mint> FpsSqrt(const FormalPowerSeries<mint> &f, int deg = -1) {
if (deg == -1) deg = (int)f.size();
if ((int)f.size() == 0) return FormalPowerSeries<mint>(deg, 0);
if (f[0] == mint(0)) {
for (int i = 1; i < (int)f.size(); i++) {
if (f[i] != mint(0)) {
if (i & 1) return {};
if (deg - i / 2 <= 0) break;
auto ret = FpsSqrt(f >> i, deg - i / 2);
if (ret.empty()) return {};
ret = ret << (i / 2);
if ((int)ret.size() < deg) ret.resize(deg, mint(0));
return ret;
}
}
return FormalPowerSeries<mint>(deg, 0);
}
int64_t sqr = ModSqrt(f[0].val(), mint::get_mod());
if (sqr == -1) return {};
assert(sqr * sqr % mint::get_mod() == f[0].val());
FormalPowerSeries<mint> ret = {mint(sqr)};
mint inv2 = mint(2).inv();
for (int i = 1; i < deg; i <<= 1) {
ret = (ret + f.pre(i << 1) * ret.inv(i << 1)) * inv2;
}
return ret.pre(deg);
}
#line 2 "modint/factorial.hpp"
template <class mint>
struct Factorial {
static void reserve(int n) {
inv(n);
fact(n);
fact_inv(n);
}
static mint inv(int n) {
static long long mod = mint::get_mod();
static vector<mint> buf({0, 1});
assert(n != 0);
if (mod != mint::get_mod()) {
mod = mint::get_mod();
buf = vector<mint>({0, 1});
}
while ((int)buf.capacity() <= n) buf.reserve(buf.capacity() * 2);
while ((int)buf.size() <= n) {
long long k = buf.size(), q = (mod + k - 1) / k;
buf.push_back(q * buf[k * q - mod]);
}
return buf[n];
}
static mint fact(int n) {
static long long mod = mint::get_mod();
static vector<mint> buf({1, 1});
assert(n >= 0);
if (mod != mint::get_mod()) {
mod = mint::get_mod();
buf = vector<mint>({1, 1});
}
while ((int)buf.capacity() <= n) buf.reserve(buf.capacity() * 2);
while ((int)buf.size() <= n) {
long long k = buf.size();
buf.push_back(buf.back() * k);
}
return buf[n];
}
static mint fact_inv(int n) {
static long long mod = mint::get_mod();
static vector<mint> buf({1, 1});
assert(n >= 0);
if (mod != mint::get_mod()) {
mod = mint::get_mod();
buf = vector<mint>({1, 1});
}
if ((int)buf.size() <= n) inv(n);
while ((int)buf.capacity() <= n) buf.reserve(buf.capacity() * 2);
while ((int)buf.size() <= n) {
long long k = buf.size();
buf.push_back(buf.back() * inv(k));
}
return buf[n];
}
static mint binom(int n, int r) {
if (r < 0 || r > n) return 0;
return fact(n) * fact_inv(r) * fact_inv(n - r);
}
static mint binom_naive(int n, int r) {
if (r < 0 || r > n) return 0;
mint res = fact_inv(r);
for (int i = 0; i < r; i++) res *= n - i;
return res;
}
static mint multinom(const vector<int>& r) {
int n = 0;
for (auto& x : r) {
if (x < 0) return 0;
n += x;
}
mint res = fact(n);
for (auto& x : r) res *= fact_inv(x);
return res;
}
static mint P(int n, int r) {
if (r < 0 || r > n) return 0;
return fact(n) * fact_inv(n - r);
}
// partition n items to r groups (allow empty group)
static mint H(int n, int r) {
if (n < 0 || r < 0) return 0;
return r == 0 ? 1 : binom(n + r - 1, r);
}
};
/**
* @brief 階乗, 二項係数
*/
#line 5 "fps/relaxed.hpp"
template <class mint>
class RelaxedMultiply {
const int B = 6;
using fps = FormalPowerSeries<mint>;
int n;
fps f, g, h;
vector<fps> f0, g0;
public:
RelaxedMultiply() : n(0), f(1), g(1), f0(B), g0(B) {}
mint append(mint a, mint b) {
f[n] = a, g[n] = b;
n++;
int m = n & -n;
int l = __builtin_ctz((unsigned int)m);
if (n == m) {
f.resize(2 * m);
g.resize(2 * m);
h.resize(2 * m);
if (l < B) {
for (int i = 0; i < m; i++)
for (int j = m - 1 - i; j < m; j++)
h[i + j] += f[i] * g[j];
} else {
auto f1 = f;
f1.ntt();
f0.push_back(fps(f1.begin(), f1.begin() + m));
auto g1 = g;
g1.ntt();
g0.push_back(fps(g1.begin(), g1.begin() + m));
for (int i = 0; i < 2 * m; i++) f1[i] *= g1[i];
f1.intt();
for (int i = m - 1; i < 2 * m - 1; i++) h[i] += f1[i];
}
} else {
if (l < B) {
int s = n - m;
for (int i = 0; i < m; i++) {
int t = m - 1 - i;
for (int j = 0; j < m; j++)
h[n - 1 + j] += f[s + i] * g[t + j] + g[s + i] * f[t + j];
}
} else {
fps f1(2 * m), g1(2 * m), h1(2 * m);
copy(f.begin() + (n - m), f.begin() + n, f1.begin());
copy(g.begin() + (n - m), g.begin() + n, g1.begin());
f1.ntt(), g1.ntt();
for (int i = 0; i < 2 * m; i++) h1[i] += f1[i] * g0[l + 1][i] + f0[l + 1][i] * g1[i];
h1.intt();
for (int i = m - 1; i < 2 * m - 1; i++) h[n - m + i] += h1[i];
}
}
return h[n - 1];
}
};
template <class mint>
class SemiRelaxedMultiply {
const int B = 6;
using fps = FormalPowerSeries<mint>;
int n, m0;
fps f, g, h;
vector<fps> g0;
public:
SemiRelaxedMultiply(const fps& g_) : n(0), m0(1 << B), f(1), g(g_) {
while (m0 < g.size()) m0 <<= 1;
g.resize(m0);
for (int k = 1; k <= m0; k <<= 1) {
fps g1(2 * k);
copy(g.begin(), g.begin() + min(2 * k, m0), g1.begin());
g1.ntt();
g0.push_back(g1);
}
}
mint append(mint a) {
f[n] = a;
n++;
int m = n & -n;
int l = __builtin_ctz((unsigned int)m);
if (n == m) {
f.resize(2 * m);
h.resize(2 * m);
}
while (l >= g0.size()) {
g0.push_back(g0.back());
g0.back().ntt_doubling();
}
if (l < B) {
int s = n - m;
for (int i = 0; i < m; i++) {
int t = m - 1 - i;
for (int j = 0; j < m; j++)
h[n - 1 + j] += f[s + i] * g[t + j];
}
} else {
fps f1(2 * m);
copy(f.begin() + (n - m), f.begin() + n, f1.begin());
f1.ntt();
for (int i = 0; i < 2 * m; i++) f1[i] *= g0[l][i];
f1.intt();
for (int i = m - 1; i < 2 * m - 1; i++) h[n - m + i] += f1[i];
}
return h[n - 1];
}
};
// f(x)/g(x)
template <class mint>
class RelaxedDivide {
RelaxedMultiply<mint> mul;
int n;
mint c, v;
public:
RelaxedDivide() : n(0) {}
mint append(mint a, mint b) { return v = n++ == 0 ? a * (c = b.inv()) : (a - mul.append(v, b)) * c; }
};
template <class mint>
class RelaxedInv {
RelaxedMultiply<mint> mul;
int n;
mint c, v;
public:
RelaxedInv() : n(0) {}
mint append(mint a) { return v = n++ == 0 ? (c = a.inv()) : -mul.append(v, a) * c; }
};
template <class mint>
class RelaxedExp {
using fact = Factorial<mint>;
RelaxedMultiply<mint> mul;
int n;
mint v;
public:
RelaxedExp() : n(0) {}
mint append(mint a) {
if (n++ == 0) {
assert(a == 0);
v = 1;
} else {
v = mul.append((n - 1) * a, v) * fact::inv(n - 1);
}
return v;
}
};
template <class mint>
class RelaxedLog {
using fact = Factorial<mint>;
RelaxedMultiply<mint> mul;
int n;
mint a0, v;
public:
RelaxedLog() : n(0) {}
mint append(mint a) {
if (n == 0) {
assert(a == 1);
n++;
return 0;
} else if (n == 1) {
a0 = a, n++;
return v = a;
} else {
v = n * a - mul.append(v, a0);
a0 = a;
return v * fact::inv(n++);
}
}
};
template <class mint>
class RelaxedSqrt {
RelaxedMultiply<mint> mul;
int n;
mint c, v;
public:
RelaxedSqrt() : n(0) {}
mint append(mint a) {
if (n == 0) {
long long sq = ModSqrt(a.val(), mint::get_mod());
assert(sq != -1 && sq != 0);
c = mint(2 * sq).inv();
n++;
return sq;
} else {
return v = (n++ == 1 ? a : a - mul.append(v, v)) * c;
}
}
};
/**
* @brief Relaxed
* @docs docs/fps/relaxed.md
*/
#line 10 "verify/fps/LC_sqrt_of_formal_power_series.relaxed.test.cpp"
int main() {
int n;
in(n);
fps a(n);
in(a);
if (a[0] == 0) {
a = FpsSqrt(a);
if (a.empty())
out(-1);
else
out(a);
} else if (ModSqrt(a[0].val(), mint::get_mod()) == -1) {
out(-1);
} else {
fps b(n);
RelaxedSqrt<mint> sqrt;
rep(i, 0, n) b[i] = sqrt.append(a[i]);
out(b);
}
}