kth-competitive-programming · Chillee · Apr 23, 2019 · Apr 23, 2019 · Apr 25, 2019 · Apr 25, 2019
diff --git a/content/number-theory/ModularArithmetic.h b/content/number-theory/ModularArithmetic.h
@@ -13,6 +13,7 @@
 const ll mod = 17; // change to something else
 struct Mod {
 	ll x;
+	Mod():x(0) {}
 	Mod(ll xx) : x(xx) {}
 	Mod operator+(Mod b) { return Mod((x + b.x) % mod); }
 	Mod operator-(Mod b) { return Mod((x - b.x + mod) % mod); }
@@ -27,4 +28,5 @@ struct Mod {
 		Mod r = *this ^ (e / 2); r = r * r;
 		return e&1 ? *this * r : r;
 	}
+	explicit operator ll(){ return x; }
 };
diff --git a/content/numerical/FastFourierTransform.h b/content/numerical/FastFourierTransform.h
@@ -3,22 +3,31 @@
  * Date: 2019-01-09
  * License: CC0
  * Source: http://neerc.ifmo.ru/trains/toulouse/2017/fft2.pdf (do read, it's excellent)
-   Papers about accuracy: http://www.daemonology.net/papers/fft.pdf, http://www.cs.berkeley.edu/~fateman/papers/fftvsothers.pdf
-   For integers rounding works if $(|a| + |b|)\max(a, b) < \mathtt{\sim} 10^9$, or in theory maybe $10^6$.
+   Accuracy bound from http://www.daemonology.net/papers/fft.pdf
  * Description: fft(a, ...) computes $\hat f(k) = \sum_x a[x] \exp(2\pi i \cdot k x / N)$ for all $k$. Useful for convolution:
    \texttt{conv(a, b) = c}, where $c[x] = \sum a[i]b[x-i]$.
    For convolution of complex numbers or more than two vectors: FFT, multiply
    pointwise, divide by n, reverse(start+1, end), FFT back.
-   For integers, consider using a number-theoretic transform instead, to avoid rounding issues.
+   Let $N = \max(|a|,|b|)$. Rounding is guaranteed to be safe as long as $N\log_2{N}\max(a)\max(b) < \mathtt{\sim} 10^{16}$.
+   Consider using a number-theoretic transform or FFTMod instead if precision is an issue.
  * Time: O(N \log N), where $N = |A|+|B|-1$ ($\tilde 1s$ for $N=2^{22}$)
  * Status: somewhat tested
  */
 #pragma once
 
 typedef complex<double> C;
+typedef complex<long double> Cd;
 typedef vector<double> vd;
-
-void fft(vector<C> &a, vector<C> &rt, vi& rev, int n) {
+void fft(vector<C> &a, int n, int L, vector<C> &rt) {
+	vi rev(n);
+	rep(i,0,n) rev[i] = (rev[i / 2] | (i & 1) << L) / 2;
+	if (rt.empty()) {
+		rt.assign(n, 1);
+		for (int k = 2; k < n; k *= 2) {
+			Cd z[] = {1, polar(1.0, M_PI / k)};
+			rep(i, k, 2 * k) rt[i] = Cd(rt[i / 2]) * z[i & 1];
+		}
+	}
 	rep(i,0,n) if (i < rev[i]) swap(a[i], a[rev[i]]);
 	for (int k = 1; k < n; k *= 2)
 		for (int i = 0; i < n; i += 2 * k) rep(j,0,k) {
@@ -27,25 +36,19 @@ void fft(vector<C> &a, vector<C> &rt, vi& rev, int n) {
 			C z(x[0]*y[0] - x[1]*y[1], x[0]*y[1] + x[1]*y[0]);           /// exclude-line
 			a[i + j + k] = a[i + j] - z;
 			a[i + j] += z;
-	}
+		}
 }
-
-vd conv(const vd& a, const vd& b) {
+vd conv(const vd &a, const vd &b) { // c[x] = sum_i a[i] * b[x - i]; empty if either input is empty
 	if (a.empty() || b.empty()) return {};
 	vd res(sz(a) + sz(b) - 1);
 	int L = 32 - __builtin_clz(sz(res)), n = 1 << L;
-	vector<C> in(n), out(n), rt(n, 1); vi rev(n);
-	rep(i,0,n) rev[i] = (rev[i/2] | (i&1) << L) / 2;
-	for (int k = 2; k < n; k *= 2) {
-		C z[] = {1, polar(1.0, M_PI / k)};
-		rep(i,k,2*k) rt[i] = rt[i/2] * z[i&1];
-	}
+	vector<C> in(n), out(n), rt; // rt starts empty; fft fills the root table when it sees it empty
 	copy(all(a), begin(in));
 	rep(i,0,sz(b)) in[i].imag(b[i]);
-	fft(in, rt, rev, n);
+	fft(in, n, L, rt); // in holds a in the real parts and b in the imaginary parts
 	trav(x, in) x *= x;
 	rep(i,0,n) out[i] = in[-i & (n - 1)] - conj(in[i]);
-	fft(out, rt, rev, n);
-	rep(i,0,sz(res)) res[i] = imag(out[i]) / (4*n);
+	fft(out, n, L, rt); // rt is non-empty by now, so the table is reused
+	rep(i,0,sz(res)) res[i] = imag(out[i]) / (4 * n);
 	return res;
 }
diff --git a/content/numerical/FastFourierTransformMod.h b/content/numerical/FastFourierTransformMod.h
@@ -0,0 +1,36 @@
+/**
+ * Author: chilli
+ * Date: 2019-04-25
+ * License: CC0
+ * Source: http://neerc.ifmo.ru/trains/toulouse/2017/fft2.pdf
+ * Description: Higher-precision FFT that can be used for convolutions modulo arbitrary integers.
+ * Let $N = \max(|a|,|b|)$. It is guaranteed safe as long as $N\log_2{N}\sqrt{\max(a)\max(b)} < \mathtt{\sim} 10^{16}$.
+ * Time: O(N \log N), where $N = |A|+|B|-1$ (twice as slow as NTT or FFT)
+ * Status: somewhat tested
+ */
+#pragma once
+
+#include "FastFourierTransform.h"
+
+typedef vector<ll> vl;
+template <int M> vl convMod(const vl &a, const vl &b) { // convolution of a and b, each output reduced mod M
+	if (a.empty() || b.empty()) return {};
+	vl res(sz(a) + sz(b) - 1);
+	int B=32-__builtin_clz(sz(res)), n = 1<<B, cut=int(sqrt(M)); // n = 2^B exceeds sz(res); cut ~ sqrt(M)
+	vector<C> L(n), R(n), outs(n), outl(n), rt; // rt is left empty so the first fft call builds the roots
+	rep(i,0,sz(a)) L[i] = Cd(a[i] / cut, a[i] % cut); // real = high digit a/cut, imag = low digit a%cut
+	rep(i,0,sz(b)) R[i] = Cd(b[i] / cut, b[i] % cut); // same split for b
+	fft(L, n, B, rt), fft(R, n, B, rt);
+	rep(i,0,n) {
+		int j = -i & (n - 1); // j = (n - i) mod n
+		outl[j] = (L[i] + conj(L[j])) * R[i] / (2.0 * n); // NOTE(review): looks like (high digits of a) x b, prescaled by 1/n
+		outs[j] = (L[i] - conj(L[j])) * R[i] / (2.0 * n) / 1i; // NOTE(review): looks like (low digits of a) x b
+	}
+	fft(outl, n, B, rt), fft(outs, n, B, rt);
+	rep(i,0,sz(res)) {
+		ll av = ll(outl[i].real()+.5), cv = ll(outs[i].imag()+.5); // +.5 then truncate: rounds non-negative values
+		ll bv = ll(outl[i].imag()+.5) + ll(outs[i].real()+.5); // sum of the two cross terms
+		res[i] = ((av % M * cut + bv % M) * cut + cv % M) % M; // av*cut^2 + bv*cut + cv, reduced mod M
+	}
+	return res;
+}
diff --git a/content/numerical/PolynomialBase.h b/content/numerical/PolynomialBase.h
@@ -0,0 +1,57 @@
+/**
+ * Author: chilli, Andrew He, Adamant
+ * Date: 2019-04-27
+ * Description: An FFT-based polynomial library: poly is a vector of Mod with free-function operators.
+ */
+#pragma once
+
+#include "../number-theory/ModularArithmetic.h"
+#include "FastFourierTransform.h"
+#include "FastFourierTransformMod.h"
+// #include "NumberTheoreticTransform.h"
+
+typedef Mod num;
+typedef vector<num> poly;
+vector<Mod> conv(vector<Mod> a, vector<Mod> b) {
+	// Unwrap to raw ll residues, convolve modulo `mod`, then wrap back into Mod.
+	vl lhs(all(a)), rhs(all(b)), prod = convMod<mod>(lhs, rhs);
+	// vl prod = conv(lhs, rhs); // alternative when NumberTheoreticTransform.h is included
+	return vector<Mod>(all(prod));
+}
+poly &operator+=(poly &a, const poly &b) {
+	if (sz(a) < sz(b)) a.resize(sz(b)); // grow a to the longer length; new coefficients are Mod()
+	for (int i = 0; i < sz(b); ++i) a[i] = a[i] + b[i];
+	return a;
+}
+poly &operator-=(poly &a, const poly &b) {
+	if (sz(a) < sz(b)) a.resize(sz(b)); // pad a with Mod() coefficients up to b's length
+	for (int i = 0; i < sz(b); ++i) a[i] = a[i] - b[i];
+	return a;
+}
+
+// a *= b (polynomial product). An empty operand gives an empty product.
+poly &operator*=(poly &a, const poly &b) {
+	if (a.empty() || b.empty()) return a = poly(); // else sz(a)+sz(b)-1 could be -1
+	if (sz(a) + sz(b) >= 100) return a = conv(a, b); // long inputs: FFT-based conv
+	poly res(sz(a) + sz(b) - 1); // short inputs: schoolbook product
+	rep(i,0,sz(a)) rep(j,0,sz(b))
+		res[i + j] = (res[i + j] + a[i] * b[j]);
+	return a = res;
+}
+poly operator*(poly a, const num b) {
+	// `a` is already a by-value copy, so scale its coefficients in place.
+	for (num &coef : a) coef = coef * b;
+	return a;
+}
+#define OP(o, oe) /* defines binary operator `o` by applying compound `oe` to a copy of a */ \
+	poly operator o(poly a, poly b) { \
+		poly c = a; \
+		return c oe b; \
+	}
+OP(*, *=) OP(+, +=) OP(-, -=); // binary *, + and - for poly, built from *=, += and -=
+poly modK(poly a, int k) { a.resize(min(k, sz(a))); return a; } // first min(k, sz(a)) coefficients of a
+poly inverse(poly A) {
+	poly B{num(1) / A[0]}; // one-coefficient start: 1 / A[0]
+	for (int len = 2 * sz(B); sz(B) < sz(A); len = 2 * sz(B)) // each round: B <- B * (2 - A * B), cut to len
+		B = modK(B * (poly{num(2)} - modK(A, len) * B), len);
+	return modK(B, sz(A));
+}
diff --git a/content/numerical/PolynomialEval.h b/content/numerical/PolynomialEval.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "PolynomialMod.h"
+
+vector<num> eval(const poly &a, const vector<num> &x) {
+	const int n = sz(x);
+	if (n == 0) return {};
+	// tree[n + i] = (X - x[i]); every inner node is the product of its two children.
+	vector<poly> tree(2 * n), rem(2 * n);
+	for (int i = 0; i < n; ++i) tree[n + i] = poly({num(0) - x[i], 1});
+	for (int v = n - 1; v >= 1; --v) tree[v] = tree[2 * v] * tree[2 * v + 1];
+	// Push remainders down the tree: rem[v] = rem[parent] mod tree[v], starting from a.
+	rem[1] = a % tree[1];
+	for (int v = 2; v < 2 * n; ++v) rem[v] = rem[v / 2] % tree[v];
+	vector<num> y(n);
+	for (int i = 0; i < n; ++i) y[i] = rem[n + i][0];
+	return y;
+}
diff --git a/content/numerical/PolynomialInterpolate.h b/content/numerical/PolynomialInterpolate.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "PolynomialMod.h"
+#include "PolynomialPow.h"
+#include "PolynomialEval.h"
+
+poly interp(vector<num> x, vector<num> y) { // polynomial built from the points (x[i], y[i])
+	int n = sz(x);
+	if (!n) return {}; // no points: up[1] below would index an empty vector
+	vector<poly> up(2 * n), down(2 * n); // two trees with leaves stored at [n, 2n)
+	rep(i,0,n) up[i+n] = poly({num(0)-x[i], num(1)}); // leaf i is (X - x[i])
+	for (int i = n-1; i > 0; i--) up[i] = up[2*i] * up[2*i+1];
+	vector<num> a = eval(deriv(up[1]), x); // derivative of the full product, evaluated at every x[i]
+	rep(i,0,n) down[i+n] = poly({y[i] * (num(1) / a[i])}); // divides by a[i]; presumably needs distinct x[i]
+	for (int i = n-1; i > 0; i--) down[i] = down[i*2] * up[i*2+1] + down[i*2+1] * up[i*2];
+	return down[1];
+}
diff --git a/content/numerical/PolynomialMod.h b/content/numerical/PolynomialMod.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "PolynomialBase.h"
+
+poly &operator/=(poly &a, poly b) {
+	if (sz(a) < sz(b)) { a.clear(); return a; } // a shorter than b: the quotient is empty
+	const int qlen = sz(a) - sz(b) + 1; // number of quotient coefficients
+	// Work on reversed coefficient order, keeping only the first qlen terms of each.
+	reverse(all(a)); a.resize(qlen);
+	reverse(all(b)); b.resize(qlen);
+	a = a * inverse(b);
+	a.resize(qlen), reverse(all(a));
+	return a;
+}
+OP(/, /=) // defines poly operator/(poly, poly) by applying /= to a copy
+poly &operator%=(poly &a, const poly &b) { // a <- a mod b; b is const like in += -= *=
+	if (sz(a) < sz(b))
+		return a; // a is already shorter than b, nothing to reduce
+	poly c = (a / b) * b; // quotient times divisor
+	a.resize(sz(b) - 1); // the remainder keeps sz(b) - 1 coefficients
+	rep(i, 0, sz(a)) a[i] = a[i] - c[i];
+	return a;
+}
+OP(%, %=) // defines poly operator%(poly, poly) by applying %= to a copy
diff --git a/content/numerical/PolynomialPow.h b/content/numerical/PolynomialPow.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "PolynomialBase.h"
+poly deriv(poly a) {
+	const int n = sz(a);
+	if (n == 0) return {};
+	poly d(n - 1); // d[i] = (i + 1) * a[i + 1]
+	for (int i = 0; i + 1 < n; ++i) d[i] = a[i + 1] * num(i + 1);
+	return d;
+}
+poly integr(poly a) {
+	if (a.empty()) return {0};
+	poly res(sz(a) + 1); // res[i] first holds the factor for index i, then a[i-1] times it; res[0] stays Mod()
+	res[1] = num(1);
+	for (int i = 2; i < sz(res); ++i) res[i] = res[mod % i] * Mod(mod - mod / i); // presumably 1/i mod `mod`
+	for (int i = 1; i < sz(res); ++i) res[i] = a[i - 1] * res[i];
+	return res;
+}
+poly log(poly a) {
+	poly ratio = deriv(a) * inverse(a); return modK(integr(ratio), sz(a)); // integral of a'/a, cut to sz(a) terms
+}
+poly exp(poly a) { // power-series exponential cut to sz(a) terms; presumably requires a[0] == 0
+	poly b(1, num(1)); // start from the single coefficient 1
+	if (a.empty())
+		return b; // empty input still returns the one-coefficient polynomial 1
+	while (sz(b) < sz(a)) {
+		b.resize(sz(b) * 2); // double the length; the new coefficients are Mod()
+		b *= (poly({num(1)}) + modK(a, sz(b)) - log(b)); // update step: b <- b * (1 + a - log b)
+		b.resize(sz(b) / 2 + 1); // cut the longer product back down before the next round
+	}
+	return modK(b, sz(a));
+}
+poly pow(poly a, ll m) { // a^m cut to sz(a) coefficients; assumes m >= 0
+	int p = 0, n = sz(a);
+	while (p < n && a[p].x == 0) ++p; // p = index of the lowest nonzero coefficient
+	// Zero result when a is all zero (p == n, keeps a[p] in range) or m*p >= n, tested without overflow.
+	if (p == n || (p && m > (n - 1) / p)) return poly(n);
+	num j = a[p];
+	a = {a.begin() + p, a.end()};
+	a = a * (num(1) / j); // scale so the constant term becomes 1 before taking log
+	a.resize(n);
+	auto res = exp(log(a) * num(m % mod)) * (j ^ m); // m reduced first: Mod(ll) stores its argument as given
+	res.insert(res.begin(), p * m, 0); // put back the p*m leading zeros removed above
+	return modK(res, n);
+}
diff --git a/content/numerical/chapter.tex b/content/numerical/chapter.tex
@@ -1,9 +1,8 @@
 \chapter{Numerical}
 
 \kactlimport{GoldenSectionSearch.h}
-\kactlimport{Polynomial.h}
+\kactlimport{PolynomialBase.h}
 \kactlimport{PolyRoots.h}
-\kactlimport{PolyInterpolate.h}
 \kactlimport{BerlekampMassey.h}
 \kactlimport{LinearRecurrence.h}
 \kactlimport{HillClimbing.h}
@@ -19,5 +18,6 @@ \chapter{Numerical}
 \kactlimport{Tridiagonal.h}
 \section{Fourier transforms}
 	\kactlimport{FastFourierTransform.h}
+	\kactlimport{FastFourierTransformMod.h}
 	\kactlimport{NumberTheoreticTransform.h}
 	\kactlimport{FastSubsetTransform.h}