From c1b08ac0400487aba1a571e2ce0285e111868631 Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 15 Jul 2022 19:18:07 +0200 Subject: [PATCH] smarter hnf in fmpzi_gcd_shortest --- fmpzi/gcd_shortest.c | 214 ++++++++++++++++++++++++++++-------------- fmpzi/profile/p-gcd.c | 81 ++++++++++++++++ 2 files changed, 225 insertions(+), 70 deletions(-) create mode 100644 fmpzi/profile/p-gcd.c diff --git a/fmpzi/gcd_shortest.c b/fmpzi/gcd_shortest.c index 91a5d1dbc..e544bcfd6 100644 --- a/fmpzi/gcd_shortest.c +++ b/fmpzi/gcd_shortest.c @@ -183,10 +183,13 @@ void _fmpz_mat22_shortest_l_infinity( void _fmpzi_gcd_fmpz_shortest( fmpz_t gx, fmpz_t gy, - const fmpz_t ax, const fmpz_t ay, + const fmpz_t ax_, const fmpz_t ay_, const fmpz_t b) { fmpz_t A, B, C, ga, ua, va, g, u, v, axog, ayog, m1, m2, m3, m4; + fmpz_t t, ax_copy, ay_copy; + const fmpz* ax = ax_; + const fmpz* ay = ay_; if (fmpz_is_zero(b)) { @@ -194,17 +197,35 @@ void _fmpzi_gcd_fmpz_shortest( fmpz_set(gy, ay); return; } - else if (fmpz_is_zero(ax)) + + fmpz_init(t); + fmpz_init(ax_copy); + fmpz_init(ay_copy); + + /* reduce ax+i*ay by b */ + if (fmpz_cmpabs(ax, b) > 0) + { + fmpz_tdiv_qr(t, ax_copy, ax, b); + ax = ax_copy; + } + + if (fmpz_cmpabs(ay, b) > 0) + { + fmpz_tdiv_qr(t, ay_copy, ay, b); + ay = ay_copy; + } + + if (fmpz_is_zero(ax)) { fmpz_gcd(gx, ay, b); fmpz_zero(gy); - return; + goto cleanup_stage1; } else if (fmpz_is_zero(ay)) { fmpz_gcd(gx, ax, b); fmpz_zero(gy); - return; + goto cleanup_stage1; } fmpz_init(A); @@ -223,12 +244,6 @@ void _fmpzi_gcd_fmpz_shortest( fmpz_init(m3); fmpz_init(m4); -/* - ax ay g*1 g*B - -ay ax row ops 0 g*A - b 0 ------> 0 0 - 0 b 0 0 -*/ fmpz_xgcd(ga, ua, va, ax, ay); fmpz_xgcd(g, u, v, ga, b); /* v not needed */ fmpz_divexact(axog, ax, g); @@ -261,15 +276,54 @@ void _fmpzi_gcd_fmpz_shortest( fmpz_clear(m2); fmpz_clear(m3); fmpz_clear(m4); + +cleanup_stage1: + fmpz_clear(t); + fmpz_clear(ax_copy); + fmpz_clear(ay_copy); } +#define PTR_SWAP(T, A, B) \ + do { \ + T* __t_m_p = A; \ + A = B; \ + B = __t_m_p; \ + } while (0) + + +/* + ax ay g*1 g*B g = gcd(gx, gy) + -ay ax row ops 0 g*A g^2*A = norm(gx + i*gy) + bx by ------> 0 0 + -by bx 0 0 + Except when the gcd is purely real or purely imaginary, it is any shortest + vector in the above lattice in the l_infinity norm. In the exceptional case, + it turns out that A = 1 and B = 0, and we rely on _shortest_l_infinity + returning (1,0) or (0,1) instead of (1,1) in this case. +*/ void _fmpzi_gcd_shortest( fmpz_t gx, fmpz_t gy, const fmpz_t ax, const fmpz_t ay, - const fmpz_t bx, const fmpz_t by) + const fmpz_t bx_, const fmpz_t by_) { - fmpz_t A, B, C, axog, ayog, bxog, byog, ga, ua, va, gb, ub, vb, g, u, v, t, m1, m2, m3, m4; + fmpz_t A, B, C, ag, t1, t2, bg, bu, bv, g, u, v, m1, m2, m3, m4; + fmpz_t bx_copy, by_copy; + const fmpz* bx = bx_; + const fmpz* by = by_; + + /* ensure norm(a) <= norm(b) approximately */ + { + slong ax_size = fmpz_size(ax); + slong ay_size = fmpz_size(ay); + slong bx_size = fmpz_size(bx); + slong by_size = fmpz_size(by); + if (FLINT_MAX(ax_size, ay_size) > FLINT_MAX(bx_size, by_size)) + { + PTR_SWAP(const fmpz, ax, bx); + PTR_SWAP(const fmpz, ay, by); + } + } if (fmpz_is_zero(ax)) { @@ -281,95 +335,115 @@ void _fmpzi_gcd_shortest( _fmpzi_gcd_fmpz_shortest(gx, gy, bx, by, ax); return; } - else if (fmpz_is_zero(bx)) - { - _fmpzi_gcd_fmpz_shortest(gx, gy, ax, ay, by); - return; - } - else if (fmpz_is_zero(by)) - { - _fmpzi_gcd_fmpz_shortest(gx, gy, ax, ay, bx); - return; - } fmpz_init(A); fmpz_init(B); fmpz_init(C); - fmpz_init(axog); - fmpz_init(ayog); - fmpz_init(bxog); - fmpz_init(byog); - fmpz_init(ga); - fmpz_init(ua); - fmpz_init(va); - fmpz_init(gb); - fmpz_init(ub); - fmpz_init(vb); + fmpz_init(ag); + fmpz_init(t1); + fmpz_init(t2); + fmpz_init(bg); + fmpz_init(bu); + fmpz_init(bv); fmpz_init(g); fmpz_init(u); fmpz_init(v); - fmpz_init(t); fmpz_init(m1); fmpz_init(m2); fmpz_init(m3); fmpz_init(m4); + fmpz_init(bx_copy); + fmpz_init(by_copy); -/* - ax ay g*1 g*B - -ay ax row ops 0 g*A g^2*A = norm(gx + i*gy) - bx by ------> 0 0 - -by bx 0 0 + /* + first find t1*ax + t2*ay = ag + [ ax, ay] [ag, t1*ay - t2*ax ] + [-ay, ax] ---> [ 0, (ax^2+ay^2)/ag] + */ + fmpz_xgcd(ag, t1, t2, ax, ay); + fmpz_fmms(m1, t1, ay, t2, ax); + fmpz_fmma(m2, ax, ax, ay, ay); + fmpz_divexact(m2, m2, ag); - Except when the gcd is purely real or purely imaginary, it is any shortest - vector in the above lattice in the l_infinity norm. In the exceptional case, - it turns out that A = 1 and B = 0, and we rely on _shortest_l_infinity - returning (1,0) or (0,1) instead of (1,1) in this case. -*/ - fmpz_xgcd(ga, ua, va, ax, ay); - fmpz_xgcd(gb, ub, vb, bx, by); - fmpz_xgcd(g, u, v, ga, gb); - fmpz_divexact(axog, ax, g); - fmpz_divexact(ayog, ay, g); - fmpz_divexact(bxog, bx, g); - fmpz_divexact(byog, by, g); - fmpz_fmms(m1, ayog, ua, axog, va); - fmpz_fmma(m2, ax, axog, ay, ayog); - fmpz_divexact(m2, m2, ga); - fmpz_fmms(m3, byog, ub, bxog, vb); - fmpz_fmma(m4, bx, bxog, by, byog); - fmpz_divexact(m4, m4, gb); - fmpz_fmms(t, m3, ga, m1, gb); + /* next reduce bx+i*by by (ax^2+ay^2)/ag */ + if (fmpz_cmpabs(bx, m2) > 0) + { + fmpz_tdiv_qr(t1, bx_copy, bx, m2); + bx = bx_copy; + } + + if (fmpz_cmpabs(by, m2) > 0) + { + fmpz_tdiv_qr(t1, by_copy, by, m2); + by = by_copy; + } + + /* now reduce [bx by; -by bx] */ + if (fmpz_is_zero(bx) && fmpz_is_zero(by)) + { + /* avoid division by bg below */ + fmpz_set(gx, ax); + fmpz_set(gy, ay); + goto cleanup; + } + + fmpz_xgcd(bg, bu, bv, bx, by); + fmpz_xgcd(g, u, v, ag, bg); + + if (!fmpz_is_one(g)) + { + fmpz_divexact(m1, m1, g); + fmpz_divexact(m2, m2, g); + fmpz_divexact(t1, bx, g); + fmpz_divexact(t2, by, g); + fmpz_fmms(m3, bu, t2, bv, t1); + fmpz_fmma(m4, bx, t1, by, t2); + } + else + { + fmpz_fmms(m3, bu, by, bv, bx); + fmpz_fmma(m4, bx, bx, by, by); + } + + fmpz_divexact(m4, m4, bg); + + /* now reduce the one remaining 2x2 */ + fmpz_fmms(t1, m3, ag, m1, bg); fmpz_fmma(m1, m1, u, m3, v); - fmpz_divexact(m3, t, g); + if (fmpz_is_one(g)) + fmpz_swap(m3, t1); + else + fmpz_divexact(m3, t1, g); + + /* reduce last column */ fmpz_gcd3(A, m2, m3, m4); - fmpz_fdiv_qr(t, B, m1, A); + fmpz_fdiv_qr(t1, B, m1, A); fmpz_one(C); _fmpz_mat22_shortest_l_infinity(gx, gy, u, v, C, B, A); fmpz_mul(gx, gx, g); fmpz_mul(gy, gy, g); +cleanup: + fmpz_clear(A); fmpz_clear(B); fmpz_clear(C); - fmpz_clear(axog); - fmpz_clear(ayog); - fmpz_clear(bxog); - fmpz_clear(byog); - fmpz_clear(ga); - fmpz_clear(ua); - fmpz_clear(va); - fmpz_clear(gb); - fmpz_clear(ub); - fmpz_clear(vb); + fmpz_clear(ag); + fmpz_clear(t1); + fmpz_clear(t2); + fmpz_clear(bg); + fmpz_clear(bu); + fmpz_clear(bv); fmpz_clear(g); fmpz_clear(u); fmpz_clear(v); - fmpz_clear(t); fmpz_clear(m1); fmpz_clear(m2); fmpz_clear(m3); fmpz_clear(m4); + fmpz_clear(bx_copy); + fmpz_clear(by_copy); } diff --git a/fmpzi/profile/p-gcd.c b/fmpzi/profile/p-gcd.c new file mode 100644 index 000000000..a66df629e --- /dev/null +++ b/fmpzi/profile/p-gcd.c @@ -0,0 +1,81 @@ +/* + Copyright 2022 Daniel Schultz + + This file is part of Arb. + + FLINT is free software: you can redistribute it and/or modify it under + the terms of the GNU Lesser General Public License (LGPL) as published + by the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. See . +*/ + +#include "fmpzi.h" +#include "flint/profiler.h" + +/* approx nbits(norm(x)) = n */ +static void fmpzi_randbits_norm(fmpzi_t x, flint_rand_t state, flint_bitcnt_t n) +{ + fmpz_randbits(fmpzi_realref(x), state, (n+1)/2); + fmpz_randtest(fmpzi_imagref(x), state, (n+0)/2); + if (n_randint(state, 2)) + fmpz_swap(fmpzi_realref(x), fmpzi_imagref(x)); +} + +double profile_it(ulong a_bits, ulong b_bits, ulong g_bits, flint_rand_t state) +{ + fmpzi_t g, a, b; + slong i, nreps; + timeit_t timer; + + fmpzi_init(g); + fmpzi_init(a); + fmpzi_init(b); + + nreps = 1 + 5000000/(1 + a_bits + b_bits); + + timeit_start(timer); + for (i = 0; i < nreps; i++) + { + fmpzi_randbits_norm(a, state, FLINT_MAX(1, a_bits - g_bits)); + fmpzi_randbits_norm(b, state, FLINT_MAX(1, b_bits - g_bits)); + fmpzi_randbits_norm(g, state, g_bits); + fmpzi_mul(a, a, g); + fmpzi_mul(b, b, g); + fmpzi_gcd_shortest(g, a, b); + fmpzi_gcd_shortest(g, b, a); + } + timeit_stop(timer); + + fmpzi_clear(g); + fmpzi_clear(a); + fmpzi_clear(b); + + return timer->wall/nreps; +} + + +int main(void) +{ + ulong i; + ulong a_bits, b_bits, g_bits; + flint_rand_t state; + + flint_randinit(state); + + for (a_bits = 100000; a_bits < 10000000; a_bits += 1 + a_bits/2) + for (b_bits = a_bits; b_bits < 10000000; b_bits += 1 + b_bits/2) + { + flint_printf("a_bits:%8wu, b_bits:%8wu |", a_bits, b_bits); + for (g_bits = 1; g_bits < a_bits/2; g_bits += 100 + g_bits/2) + { + double t = profile_it(a_bits, b_bits, g_bits, state); + flint_printf(" %5.0f", t); + fflush(stdout); + } + flint_printf("\n"); + } + + flint_randclear(state); + + return 0; +}