Revision d12d603036b334b53d6e886cfa985a082e981860 authored by Emmanuel Thomé on 29 January 2021, 06:20:31 UTC, committed by Emmanuel Thomé on 29 January 2021, 21:39:17 UTC
1 parent 590bfe4
las-fbroot-qlattice.hpp
#ifndef LAS_FBROOT_QLATTICE_HPP_
#define LAS_FBROOT_QLATTICE_HPP_
#include <cstdio> // for fprintf, stderr
#include <cstdint> // for int64_t, uint64_t, INT64_C, uint32_t
#include "fb-types.h" // for fbprime_t, redc_invp_t, sublat_t
#include "las-arith.hpp" // for redc_32, invmod_redc_32, invmod_po2
#include "las-qlattice.hpp"
#include "macros.h" // for LIKELY, UNLIKELY, ASSERT_ALWAYS, ASSERT
static inline fbprime_t
fb_root_in_qlattice_31bits (const fbprime_t p, const fbprime_t R,
const uint32_t invp, const qlattice_basis &basis);
static inline fbprime_t
fb_root_in_qlattice_127bits (const fbprime_t p, const fbprime_t R,
const uint64_t invp, const qlattice_basis &basis);
/* fb_root_in_qlattice returns (R*b1-a1)/(a0-R*b0) mod p */
#if defined(SUPPORT_LARGE_Q)
/* The reason why the special-q is constrained to some limit is quite
* clearly linked to the fb_root_in_qlattice variant being used. However,
* it does not seem to be exactly 31 or 127 bits. This should be
* investigated */
#define MAX_SPECIALQ_BITSIZE 126
static inline fbprime_t
fb_root_in_qlattice(const fbprime_t p, const fbprime_t R,
const redc_invp_t invp, const qlattice_basis &basis);
static inline fbprime_t
fb_root_in_qlattice(const fbprime_t p, const fbprime_t R,
const redc_invp_t invp, const qlattice_basis &basis)
{
return fb_root_in_qlattice_127bits(p, R, invp, basis);
}
#else
#define MAX_SPECIALQ_BITSIZE 30
static inline fbprime_t
fb_root_in_qlattice(const fbprime_t p, const fbprime_t R,
const redc_invp_t invp, const qlattice_basis &basis);
static inline fbprime_t
fb_root_in_qlattice(const fbprime_t p, const fbprime_t R,
const redc_invp_t invp, const qlattice_basis &basis)
{
return fb_root_in_qlattice_31bits(p, R, invp, basis);
}
#endif
/* This helper function is used for powers of 2. See below */
static inline fbprime_t
fb_root_in_qlattice_po2 (const fbprime_t p, const fbprime_t R,
const qlattice_basis &basis);
/* The version fb_root_in_qlattice_31bits mandates that the coordinates
* of the q-lattice are at most 31 bits, so that combinations such as
* Rb1-a1 always fit within the interval ]-2^32p, +2^32p[.
* It makes 3 calls to redc_32 and 1 to invmod_redc_32.
*/
static inline fbprime_t
fb_root_in_qlattice_31bits (const fbprime_t p, const fbprime_t R,
const uint32_t invp, const qlattice_basis &basis)
{
int64_t aux1, aux2;
uint32_t u, v;
/* Handle powers of 2 separately, REDC doesn't like them */
if (UNLIKELY(!(p & 1)))
return fb_root_in_qlattice_po2(p, R, basis);
// Use Signed Redc for the computation:
// Numerator and denominator will get divided by 2^32, but this does
// not matter, since we take their quotient.
if (LIKELY(R < p)) /* Root in a,b-plane is affine */
{
aux1 = (int64_t)R * basis.b1 - basis.a1;
aux2 = basis.a0 - (int64_t)R *basis.b0;
}
else /* Root in a,b-plane is projective */
{
aux1 = basis.b1 - (int64_t)(R - p) * basis.a1;
aux2 = (int64_t)(R - p) * basis.a0 - basis.b0;
}
/* USE_NATIVE_MOD is slightly slower on Intel i5-4590 with gcc 9.2.1:
* test_fb_root 10000 reports 14.49s instead of 13.26s
* (same pattern on i7-8550U)
*/
//#define USE_NATIVE_MOD
#ifdef USE_NATIVE_MOD
u = (aux1 >= 0) ? aux1 % p : p - ((-aux1) % p);
v = (aux2 >= 0) ? aux2 % p : p - ((-aux2) % p);
#else
u = redc_32(aux1, p, invp); /* 0 <= u < p */
v = redc_32(aux2, p, invp); /* 0 <= v < p */
#endif
aux2 = invmod_redc_32(v, p);
if (LIKELY(aux2)) {
aux1 = 0;
aux2 *= u;
}
else
{
/* root in i,j-plane is projective */
aux2 = invmod_redc_32(u, p);
if (UNLIKELY(!aux2))
{
fprintf (stderr, "Error, root in (i,j)-plane is projective\n");
ASSERT_ALWAYS(0);
}
aux1 = p;
aux2 *= v;
}
return (fbprime_t) (redc_u32 (aux2, p, invp) + aux1);
}
/* This one is slower, but should be correct under the relaxed condition
* that q be at most 127 bits or so, so that the coordinates of the
* Q-lattice can be as large as 63 bits. We call redc 7 times here, instead
* of 3 for fb_root_in_qlattice_31bits.
*/
static inline fbprime_t
fb_root_in_qlattice_127bits (const fbprime_t p, const fbprime_t R,
const uint64_t invp, const qlattice_basis &basis)
{
int64_t aux1, aux2;
uint64_t u, v;
/* Handle powers of 2 separately, REDC doesn't like them */
if (UNLIKELY(!(p & 1 )))
return fb_root_in_qlattice_po2(p, R, basis);
if (LIKELY(R < p)) /* Root in a,b-plane is affine */
{
/* The products R*redc(b1) must not overflow. Therefore we must
* be extra careful when p exceeds NextPrime(Floor(2^31.5))==3037000507
aux1 = ((int64_t)R)*(int64_t) redc_32(basis.b1, p, invp) - (int64_t) redc_32(basis.a1, p, invp);
aux2 = (int64_t) redc_32(basis.a0, p, invp) - ((int64_t)R)*(int64_t) redc_32(basis.b0, p, invp);
*/
uint64_t Rl = R;
uint64_t b1l = redc_32(basis.b1, p, invp);
uint64_t b0l = redc_32(basis.b0, p, invp);
aux1 = Rl*b1l;
aux2 = Rl*b0l;
/* If we have an overflow in the products above, replace by
* something which is in the same congruence class mod p, and is
* also within the correct range.
*
* aux1 < 0 can happen if 2^31.5 <= p <= 2^32-1 < 2^32, which
* implies in particular
* 2^63 <= (p-1)^2 <= 2^64-2*2^32+1
* <= 2^64-2*p
* we also have
* 2^63.5 <= p*2^32
* <= (2^32-1)*2^32
* <= 2^64-p
* If aux1 < 0, then it is a representative of something in the range
* [2^63..2^64-2*p[
* when we subtract a correction equal to p*2^32, we have;
* 2^63-2^64+p <= aux1 - correction <= 2^64-2*p-2^63.5
* -2^63+p <= aux1 - correction <= 0.58*2^63-2*p
*
* (we could have used p<=2^32-5 as well, since p is prime -- but
* assuming p odd is sufficient)
*/
if (aux1 < 0) aux1 -= ((uint64_t)p)<<32;
if (aux2 < 0) aux2 -= ((uint64_t)p)<<32;
/* We don't want any of the two subtractions below to overflow
* either. The bounds above show that it can't happen if we
* added a correction, because we are bounded away from the type
* limits.
*
* If we did _not_ have to add a correction, then we're happy as
* well, since aux1 is positive in that case, and the range
* [0..2^63-1[ is safe for both subtractions below.
*/
aux1 = aux1 - redc_32(basis.a1, p, invp);
aux2 = redc_32(basis.a0, p, invp) - aux2;
}
else /* Root in a,b-plane is projective */
{
/*
aux1 = (int64_t) redc_32(basis.b1, p, invp) - ((int64_t)(R - p))*(int64_t) redc_32(basis.a1, p, invp);
aux2 = ((int64_t)(R - p))*(int64_t) redc_32(basis.a0, p, invp) - (int64_t) redc_32(basis.b0, p, invp);
*/
uint64_t Rpl = R - p;
uint64_t a1l = redc_32(basis.a1, p, invp);
uint64_t a0l = redc_32(basis.a0, p, invp);
aux1 = Rpl*a1l;
aux2 = Rpl*a0l;
/* same analysis as above */
if (aux1 < 0) aux1 -= ((uint64_t)p)<<32;
if (aux2 < 0) aux2 -= ((uint64_t)p)<<32;
aux1 = aux1 - redc_32(basis.b1, p, invp);
aux2 = redc_32(basis.b0, p, invp) - aux2;
}
/* The root in the (i,j) plane is (aux1:aux2). Now let's put it
* in of the two forms:
* (x:1) -> we return x=aux1/aux2.
* (1:x), with p|x -> we return p+x = p+aux2/aux1
*
* (note that p is not necessarily a prime, it may be a prime power
*/
#ifdef USE_NATIVE_MOD
u = (aux1 >= 0) ? aux1 % p : p - ((-aux1) % p);
v = (aux2 >= 0) ? aux2 % p : p - ((-aux2) % p);
#else
u = redc_32(aux1, p, invp); /* 0 <= u < p */
v = redc_32(aux2, p, invp); /* 0 <= v < p */
#endif
aux2 = invmod_redc_32(v, p);
if (LIKELY(aux2)) {
aux1 = 0;
/* Warning: since 0 <= u < p and 0 <= aux2 < p, we have
0 <= aux2 < p^2, which might overflow the int64_t range
if p >= 3037000507. To avoid this, we subtract p if aux2 >= 2^31:
* if aux2 < 2^31, then aux2 * u < 2^31*p < 2^63
* if aux2 >= 2^31, this implies that p >= 2^31 since aux2 < p,
then (aux2 - p) * u > (2^31-p) * u > -2^31*p > -2^63 */
aux2 = (aux2 < 2147483648L) ? aux2 * u : (aux2 - p) * u;
}
else
{
/* root in i,j-plane is projective */
aux2 = invmod_redc_32(u, p);
if (UNLIKELY(!aux2))
{
fprintf (stderr, "Error, root in (i,j)-plane is projective\n");
ASSERT_ALWAYS(0);
}
aux1 = p;
/* Warning: we have the same overflow problem as above. */
aux2 *= v;
}
return (fbprime_t) (redc_32 (aux2, p, invp) + aux1);
}
/* This is just for powers of 2, and is used by both versions above */
static inline fbprime_t fb_root_in_qlattice_po2 (const fbprime_t p, const fbprime_t R, const qlattice_basis &basis)
{
fbprime_t u, v;
ASSERT(p == (p & -p)); /* Test that p is power of 2 */
if (R < p) /* Root in a,b-plane is non-projective */
{
u = (int64_t)R * (basis.b1 % p) - basis.a1;
v = basis.a0 - (int64_t)R * (basis.b0 % p);
}
else /* Root in a,b-plane is projective */
{
u = basis.b1 - (int64_t)(R - p) * (basis.a1 % p);
v = (int64_t)(R - p) * (basis.a0 % p) - basis.b0;
}
if (v & 1)
{
/* root in i,j-plane is non-projective */
v = invmod_po2 (v);
return (u * v) & (p - 1);
}
else
{
/* root in i,j-plane is projective */
u = invmod_po2 (u);
return p + ((u * v) & (p - 1));
}
}
#endif /* LAS_FBROOT_QLATTICE_HPP_ */
Computing file changes ...