Revision d12d603036b334b53d6e886cfa985a082e981860 authored by Emmanuel Thomé on 29 January 2021, 06:20:31 UTC, committed by Emmanuel Thomé on 29 January 2021, 21:39:17 UTC
1 parent 590bfe4
rootsieve1.c
/* This implements Algorithm 1 from "Root Optimization of Polynomials in the
Number Field Sieve" by Shi Bai, Richard P. Brent and Emmanuel Thomé,
Mathematics of Computation, 2015.
More precisely:
* when the macro ORIGINAL is defined, it implements the original version
described in the above reference;
* when ORIGINAL is not defined (which is the default), it implements a
variant which gives better results. Namely, when p^k is the largest
power of p < B, the contribution is multiplied by p/(p-1) to take into
account lifted roots mod p^(k+1), p^(k+2), ...
On the RSA-768 polynomial, with B=V=W=200 and ORIGINAL defined, we get a
maximal difference of 0.55 between the affine alpha-value and the
computed estimation, and an average difference of 0.067, for all
polynomials of the [-200,200]^2 grid.
With ORIGINAL undefined, we get a maximal difference of 0.42 only,
and an average of 0.0045 only.
*/
#include "cado.h" // IWYU pragma: keep
#include <float.h> // DBL_MAX
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gmp.h>
#include "auxiliary.h" /* for common routines with polyselect.c */
#include "area.h"
#include "murphyE.h"
#include "size_optimization.h"
#include "omp_proxy.h"
#include "gmp_aux.h" // ulong_isprime
#include "timing.h" // seconds
#include "macros.h"
/* define ORIGINAL if you want the original algorithm from the paper */
// #define ORIGINAL
// #define TRACE_V 7
// #define TRACE_W 3
/* The algorithm is very sensitive to GUARD_ALPHA: with GUARD_ALPHA=1.0,
almost 87% of the time is spent checking potential records.
With GUARD_ALPHA=0.5, only about 20% of the time is spent for that. */
#define GUARD_ALPHA 0.5
/* global variables */
int verbose = 0; /* verbosity level */
long *Primes, nprimes; /* primes less than B */
long *Q; /* largest p^k < B */
long bestu = 0, bestv = 0; /* current best rotation */
mpz_t bestw; /* current best rotation in w */
double best_alpha = DBL_MAX; /* alpha of best rotation */
double best_E = 0; /* E of best rotation (with -E) */
double tot_pols = 0; /* number of sieved polynomial */
long u0 = 0, v0 = 0, w0 = 0; /* initial translation */
int optimizeE = 0; /* if not zero, optimize E instead of alpha */
double guard_alpha = 0.0; /* guard when -E */
long mod = 0; /* consider class of u,v,w % mod, 0 = undef */
double tot_alpha = 0; /* sum of alpha's */
int keep = 10; /* number of best classes kept */
double effort = DBL_MAX; /* total effort */
typedef struct sieve_data {
uint16_t q;
uint16_t s;
float nu;
} sieve_data;
static void
usage_and_die (char *argv0)
{
fprintf (stderr, "usage: %s [-area a] [-I n] [-Bf b] [-Bg c] [-margin x] [-v] [-sopt] [-V vmax] [-W wmax] [-B bbb] poly\n", argv0);
fprintf (stderr, " poly: filename of polynomial\n");
fprintf (stderr, " -area a area parameter for Murphy-E computation (default %.2e)\n", AREA);
fprintf (stderr, " -I nnn I-value for Murphy-E computation (overrides area)\n");
fprintf (stderr, " -Bf b Bf bound for Murphy-E computation (default %.2e)\n", BOUND_F);
fprintf (stderr, " -Bg c Bg bound for Murphy-E computation (default %.2e)\n", BOUND_G);
fprintf (stderr, " -margin x allows a lognorm increase of x (default %.2f)\n", NORM_MARGIN);
fprintf (stderr, " -v verbose toggle\n");
fprintf (stderr, " -sopt first size-optimize the given polynomial\n");
fprintf (stderr, " -B nnn parameter for alpha computation (default %d)\n", ALPHA_BOUND);
fprintf (stderr, " -E optimize E instead of alpha\n");
exit (1);
}
/* return x mod m, with 0 <= x < m */
static long
get_mod (long x, long m)
{
x = x % m;
return (x >= 0) ? x : x + m;
}
static unsigned long
initPrimes (unsigned long B)
{
unsigned long nprimes = 0, p, q, l;
Primes = malloc (B * sizeof (unsigned long));
ASSERT_ALWAYS(Primes != NULL);
for (p = 2; p < B; p += 1 + (p > 2))
if (ulong_isprime (p))
Primes[nprimes++] = p;
Primes = realloc (Primes, nprimes * sizeof (unsigned long));
ASSERT_ALWAYS(Primes != NULL);
/* compute prime powers */
Q = malloc (nprimes * sizeof (unsigned long));
ASSERT_ALWAYS(Q != NULL);
for (l = 0; l < nprimes; l++)
{
p = Primes[l];
for (q = p; q * p < B; q *= p);
Q[l] = q;
}
return nprimes;
}
/* Put in roots[0], roots[1], ... the roots of f + g * w = 0 mod q,
and return the number of roots.
Assume 0 <= f, g < q. */
static unsigned long
get_roots (unsigned long *roots, unsigned long f, unsigned long g,
unsigned long q)
{
unsigned long nroots = 0;
unsigned long h = gcd_ul (g, q);
if (h == 1) /* only one root, namely -f/g mod q */
{
unsigned long invg = invert_ul (g, q);
roots[0] = get_mod (-f * invg, q);
nroots = 1;
}
else if ((f % h) != 0)
nroots = 0;
else
{
f /= h;
g /= h;
q /= h;
unsigned long invg = invert_ul (g, q);
roots[0] = get_mod (-f * invg, q);
for (unsigned long j = 1; j < h; j++)
roots[j] = roots[j-1] + q;
nroots = h;
}
return nroots;
}
#define TRIES 10
/* Return the average value of alpha in the class (v,w) = (modv,modw) % mod.
Assume q is a prime power. */
static double
average_alpha (cado_poly_srcptr poly0, long modv, long modw, long q)
{
cado_poly poly;
double s = 0.0, alpha;
long v0 = 0, w0 = 0, p, t;
/* check if q is a prime power */
ASSERT_ALWAYS (q >= 2);
for (p = 2; p * p <= q && q % p != 0; p += 1 + (p > 2));
if (q % p != 0)
p = q; /* q is prime */
/* now p is the smallest prime factor of q */
for (t = q; t % p == 0; t = t / p);
ASSERT_ALWAYS (t == 1);
/* first make a local copy of the original polynomial */
cado_poly_init (poly);
cado_poly_set (poly, poly0);
#define G1 poly->pols[RAT_SIDE]->coeff[1]
#define G0 poly->pols[RAT_SIDE]->coeff[0]
/* first rotate by modv*x+modw */
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, 0, modv, 1);
v0 = modv;
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, 0, modw, 0);
w0 = modw;
for (long j = 0; j < TRIES; j++)
{
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, v0, q * j + modv, 1);
v0 = q * j + modv;
for (long k = 0; k < TRIES; k++)
{
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, w0, q*k + modw, 0);
w0 = q * k + modw;
alpha = get_alpha_affine_p (poly->pols[ALG_SIDE], p);
s += alpha;
}
}
#undef G1
#undef G0
cado_poly_clear (poly);
return s / pow ((double) TRIES, 2.0);
}
/* return 1/p mod q */
static long
invert (long p, long q)
{
for (long t = 1; t < q; t++)
if ((t * p) % q == 1)
return t;
ASSERT_ALWAYS(0);
}
/* Return c such that c = a mod p and c = b mod q.
Assume invp = 1/p mod q. */
static long
crt (long a, long b, long p, long q, long invp)
{
/* assume c = a + t*p, then t = (b-a)/p mod q */
long t = (b - a) % q;
if (t < 0)
t += q;
t = (t * invp) % q;
return a + t * p;
}
typedef struct
{
long vmod, wmod;
double alpha;
} class;
/* Insert alpha into c, where c has already n entries (maximum is keep).
Return the new value of n. */
static int
insert_class (class *c, int n, int keep, double alpha, long v, long w,
long vmin, long vmax, long mod)
{
int i;
/* check if this class has at least one representative in [vmin,vmax] */
long t = get_mod (v - vmin, mod);
if (vmin + t > vmax)
return n; /* no representative in [vmin,vmax] */
/* if alpha exceeds the best alpha value + guard_alpha, then it cannot
yield A[j] < best_alpha + guard_alpha in rotate_v(). Note that this
remains true after crt: if mod=mod1*mod2, and alpha1 > best_alpha1
+ guard_alpha, then since alpha = alpha1 + alpha2, then
alpha > best_alpha1 + best_alpha2 + guard_alpha */
if (n > 0 && alpha > c[0].alpha + guard_alpha)
return n;
for (i = n; i > 0 && alpha < c[i-1].alpha; i--)
c[i] = c[i-1];
/* now i = 0 or alpha >= c[i-1].alpha */
if (i < keep)
{
c[i].vmod = v;
c[i].wmod = w;
c[i].alpha = alpha;
}
n += (n < keep);
return n;
}
/* Return the (at most keep) best classes (v,w) mod 'mod'.
Put in *nc the number of returned classes. */
static class*
best_classes (cado_poly poly0, long mod, int keep, long vmin, long vmax,
int *nc, long u)
{
int nfactors = 0;
unsigned long *factors = NULL, p;
class *c, *d, *e;
int nd, ne;
int i;
cado_poly poly;
long q, Q = 1;
if (mod == 1)
{
c = malloc (sizeof (class));
c[0].vmod = c[0].wmod = 0;
c[0].alpha = 0; /* value does not matter */
*nc = 1;
return c;
}
*nc = 0;
/* first determine the prime factors of mod */
for (long t = mod, p = 2; t != 1; p += 1 + (p & 1))
{
if ((t % p) == 0)
{
nfactors ++;
factors = realloc (factors, nfactors * sizeof (unsigned long));
ASSERT_ALWAYS(factors != NULL);
q = 1;
while ((t % p) == 0)
{
t /= p;
q *= p;
}
factors[nfactors - 1] = q;
}
}
/* make a local copy of the original polynomial */
cado_poly_init (poly);
cado_poly_set (poly, poly0);
c = malloc ((keep + 1) * sizeof (class));
ASSERT_ALWAYS(c != NULL);
d = malloc ((keep + 1) * sizeof (class));
ASSERT_ALWAYS(d != NULL);
e = malloc ((keep + 1) * sizeof (class));
ASSERT_ALWAYS(e != NULL);
for (i = 0; i < nfactors; i++)
{
nd = 0; /* number of elements in 'd' */
q = factors[i];
/* determine p such that q=p^k */
for (p = 2; q % p; p++);
for (long v = 0; v < q; v++)
{
for (long w = 0; w < q; w++)
{
double alpha;
alpha = average_alpha (poly, v, w, q);
nd = insert_class (d, nd, keep, alpha, v, w, vmin, vmax, q);
}
}
if (i == 0) /* copy d into c */
{
memcpy (c, d, nd * sizeof (class));
*nc = nd;
}
else /* merge c and d into e */
{
long inv = invert (Q, q);
ne = 0;
for (int ic = 0; ic < *nc; ic++)
for (int id = 0; id < nd; id++)
{
double alpha = c[ic].alpha + d[id].alpha;
/* if alpha is larger (i.e., worse) than the last element,
since d[] is sorted by increasing values of alpha, we
assume all further values will be worse */
if (ne == keep && e[ne-1].alpha < alpha)
break;
long v = crt (c[ic].vmod, d[id].vmod, Q, q, inv);
long w = crt (c[ic].wmod, d[id].wmod, Q, q, inv);
ne = insert_class (e, ne, keep, alpha, v, w, vmin, vmax, Q * q);
}
/* copy back e to c */
memcpy (c, e, ne * sizeof (class));
*nc = ne;
}
Q *= q;
}
/* if u = -u0, check the class of the initial polynomial (-v0,-w0) */
if (u == -u0)
{
int included = -1;
for (i = 0; i < *nc; i++)
if (get_mod (-v0, mod) == c[i].vmod && get_mod (-w0, mod) == c[i].wmod)
included = i;
if (included >= 0)
printf ("class of initial polynomial has rank %d (%.2f)\n",
included, c[included].alpha);
else
{
double alpha = 0;
for (int j = 0; j < nfactors; j++)
alpha += average_alpha (poly, get_mod (-v0, factors[j]),
get_mod (-w0, factors[j]), factors[j]);
printf ("class of initial polynomial is not included");
if (*nc > 0)
printf (" (last %.2f wrt %.2f)\n", c[*nc - 1].alpha, alpha);
else
printf (" (%.2f)\n", alpha);
}
}
cado_poly_clear (poly);
free (factors);
free (d);
free (e);
return c;
}
/* rotation for a fixed value of v */
static void
rotate_v (cado_poly_srcptr poly0, long v, long B,
double maxlognorm, double Bf, double Bg, double area, long u,
long modw)
{
long w, wmin, wmax;
cado_poly poly;
long l;
#define G1 poly->pols[RAT_SIDE]->coeff[1]
#define G0 poly->pols[RAT_SIDE]->coeff[0]
mpz_t wminz, wmaxz;
double tot_pols_local = 0;
double tot_alpha_local = 0;
mpz_init (wminz);
mpz_init (wmaxz);
/* first make a local copy of the original polynomial */
cado_poly_init (poly);
cado_poly_set (poly, poly0);
/* compute f + (v*x)*g */
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, 0, v, 1);
rotation_space r;
expected_growth (&r, poly->pols[ALG_SIDE], poly->pols[RAT_SIDE], 0,
maxlognorm, poly->skew);
mpz_set_d (wminz, r.kmin);
mpz_set_d (wmaxz, r.kmax);
if (verbose > 1)
#pragma omp critical
{
gmp_printf ("v=%ld: wmin=%Zd wmax=%Zd\n", v, wminz, wmaxz);
fflush (stdout);
}
/* Ensure wminz % mod = modw. Since mod <= MAX_LONG, we have
s := wminz % mod < MAX_LONG thus modw - s fits in a long and
there is no underflow below. */
long t = get_mod (modw - mpz_fdiv_ui (wminz, mod), mod);
ASSERT_ALWAYS(0 <= t && t < mod);
mpz_add_ui (wminz, wminz, t);
if (mpz_cmp (wminz, wmaxz) >= 0)
goto end;
/* if mod != 1, we have f + (k*mod+modw)*g = (f+modw*g) + k*(mod*g) */
if (mod > 1)
{
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, 0, modw, 0); /* f <- f+modw*g */
mpz_poly_mul_si (poly->pols[RAT_SIDE], poly->pols[RAT_SIDE], mod);
/* wmin -> (wmin - modw) / mod */
mpz_sub_ui (wminz, wminz, modw);
ASSERT_ALWAYS(mpz_divisible_ui_p (wminz, mod));
mpz_divexact_ui (wminz, wminz, mod);
/* wmax -> (wmax - modw) / mod */
mpz_sub_ui (wmaxz, wmaxz, modw);
mpz_cdiv_q_ui (wmaxz, wmaxz, mod);
}
/* compute the expected value sum(log(p)/(p-1), p < B) and the
sum of largest prime powers sum(p^floor(log(B-1)/log(p)), p < B) */
double expected = 0.0;
unsigned long sum_of_prime_powers = 0;
for (l = 0; l < nprimes; l++)
{
long p = Primes[l];
expected += log ((double) p) / (double) (p - 1);
sum_of_prime_powers += Q[l];
}
ASSERT_ALWAYS (mpz_fits_slong_p (wmaxz));
ASSERT_ALWAYS (mpz_fits_slong_p (wminz));
wmax = mpz_get_si (wmaxz);
wmin = mpz_get_si (wminz);
mpz_t ump;
mpz_init (ump);
unsigned long *roots = malloc (B * sizeof (long));
ASSERT_ALWAYS(roots != NULL);
float *L;
double nu;
L = malloc (B * sizeof (float));
/* sieve data: sieve_q contains the largest p^k < B for each prime p,
it thus fits in an uint16_t if B does;
sieve_s contains the first index i multiple of q where the contribution
sieve_nu should be added, it is thus smaller than q and thus fits too. */
sieve_data *sieve_d = malloc (sum_of_prime_powers * sizeof (sieve_data));
unsigned long sieve_n = 0;
for (l = 0; l < nprimes; l++)
{
long p = Primes[l], s, t, q;
double logp = log ((double) p);
memset (L, 0, B * sizeof (float));
for (q = p; q <= Q[l]; q *= p)
{
/* the contribution is log(p)/p^(k-1)/(p+1) when the exponent k
is not the largest one, and log(p)/p^(k-1)/(p+1)*p/(p-1) for the
largest exponent k */
nu = logp / (double) q * (double) p / (double) (p + 1);
#ifndef ORIGINAL
if (q == Q[l])
nu *= (double) p / (double) (p - 1);
#endif
for (long x = 0; x < q; x++)
{
/* compute f(x) and g(x) mod p^k, where q = p^k */
unsigned long fx, gx;
mpz_poly_eval_ui (ump, poly->pols[ALG_SIDE], x);
fx = mpz_fdiv_ui (ump, q);
mpz_poly_eval_ui (ump, poly->pols[RAT_SIDE], x);
gx = mpz_fdiv_ui (ump, q);
/* search roots w of fx + w*gx = 0 mod q */
unsigned long nroots = get_roots (roots, fx, gx, q);
for (unsigned long i = 0; i < nroots; i++)
{
long w = roots[i];
/* update for w+t*q */
for (t = 0; t < Q[l] / q; t++)
L[w + t * q] += nu;
}
}
}
/* prepare data for the sieve */
q = Q[l];
for (w = 0; w < q; w++)
{
nu = L[w];
if (nu == 0.0)
continue;
/* compute s = w+t*q-wmin such that s - q < 0 <= s, i.e.,
(t-1)*q < wmin-w <= t*q: t = ceil((wmin-w)/q) */
if (wmin - w < 0)
t = (wmin - w) / q;
else
t = (wmin - w + q - 1) / q;
s = w + t * q - wmin;
sieve_d[sieve_n].q = q;
sieve_d[sieve_n].s = s;
sieve_d[sieve_n].nu = nu;
sieve_n ++;
}
}
ASSERT_ALWAYS(sieve_n <= sum_of_prime_powers);
#define LEN (1<<14) /* length of the sieve array */
float *A = malloc (LEN * sizeof (float));
ASSERT_ALWAYS(A != NULL);
/* we sieve by chunks of LEN cells at a time */
long wcur = wmin;
while (wcur < wmax)
{
/* A[j] corresponds to w = wcur + j */
for (long j = 0; j < LEN; j++)
A[j] = expected;
#if defined(TRACE_V) && defined(TRACE_W)
if (v == TRACE_V && (mod * wcur + modw <= TRACE_W &&
TRACE_W < mod * (wcur + LEN) + modw))
printf ("initialized A[%d] to %f\n", TRACE_W, A[(TRACE_W - modw) / mod - wcur]);
#endif
/* now perform the sieve */
for (unsigned long i = 0; i < sieve_n; i++)
{
long q = sieve_d[i].q;
long s = sieve_d[i].s;
float nu = sieve_d[i].nu;
/* if mod=1, a given value of s corresponds to w = wcur + s
if mod>1, then s corresponds to w = mod*(wcur + s) + modw */
while (s < LEN)
{
#if defined(TRACE_V) && defined(TRACE_W)
if (v == TRACE_V && TRACE_W == mod * (wcur + s) + modw)
printf ("q=%ld: update A[%d] from %f to %f\n",
q, TRACE_W, A[s], A[s] - nu);
#endif
A[s] -= nu;
s += q;
}
sieve_d[i].s = s - LEN;
}
/* check for the smallest A[s] */
/* if wcur + LEN > wmax, we check only wmax - wcur entries */
long maxj = (wcur + LEN <= wmax) ? LEN : wmax - wcur;
for (long j = 0; j < maxj; j++)
{
tot_alpha_local += A[j];
tot_pols_local += 1;
/* print alpha and E of original polynomial */
if (u == -u0 && v == -v0 && mod * (wcur + j) + modw == -w0)
{
w = wcur + j; /* local value of w, the global one is mod * w + modw */
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, 0, w, 0);
double skew = poly->skew; /* save skewness */
poly->skew = L2_skewness (poly->pols[ALG_SIDE], SKEWNESS_DEFAULT_PREC);
double lognorm = L2_lognorm (poly->pols[ALG_SIDE], poly->skew);
/* to compute E, we need to divide g by mod */
mpz_poly_divexact_ui (poly->pols[RAT_SIDE], poly->pols[RAT_SIDE], mod);
double E = MurphyE (poly, Bf, Bg, area, MURPHY_K, get_alpha_bound ());
/* restore g */
mpz_poly_mul_si (poly->pols[RAT_SIDE], poly->pols[RAT_SIDE], mod);
/* this can only occur for one thread, thus no need to put
#pragma omp critical */
gmp_printf ("u=%ld v=%ld w=%ld lognorm=%.2f est_alpha_aff=%.2f E=%.2e [original]\n",
u, v, mod * w + modw, lognorm, A[j], E);
fflush (stdout);
/* restore the original polynomial (w=0) and skewness */
poly->skew = skew;
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, w, 0, 0);
}
if (A[j] < best_alpha + guard_alpha)
{
w = wcur + j;
/* compute E */
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, 0, w, 0);
double skew = poly->skew; /* save skewness */
poly->skew = L2_skewness (poly->pols[ALG_SIDE], SKEWNESS_DEFAULT_PREC);
double lognorm = L2_lognorm (poly->pols[ALG_SIDE], poly->skew);
/* to compute E, we need to divide g by mod */
mpz_poly_divexact_ui (poly->pols[RAT_SIDE], poly->pols[RAT_SIDE], mod);
double E = MurphyE (poly, Bf, Bg, area, MURPHY_K, get_alpha_bound ());
/* restore g */
mpz_poly_mul_si (poly->pols[RAT_SIDE], poly->pols[RAT_SIDE], mod);
/* restore the original polynomial (w=0) and skewness */
poly->skew = skew;
rotate_aux (poly->pols[ALG_SIDE]->coeff, G1, G0, w, 0, 0);
if (optimizeE == 0 || (optimizeE == 1 && E > best_E))
#pragma omp critical
{
bestu = u;
bestv = v;
mpz_set_si (bestw, mod * w + modw);
best_alpha = (double) A[j];
best_E = E;
gmp_printf ("u=%ld v=%ld w=%Zd lognorm=%.2f est_alpha_aff=%.2f E=%.2e\n",
u, v, bestw, lognorm, best_alpha, E);
fflush (stdout);
}
}
}
#if defined(TRACE_V) && defined(TRACE_W)
if (v == TRACE_V && (mod * wcur + modw <= TRACE_W &&
TRACE_W < mod * (wcur + LEN) + modw))
printf ("A[%d] = %f\n", TRACE_W, A[(TRACE_W - modw) / mod - wcur]);
#endif
wcur += LEN;
}
free (sieve_d);
free (L);
free (roots);
mpz_clear (ump);
free (A);
end:
cado_poly_clear (poly);
#undef G1
#undef G0
mpz_clear (wminz);
mpz_clear (wmaxz);
/* accumulate the number of polynomials and the alpha values */
#pragma omp critical
{
tot_pols += tot_pols_local;
tot_alpha += tot_alpha_local;
}
}
static void
rotate (cado_poly poly, long B, double maxlognorm, double Bf, double Bg,
double area, long u)
{
/* determine range [vmin,vmax] */
rotation_space r;
expected_growth (&r, poly->pols[ALG_SIDE], poly->pols[RAT_SIDE], 1,
maxlognorm, poly->skew);
long vmin = (r.kmin < (double) LONG_MIN) ? LONG_MIN : r.kmin;
long vmax = (r.kmax > (double) LONG_MAX) ? LONG_MAX : r.kmax;
if (verbose)
{
printf ("u=%ld: vmin=%ld vmax=%ld\n", u, vmin, vmax);
fflush (stdout);
}
class *c = NULL;
int n;
c = best_classes (poly, mod, keep, vmin, vmax, &n, u);
ASSERT_ALWAYS (n <= keep);
printf ("u=%ld: kept %d class(es)", u, n);
if (n == 0)
printf ("\n");
else
printf (" with alpha from %.2f to %.2f\n", c[0].alpha, c[n-1].alpha);
#pragma omp parallel for schedule(dynamic)
for (int i = 0; i < n; i++)
{
if (verbose > 1)
#pragma omp critical
printf ("u=%ld, class i=%d: -mod %ld -modv %ld -modw %ld %.2f\n",
u0 + u, i, mod, get_mod (v0 + c[i].vmod, mod),
get_mod (w0 + c[i].wmod, mod), c[i].alpha);
long vmin1 = vmin;
/* first ensure that vmin1 = modv % mod */
long t = get_mod (c[i].vmod - vmin1, mod);
ASSERT_ALWAYS(0 <= t && t < mod);
vmin1 += t;
ASSERT_ALWAYS(get_mod (vmin1, mod) == c[i].vmod);
for (long v = vmin1; v <= vmax; v += mod)
rotate_v (poly, v, B, maxlognorm, Bf, Bg, area, u, c[i].wmod);
}
free (c);
}
/* don't modify poly, which is the size-optimized polynomial
(poly0 is the initial polynomial) */
static void
print_transformation (cado_poly_ptr poly0, cado_poly_srcptr poly)
{
mpz_t k;
int d = poly0->pols[ALG_SIDE]->deg;
mpz_init (k);
/* first compute the translation k: g(x+k) = g1*x + g1*k + g0 */
mpz_sub (k, poly->pols[RAT_SIDE]->coeff[0], poly0->pols[RAT_SIDE]->coeff[0]);
ASSERT_ALWAYS(mpz_divisible_p (k, poly0->pols[RAT_SIDE]->coeff[1]));
mpz_divexact (k, k, poly0->pols[RAT_SIDE]->coeff[1]);
gmp_printf ("translation %Zd, ", k);
do_translate_z (poly0->pols[ALG_SIDE], poly0->pols[RAT_SIDE]->coeff, k);
/* size_optimization might multiply f0 by some integer t */
ASSERT_ALWAYS(mpz_divisible_p (poly->pols[ALG_SIDE]->coeff[d],
poly0->pols[ALG_SIDE]->coeff[d]));
mpz_divexact (k, poly->pols[ALG_SIDE]->coeff[d],
poly0->pols[ALG_SIDE]->coeff[d]);
if (mpz_cmp_ui (k, 1) != 0)
{
gmp_printf ("multiplier %Zd, ", k);
mpz_poly_mul_mpz (poly0->pols[ALG_SIDE], poly0->pols[ALG_SIDE], k);
}
/* now compute rotation by x^2 */
mpz_sub (k, poly->pols[ALG_SIDE]->coeff[3], poly0->pols[ALG_SIDE]->coeff[3]);
ASSERT_ALWAYS(mpz_divisible_p (k, poly0->pols[RAT_SIDE]->coeff[1]));
mpz_divexact (k, k, poly0->pols[RAT_SIDE]->coeff[1]);
gmp_printf ("rotation [%Zd,", k);
ASSERT (mpz_fits_slong_p (k));
u0 = mpz_get_si (k);
rotate_auxg_z (poly0->pols[ALG_SIDE]->coeff, poly0->pols[RAT_SIDE]->coeff[1],
poly0->pols[RAT_SIDE]->coeff[0], k, 2);
mpz_sub (k, poly->pols[ALG_SIDE]->coeff[2], poly0->pols[ALG_SIDE]->coeff[2]);
ASSERT_ALWAYS(mpz_divisible_p (k, poly0->pols[RAT_SIDE]->coeff[1]));
mpz_divexact (k, k, poly0->pols[RAT_SIDE]->coeff[1]);
gmp_printf ("%Zd,", k);
ASSERT (mpz_fits_slong_p (k));
v0 = mpz_get_si (k);
rotate_auxg_z (poly0->pols[ALG_SIDE]->coeff, poly0->pols[RAT_SIDE]->coeff[1],
poly0->pols[RAT_SIDE]->coeff[0], k, 1);
mpz_sub (k, poly->pols[ALG_SIDE]->coeff[1], poly0->pols[ALG_SIDE]->coeff[1]);
ASSERT_ALWAYS(mpz_divisible_p (k, poly0->pols[RAT_SIDE]->coeff[1]));
mpz_divexact (k, k, poly0->pols[RAT_SIDE]->coeff[1]);
gmp_printf ("%Zd]\n", k);
ASSERT (mpz_fits_slong_p (k));
w0 = mpz_get_si (k);
rotate_auxg_z (poly0->pols[ALG_SIDE]->coeff, poly0->pols[RAT_SIDE]->coeff[1],
poly0->pols[RAT_SIDE]->coeff[0], k, 0);
ASSERT_ALWAYS(mpz_cmp (poly0->pols[ALG_SIDE]->coeff[0],
poly->pols[ALG_SIDE]->coeff[0]) == 0);
mpz_clear (k);
}
double
rotate_area_v (cado_poly_srcptr poly0, double maxlognorm, long v)
{
cado_poly poly;
double area;
cado_poly_init (poly);
cado_poly_set (poly, poly0);
rotate_aux (poly->pols[ALG_SIDE]->coeff, poly->pols[RAT_SIDE]->coeff[1],
poly->pols[RAT_SIDE]->coeff[0], 0, v, 1);
rotation_space r;
expected_growth (&r, poly->pols[ALG_SIDE], poly->pols[RAT_SIDE], 0,
maxlognorm, poly->skew);
area = r.kmax - r.kmin;
cado_poly_clear (poly);
return area;
}
/* estimate the rootsieve area for a given u */
double
rotate_area_u (cado_poly_srcptr poly0, double maxlognorm, long u)
{
double area, sum = 0.0;
long h, vmin, vmax;
cado_poly poly;
cado_poly_init (poly);
cado_poly_set (poly, poly0);
rotate_aux (poly->pols[ALG_SIDE]->coeff, poly->pols[RAT_SIDE]->coeff[1],
poly->pols[RAT_SIDE]->coeff[0], 0, u, 2);
rotation_space r;
expected_growth (&r, poly->pols[ALG_SIDE], poly->pols[RAT_SIDE], 1,
maxlognorm, poly->skew);
vmin = (r.kmin < (double) LONG_MIN) ? LONG_MIN : r.kmin;
vmax = (r.kmax > (double) LONG_MAX) ? LONG_MAX : r.kmax;
#define SAMPLE 100
if (vmax / SAMPLE - vmin / SAMPLE > 1)
h = vmax / SAMPLE - vmin / SAMPLE;
else
h = 1;
for (long v = vmin; v <= vmax; v += h)
{
area = rotate_area_v (poly, maxlognorm, v);
sum += area;
}
cado_poly_clear (poly);
return sum * h;
}
/* estimate the rootsieve area for umin <= u <= umax */
double
rotate_area (cado_poly_srcptr poly, double maxlognorm, long umin, long umax)
{
double area, sum = 0.0;
for (long u = umin; u <= umax; u++)
{
area = rotate_area_u (poly, maxlognorm, u);
sum += area;
}
return sum;
}
/* Given a sieving area, a maximal effort, and a value of keep,
compute the best 'mod' value. */
long
best_mod (double area, double maxeffort, double keep)
{
long l[] = {1, 2, 6, 12, 60, 420, 840, 2520, 27720, 360360, 720720, 12252240,
232792560, 5354228880, 26771144400, 80313433200, 2329089562800};
int i = 0;
double e;
do {
mod = l[i];
/* The number of polynomials sieved is approximately area/mod^2*keep.
Note: there is a bias when vmax-vmin is smaller than mod, since we
only keep classes that contain at least an element in [vmin, vmax],
thus the probability is larger than (vmax-vmin)/mod. */
e = area / (double) mod / (double) mod * (double) keep;
if (e <= maxeffort)
break;
i += 1;
}
while (i < 17);
printf ("using mod = %ld, effort = %.2e\n", mod, e);
return mod;
}
int
main (int argc, char **argv)
{
int argc0 = argc;
char **argv0 = argv;
cado_poly poly;
int I = 0;
double margin = NORM_MARGIN;
long umin = LONG_MIN, umax = LONG_MAX;
int sopt = 0;
double time = seconds ();
while (argc >= 2 && argv[1][0] == '-')
{
if (strcmp (argv[1], "-area") == 0)
{
area = atof (argv [2]);
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-I") == 0)
{
I = atoi (argv [2]);
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-Bf") == 0)
{
bound_f = atof (argv [2]);
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-Bg") == 0)
{
bound_g = atof (argv [2]);
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-margin") == 0)
{
margin = atof (argv [2]);
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-effort") == 0)
{
effort = atof (argv [2]);
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-v") == 0)
{
verbose ++;
argv ++;
argc --;
}
else if (strcmp (argv[1], "-E") == 0)
{
optimizeE = 1;
guard_alpha = GUARD_ALPHA;
argv ++;
argc --;
}
else if (strcmp (argv[1], "-sopt") == 0)
{
sopt = 1;
argv ++;
argc --;
}
else if (strcmp (argv[1], "-B") == 0)
{
unsigned long B = strtol (argv [2], NULL, 10);
set_alpha_bound (B);
argv += 2;
argc -= 2;
}
/* use http://oeis.org/A051451 for -mod:
1, 2, 6, 12, 60, 420, 840, 2520, 27720, 360360, 720720, 12252240,
232792560, 5354228880, 26771144400, 80313433200, 2329089562800,
72201776446800, 144403552893600 */
else if (strcmp (argv[1], "-mod") == 0)
{
mod = strtol (argv [2], NULL, 10);
ASSERT_ALWAYS(mod >= 1);
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-umin") == 0)
{
umin = strtol (argv [2], NULL, 10);
ASSERT_ALWAYS(umin > LONG_MIN); /* LONG_MIN is reserved */
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-umax") == 0)
{
umax = strtol (argv [2], NULL, 10);
ASSERT_ALWAYS(umax < LONG_MAX); /* LONG_MAX is reserved */
argv += 2;
argc -= 2;
}
else if (strcmp (argv[1], "-keep") == 0)
{
keep = atoi (argv [2]);
argv += 2;
argc -= 2;
}
else
break;
}
if (argc != 2)
usage_and_die (argv[0]);
#pragma omp parallel
#pragma omp master
printf ("# Using %d thread(s)\n", omp_get_num_threads ());
ASSERT_ALWAYS(B <= 65536);
if (I != 0)
area = bound_f * pow (2.0, (double) (2 * I - 1));
cado_poly_init (poly);
if (!cado_poly_read (poly, argv[1]))
{
fprintf (stderr, "Problem when reading file %s\n", argv[1]);
usage_and_die (argv[0]);
}
if (poly->skew == 0.0)
poly->skew = L2_skewness (poly->pols[ALG_SIDE], SKEWNESS_DEFAULT_PREC);
/* if -sopt, size-optimize */
if (sopt)
{
cado_poly c;
cado_poly_init (c);
cado_poly_set (c, poly);
size_optimization (c->pols[ALG_SIDE], c->pols[RAT_SIDE],
poly->pols[ALG_SIDE], poly->pols[RAT_SIDE],
SOPT_DEFAULT_EFFORT, verbose);
printf ("# initial polynomial:\n");
cado_poly_fprintf (stdout, poly, "");
print_transformation (poly, c);
cado_poly_set (poly, c);
printf ("# size-optimized polynomial:\n");
cado_poly_fprintf (stdout, poly, "");
cado_poly_clear (c);
}
nprimes = initPrimes (B);
/* compute the skewness */
poly->skew = L2_skewness (poly->pols[ALG_SIDE], SKEWNESS_DEFAULT_PREC);
double lognorm = L2_lognorm (poly->pols[ALG_SIDE], poly->skew);
double maxlognorm = lognorm + margin;
printf ("initial lognorm %.2f, maxlognorm %.2f\n", lognorm, maxlognorm);
/* determine range [umin,umax] */
rotation_space r;
expected_growth (&r, poly->pols[ALG_SIDE], poly->pols[RAT_SIDE], 2,
maxlognorm, poly->skew);
if (umin == LONG_MIN) /* umin was not given by the user */
umin = (r.kmin < (double) LONG_MIN) ? LONG_MIN : r.kmin;
if (umax == LONG_MAX) /* umax was not given by the user */
umax = (r.kmax > (double) LONG_MAX) ? LONG_MAX : r.kmax;
if (verbose)
printf ("umin=%ld umax=%ld\n", umin, umax);
if (mod == 0) /* compute best 'mod' for given effort */
{
double sieving_area = rotate_area (poly, maxlognorm, umin, umax);
/* print total sieving area */
printf ("sieving area %.2e\n", sieving_area);
mod = best_mod (sieving_area, effort, keep);
}
mpz_init (bestw);
long u0 = 0; /* current translation in u */
for (long u = umin; u <= umax; u++)
{
rotate_aux (poly->pols[ALG_SIDE]->coeff,
poly->pols[RAT_SIDE]->coeff[1],
poly->pols[RAT_SIDE]->coeff[0], u0, u, 2);
u0 = u;
rotate (poly, B, maxlognorm, bound_f, bound_g, area, u);
}
/* restore original polynomial */
rotate_aux (poly->pols[ALG_SIDE]->coeff, poly->pols[RAT_SIDE]->coeff[1],
poly->pols[RAT_SIDE]->coeff[0], u0, 0, 2);
/* perform the best rotation */
gmp_printf ("best rotation: u=%ld v=%ld w=%Zd alpha=%1.2f\n",
bestu, bestv, bestw, best_alpha);
/* perform the best rotation */
rotate_aux (poly->pols[ALG_SIDE]->coeff, poly->pols[RAT_SIDE]->coeff[1],
poly->pols[RAT_SIDE]->coeff[0], 0, bestu, 2);
rotate_aux (poly->pols[ALG_SIDE]->coeff, poly->pols[RAT_SIDE]->coeff[1],
poly->pols[RAT_SIDE]->coeff[0], 0, bestv, 1);
rotate_auxg_z (poly->pols[ALG_SIDE]->coeff, poly->pols[RAT_SIDE]->coeff[1],
poly->pols[RAT_SIDE]->coeff[0], bestw, 0);
/* recompute the skewness of the best polynomial */
poly->skew = L2_combined_skewness2 (poly->pols[0], poly->pols[1],
SKEWNESS_DEFAULT_PREC);
print_cadopoly_extra (stdout, poly, argc0, argv0, 0);
time = seconds () - time;
printf ("# Sieved %.2e polynomials in %.2f seconds (%.2es/p)\n",
tot_pols, time, time / tot_pols);
printf ("# Average alpha %.2f\n", tot_alpha / tot_pols);
free (Primes);
free (Q);
cado_poly_clear (poly);
mpz_clear (bestw);
return 0;
}
Computing file changes ...