inverses, but instead relies on GMP for fast divisibility testing.)
(2) Check the nature of any non-factored part using Miller-Rabin for
detecting composites, and Lucas for detecting primes.
- (3) Factor any remaining composite part using the Pollard-Brent rho
- algorithm or if USE_SQUFOF is defined to 1, try that first.
+ (3) Factor any remaining composite part using Pollard-Brent rho.
Status of found factors are checked again using Miller-Rabin and Lucas.
We prefer using Hensel norm in the divisions, not the more familiar
# define PROVE_PRIMALITY 1
#endif
-/* Faster for certain ranges but less general. */
-#ifndef USE_SQUFOF
-# define USE_SQUFOF 0
-#endif
-
-/* Output SQUFOF statistics. */
-#ifndef STAT_SQUFOF
-# define STAT_SQUFOF 0
-#endif
-
#include <config.h>
#include <getopt.h>
mpz_clears (P, t2, t, z, x, y, nullptr);
}
-#if USE_SQUFOF
-/* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If
- algorithm is replaced, consider also returning the remainder. */
-ATTRIBUTE_CONST
-static wide_uint
-isqrt (wide_uint n)
-{
- if (n == 0)
- return 0;
-
- int c = stdc_leading_zeros (n);
-
- /* Make x > sqrt(n). This will be invariant through the loop. */
- wide_uint x = (wide_uint) 1 << ((W_TYPE_SIZE + 1 - c) >> 1);
-
- for (;;)
- {
- wide_uint y = (x + n / x) / 2;
- if (y >= x)
- return x;
-
- x = y;
- }
-}
-
-ATTRIBUTE_CONST
-static wide_uint
-isqrt2 (wide_uint nh, wide_uint nl)
-{
- /* Ensures the remainder fits in a wide_uint. */
- affirm (nh < ((wide_uint) 1 << (W_TYPE_SIZE - 2)));
-
- if (nh == 0)
- return isqrt (nl);
-
- int shift = stdc_leading_zeros (nh) & ~1;
-
- /* Make x > sqrt (n). */
- wide_uint x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
- x <<= (W_TYPE_SIZE - shift) >> 1;
-
- /* Do we need more than one iteration? */
- for (;;)
- {
- MAYBE_UNUSED wide_uint r;
- wide_uint q, y;
- udiv_qrnnd (q, r, nh, nl, x);
- y = (x + q) / 2;
-
- if (y >= x)
- {
- wide_uint hi, lo;
- umul_ppmm (hi, lo, x + 1, x + 1);
- affirm (gt2 (hi, lo, nh, nl));
-
- umul_ppmm (hi, lo, x, x);
- affirm (ge2 (nh, nl, hi, lo));
- sub_ddmmss (hi, lo, nh, nl, hi, lo);
- affirm (hi == 0);
-
- return x;
- }
-
- x = y;
- }
-}
-
-/* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */
-# define MAGIC64 0x0202021202030213ULL
-# define MAGIC63 0x0402483012450293ULL
-# define MAGIC65 0x218a019866014613ULL
-# define MAGIC11 0x23b
-
-/* Return the square root if the input is a square, otherwise 0. */
-ATTRIBUTE_CONST
-static wide_uint
-is_square (wide_uint x)
-{
- /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before
- computing the square root. */
- if (((MAGIC64 >> (x & 63)) & 1)
- && ((MAGIC63 >> (x % 63)) & 1)
- /* Both 0 and 64 are squares mod (65). */
- && ((MAGIC65 >> ((x % 65) & 63)) & 1)
- && ((MAGIC11 >> (x % 11) & 1)))
- {
- wide_uint r = isqrt (x);
- if (r * r == x)
- return r;
- }
- return 0;
-}
-
-/* invtab[i] = floor (0x10000 / (0x100 + i) */
-static short const invtab[0x81] =
- {
- 0x200,
- 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
- 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
- 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
- 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
- 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
- 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
- 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
- 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
- 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
- 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
- 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
- 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
- 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
- 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
- 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
- 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
- };
-
-/* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case
- that q < 0x40; here it instead uses a table of (Euclidean) inverses. */
-# define div_smallq(q, r, u, d) \
- do { \
- if ((u) / 0x40 < (d)) \
- { \
- wide_uint _dinv, _mask, _q, _r; \
- int _cnt = stdc_leading_zeros (d); \
- _r = (u); \
- if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \
- { \
- _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \
- _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \
- } \
- else \
- { \
- _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \
- _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \
- } \
- _r -= _q * (d); \
- \
- _mask = -(wide_uint) (_r >= (d)); \
- (r) = _r - (_mask & (d)); \
- (q) = _q - _mask; \
- affirm ((q) * (d) + (r) == u); \
- } \
- else \
- { \
- wide_uint _q = (u) / (d); \
- (r) = (u) - _q * (d); \
- (q) = _q; \
- } \
- } while (0)
-
-/* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q
- = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025),
- with 3025 = 55^2.
-
- Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
- representing G_0 = (-55, 2 * 4652, 8653).
-
- In the notation of the paper:
-
- S_{-1} = 55, S_0 = 8653, R_0 = 4652
-
- Put
-
- t_0 = floor([q_0 + R_0] / S0) = 1
- R_1 = t_0 * S_0 - R_0 = 4001
- S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
-*/
-
-/* Multipliers, in order of efficiency:
- 0.7268 3*5*7*11 = 1155 = 3 (mod 4)
- 0.7317 3*5*7 = 105 = 1
- 0.7820 3*5*11 = 165 = 1
- 0.7872 3*5 = 15 = 3
- 0.8101 3*7*11 = 231 = 3
- 0.8155 3*7 = 21 = 1
- 0.8284 5*7*11 = 385 = 1
- 0.8339 5*7 = 35 = 3
- 0.8716 3*11 = 33 = 1
- 0.8774 3 = 3 = 3
- 0.8913 5*11 = 55 = 3
- 0.8972 5 = 5 = 1
- 0.9233 7*11 = 77 = 1
- 0.9295 7 = 7 = 3
- 0.9934 11 = 11 = 3
-*/
-# define QUEUE_SIZE 50
-#endif
-
-#if STAT_SQUFOF
-# define Q_FREQ_SIZE 50
-/* Element 0 keeps the total */
-static int q_freq[Q_FREQ_SIZE + 1];
-#endif
-
-#if USE_SQUFOF
-/* Return true on success. Expected to fail only for numbers
- >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */
-static bool
-factor_using_squfof (wide_uint n1, wide_uint n0, struct factors *factors)
-{
- /* Uses algorithm and notation from
-
- SQUARE FORM FACTORIZATION
- JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
-
- https://homes.cerias.purdue.edu/~ssw/squfof.pdf
- */
-
- static short const multipliers_1[] =
- { /* = 1 (mod 4) */
- 105, 165, 21, 385, 33, 5, 77, 1, 0
- };
- static short const multipliers_3[] =
- { /* = 3 (mod 4) */
- 1155, 15, 231, 35, 3, 55, 7, 11, 0
- };
-
- struct { wide_uint Q; wide_uint P; } queue[QUEUE_SIZE];
-
- if (n1 >= ((wide_uint) 1 << (W_TYPE_SIZE - 2)))
- return false;
-
- wide_uint sqrt_n = isqrt2 (n1, n0);
-
- if (n0 == sqrt_n * sqrt_n)
- {
- wide_uint p1, p0;
-
- umul_ppmm (p1, p0, sqrt_n, sqrt_n);
- affirm (p0 == n0);
-
- if (n1 == p1)
- {
- if (prime_p (sqrt_n))
- factor_insert_multiplicity (factors, sqrt_n, 2);
- else
- {
- struct factors f;
-
- f.nfactors = 0;
- if (!factor_using_squfof (0, sqrt_n, &f))
- {
- /* Try pollard rho instead */
- factor_using_pollard_rho (sqrt_n, 1, &f);
- }
- /* Duplicate the new factors */
- for (unsigned int i = 0; i < f.nfactors; i++)
- factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]);
- }
- return true;
- }
- }
-
- /* Select multipliers so we always get n * mu = 3 (mod 4) */
- for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
- *m; m++)
- {
- wide_uint S, Dh, Dl, Q1, Q, P, L, L1, B;
- unsigned int i;
- unsigned int mu = *m;
- int qpos = 0;
-
- affirm (mu * n0 % 4 == 3);
-
- /* In the notation of the paper, with mu * n == 3 (mod 4), we
- get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as
- I understand it, the necessary bound is 4 \mu^3 < n, or 32
- mu^3 < n.
-
- However, this seems insufficient: With n = 37243139 and mu =
- 105, we get a trivial factor, from the square 38809 = 197^2,
- without any corresponding Q earlier in the iteration.
-
- Requiring 64 mu^3 < n seems sufficient. */
- if (n1 == 0)
- {
- if ((wide_uint) mu * mu * mu >= n0 / 64)
- continue;
- }
- else
- {
- if (n1 > ((wide_uint) 1 << (W_TYPE_SIZE - 2)) / mu)
- continue;
- }
- umul_ppmm (Dh, Dl, n0, mu);
- Dh += n1 * mu;
-
- affirm (Dl % 4 != 1);
- affirm (Dh < (wide_uint) 1 << (W_TYPE_SIZE - 2));
-
- S = isqrt2 (Dh, Dl);
-
- Q1 = 1;
- P = S;
-
- /* Square root remainder fits in one word, so ignore high part. */
- Q = Dl - P * P;
- /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))? */
- L = isqrt (2 * S);
- B = 2 * L;
- L1 = mu * 2 * L;
-
- /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
- 4 D. */
-
- for (i = 0; i <= B; i++)
- {
- wide_uint q, P1, t, rem;
-
- div_smallq (q, rem, S + P, Q);
- P1 = S - rem; /* P1 = q*Q - P */
-
- affirm (q > 0 && Q > 0);
-
-# if STAT_SQUFOF
- q_freq[0]++;
- q_freq[MIN (q, Q_FREQ_SIZE)]++;
-# endif
-
- if (Q <= L1)
- {
- wide_uint g = Q;
-
- if ((Q & 1) == 0)
- g /= 2;
-
- g /= gcd_odd (g, mu);
-
- if (g <= L)
- {
- if (qpos >= QUEUE_SIZE)
- error (EXIT_FAILURE, 0, _("squfof queue overflow"));
- queue[qpos].Q = g;
- queue[qpos].P = P % g;
- qpos++;
- }
- }
-
- /* I think the difference can be either sign, but mod
- 2^W_TYPE_SIZE arithmetic should be fine. */
- t = Q1 + q * (P - P1);
- Q1 = Q;
- Q = t;
- P = P1;
-
- if ((i & 1) == 0)
- {
- wide_uint r = is_square (Q);
- if (r)
- {
- for (int j = 0; j < qpos; j++)
- {
- if (queue[j].Q == r)
- {
- if (r == 1)
- /* Traversed entire cycle. */
- goto next_multiplier;
-
- /* Need the absolute value for divisibility test. */
- if (P >= queue[j].P)
- t = P - queue[j].P;
- else
- t = queue[j].P - P;
- if (t % r == 0)
- {
- /* Delete entries up to and including entry
- j, which matched. */
- memmove (queue, queue + j + 1,
- (qpos - j - 1) * sizeof (queue[0]));
- qpos -= (j + 1);
- }
- goto next_i;
- }
- }
-
- /* We have found a square form, which should give a
- factor. */
- Q1 = r;
- affirm (S >= P); /* What signs are possible? */
- P += r * ((S - P) / r);
-
- /* Note: Paper says (N - P*P) / Q1, that seems incorrect
- for the case D = 2N. */
- /* Compute Q = (D - P*P) / Q1, but we need double
- precision. */
- wide_uint hi, lo;
- umul_ppmm (hi, lo, P, P);
- sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
- udiv_qrnnd (Q, rem, hi, lo, Q1);
- affirm (rem == 0);
-
- for (;;)
- {
- /* Note: There appears to by a typo in the paper,
- Step 4a in the algorithm description says q <--
- floor([S+P]/\hat Q), but looking at the equations
- in Sec. 3.1, it should be q <-- floor([S+P] / Q).
- (In this code, \hat Q is Q1). */
- div_smallq (q, rem, S + P, Q);
- P1 = S - rem; /* P1 = q*Q - P */
-
-# if STAT_SQUFOF
- q_freq[0]++;
- q_freq[MIN (q, Q_FREQ_SIZE)]++;
-# endif
- if (P == P1)
- break;
- t = Q1 + q * (P - P1);
- Q1 = Q;
- Q = t;
- P = P1;
- }
-
- if ((Q & 1) == 0)
- Q /= 2;
- Q /= gcd_odd (Q, mu);
-
- affirm (Q > 1 && (n1 || Q < n0));
-
- if (prime_p (Q))
- factor_insert (factors, Q);
- else if (!factor_using_squfof (0, Q, factors))
- factor_using_pollard_rho (Q, 2, factors);
-
- divexact_21 (n1, n0, n1, n0, Q);
-
- if (prime2_p (n1, n0))
- factor_insert_large (factors, n1, n0);
- else
- {
- if (!factor_using_squfof (n1, n0, factors))
- {
- if (n1 == 0)
- factor_using_pollard_rho (n0, 1, factors);
- else
- factor_using_pollard_rho2 (n1, n0, 1, factors);
- }
- }
-
- return true;
- }
- }
- next_i:;
- }
- next_multiplier:;
- }
- return false;
-}
-#endif
-
/* Compute the prime factors of the 128-bit number (T1,T0), and put the
results in FACTORS. */
static void
factor_insert_large (factors, t1, t0);
else
{
-#if USE_SQUFOF
- if (factor_using_squfof (t1, t0, factors))
- return;
-#endif
-
if (t1 == 0)
factor_using_pollard_rho (t0, 1, factors);
else
atexit (lbuf_flush);
-#if STAT_SQUFOF
- memset (q_freq, 0, sizeof (q_freq));
-#endif
-
bool ok;
if (argc <= optind)
ok = do_stdin ();
ok = false;
}
-#if STAT_SQUFOF
- if (q_freq[0] > 0)
- {
- double acc_f;
- printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]);
- for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
- {
- double f = (double) q_freq[i] / q_freq[0];
- acc_f += f;
- printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
- 100.0 * f, 100.0 * acc_f);
- }
- }
-#endif
-
return ok ? EXIT_SUCCESS : EXIT_FAILURE;
}