From: Paul Eggert Date: Sat, 24 May 2025 17:29:06 +0000 (-0700) Subject: factor: squfof cleanup X-Git-Tag: v9.8~243 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8ff11d626b0a2635b9246c2448a157621f8ee828;p=thirdparty%2Fcoreutils.git factor: squfof cleanup * src/factor.c (USE_SQUFOF, STAT_SQUFOF): Assume these are always false, and simplify the code accordingly. We can bring it back later if needed be. --- diff --git a/src/factor.c b/src/factor.c index ff9d3c0be3..95383556a5 100644 --- a/src/factor.c +++ b/src/factor.c @@ -43,8 +43,7 @@ inverses, but instead relies on GMP for fast divisibility testing.) (2) Check the nature of any non-factored part using Miller-Rabin for detecting composites, and Lucas for detecting primes. - (3) Factor any remaining composite part using the Pollard-Brent rho - algorithm or if USE_SQUFOF is defined to 1, try that first. + (3) Factor any remaining composite part using Pollard-Brent rho. Status of found factors are checked again using Miller-Rabin and Lucas. We prefer using Hensel norm in the divisions, not the more familiar @@ -90,16 +89,6 @@ # define PROVE_PRIMALITY 1 #endif -/* Faster for certain ranges but less general. */ -#ifndef USE_SQUFOF -# define USE_SQUFOF 0 -#endif - -/* Output SQUFOF statistics. */ -#ifndef STAT_SQUFOF -# define STAT_SQUFOF 0 -#endif - #include #include @@ -1761,455 +1750,6 @@ mp_factor_using_pollard_rho (mpz_t n, unsigned long int a, mpz_clears (P, t2, t, z, x, y, nullptr); } -#if USE_SQUFOF -/* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)? If - algorithm is replaced, consider also returning the remainder. */ -ATTRIBUTE_CONST -static wide_uint -isqrt (wide_uint n) -{ - if (n == 0) - return 0; - - int c = stdc_leading_zeros (n); - - /* Make x > sqrt(n). This will be invariant through the loop. */ - wide_uint x = (wide_uint) 1 << ((W_TYPE_SIZE + 1 - c) >> 1); - - for (;;) - { - wide_uint y = (x + n / x) / 2; - if (y >= x) - return x; - - x = y; - } -} - -ATTRIBUTE_CONST -static wide_uint -isqrt2 (wide_uint nh, wide_uint nl) -{ - /* Ensures the remainder fits in a wide_uint. */ - affirm (nh < ((wide_uint) 1 << (W_TYPE_SIZE - 2))); - - if (nh == 0) - return isqrt (nl); - - int shift = stdc_leading_zeros (nh) & ~1; - - /* Make x > sqrt (n). */ - wide_uint x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1; - x <<= (W_TYPE_SIZE - shift) >> 1; - - /* Do we need more than one iteration? */ - for (;;) - { - MAYBE_UNUSED wide_uint r; - wide_uint q, y; - udiv_qrnnd (q, r, nh, nl, x); - y = (x + q) / 2; - - if (y >= x) - { - wide_uint hi, lo; - umul_ppmm (hi, lo, x + 1, x + 1); - affirm (gt2 (hi, lo, nh, nl)); - - umul_ppmm (hi, lo, x, x); - affirm (ge2 (nh, nl, hi, lo)); - sub_ddmmss (hi, lo, nh, nl, hi, lo); - affirm (hi == 0); - - return x; - } - - x = y; - } -} - -/* MAGIC[N] has a bit i set iff i is a quadratic residue mod N. */ -# define MAGIC64 0x0202021202030213ULL -# define MAGIC63 0x0402483012450293ULL -# define MAGIC65 0x218a019866014613ULL -# define MAGIC11 0x23b - -/* Return the square root if the input is a square, otherwise 0. */ -ATTRIBUTE_CONST -static wide_uint -is_square (wide_uint x) -{ - /* Uses the tests suggested by Cohen. Excludes 99% of the non-squares before - computing the square root. */ - if (((MAGIC64 >> (x & 63)) & 1) - && ((MAGIC63 >> (x % 63)) & 1) - /* Both 0 and 64 are squares mod (65). */ - && ((MAGIC65 >> ((x % 65) & 63)) & 1) - && ((MAGIC11 >> (x % 11) & 1))) - { - wide_uint r = isqrt (x); - if (r * r == x) - return r; - } - return 0; -} - -/* invtab[i] = floor (0x10000 / (0x100 + i) */ -static short const invtab[0x81] = - { - 0x200, - 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1, - 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7, - 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af, - 0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199, - 0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186, - 0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174, - 0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164, - 0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155, - 0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147, - 0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b, - 0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f, - 0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124, - 0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a, - 0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111, - 0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108, - 0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100, - }; - -/* Compute q = [u/d], r = u mod d. Avoids slow hardware division for the case - that q < 0x40; here it instead uses a table of (Euclidean) inverses. */ -# define div_smallq(q, r, u, d) \ - do { \ - if ((u) / 0x40 < (d)) \ - { \ - wide_uint _dinv, _mask, _q, _r; \ - int _cnt = stdc_leading_zeros (d); \ - _r = (u); \ - if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \ - { \ - _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \ - _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \ - } \ - else \ - { \ - _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \ - _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \ - } \ - _r -= _q * (d); \ - \ - _mask = -(wide_uint) (_r >= (d)); \ - (r) = _r - (_mask & (d)); \ - (q) = _q - _mask; \ - affirm ((q) * (d) + (r) == u); \ - } \ - else \ - { \ - wide_uint _q = (u) / (d); \ - (r) = (u) - _q * (d); \ - (q) = _q; \ - } \ - } while (0) - -/* Notes: Example N = 22117019. After first phase we find Q1 = 6314, Q - = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025), - with 3025 = 55^2. - - Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652, - representing G_0 = (-55, 2 * 4652, 8653). - - In the notation of the paper: - - S_{-1} = 55, S_0 = 8653, R_0 = 4652 - - Put - - t_0 = floor([q_0 + R_0] / S0) = 1 - R_1 = t_0 * S_0 - R_0 = 4001 - S_1 = S_{-1} +t_0 (R_0 - R_1) = 706 -*/ - -/* Multipliers, in order of efficiency: - 0.7268 3*5*7*11 = 1155 = 3 (mod 4) - 0.7317 3*5*7 = 105 = 1 - 0.7820 3*5*11 = 165 = 1 - 0.7872 3*5 = 15 = 3 - 0.8101 3*7*11 = 231 = 3 - 0.8155 3*7 = 21 = 1 - 0.8284 5*7*11 = 385 = 1 - 0.8339 5*7 = 35 = 3 - 0.8716 3*11 = 33 = 1 - 0.8774 3 = 3 = 3 - 0.8913 5*11 = 55 = 3 - 0.8972 5 = 5 = 1 - 0.9233 7*11 = 77 = 1 - 0.9295 7 = 7 = 3 - 0.9934 11 = 11 = 3 -*/ -# define QUEUE_SIZE 50 -#endif - -#if STAT_SQUFOF -# define Q_FREQ_SIZE 50 -/* Element 0 keeps the total */ -static int q_freq[Q_FREQ_SIZE + 1]; -#endif - -#if USE_SQUFOF -/* Return true on success. Expected to fail only for numbers - >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */ -static bool -factor_using_squfof (wide_uint n1, wide_uint n0, struct factors *factors) -{ - /* Uses algorithm and notation from - - SQUARE FORM FACTORIZATION - JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR. - - https://homes.cerias.purdue.edu/~ssw/squfof.pdf - */ - - static short const multipliers_1[] = - { /* = 1 (mod 4) */ - 105, 165, 21, 385, 33, 5, 77, 1, 0 - }; - static short const multipliers_3[] = - { /* = 3 (mod 4) */ - 1155, 15, 231, 35, 3, 55, 7, 11, 0 - }; - - struct { wide_uint Q; wide_uint P; } queue[QUEUE_SIZE]; - - if (n1 >= ((wide_uint) 1 << (W_TYPE_SIZE - 2))) - return false; - - wide_uint sqrt_n = isqrt2 (n1, n0); - - if (n0 == sqrt_n * sqrt_n) - { - wide_uint p1, p0; - - umul_ppmm (p1, p0, sqrt_n, sqrt_n); - affirm (p0 == n0); - - if (n1 == p1) - { - if (prime_p (sqrt_n)) - factor_insert_multiplicity (factors, sqrt_n, 2); - else - { - struct factors f; - - f.nfactors = 0; - if (!factor_using_squfof (0, sqrt_n, &f)) - { - /* Try pollard rho instead */ - factor_using_pollard_rho (sqrt_n, 1, &f); - } - /* Duplicate the new factors */ - for (unsigned int i = 0; i < f.nfactors; i++) - factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]); - } - return true; - } - } - - /* Select multipliers so we always get n * mu = 3 (mod 4) */ - for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1; - *m; m++) - { - wide_uint S, Dh, Dl, Q1, Q, P, L, L1, B; - unsigned int i; - unsigned int mu = *m; - int qpos = 0; - - affirm (mu * n0 % 4 == 3); - - /* In the notation of the paper, with mu * n == 3 (mod 4), we - get \Delta = 4 mu * n, and the paper's \mu is 2 mu. As far as - I understand it, the necessary bound is 4 \mu^3 < n, or 32 - mu^3 < n. - - However, this seems insufficient: With n = 37243139 and mu = - 105, we get a trivial factor, from the square 38809 = 197^2, - without any corresponding Q earlier in the iteration. - - Requiring 64 mu^3 < n seems sufficient. */ - if (n1 == 0) - { - if ((wide_uint) mu * mu * mu >= n0 / 64) - continue; - } - else - { - if (n1 > ((wide_uint) 1 << (W_TYPE_SIZE - 2)) / mu) - continue; - } - umul_ppmm (Dh, Dl, n0, mu); - Dh += n1 * mu; - - affirm (Dl % 4 != 1); - affirm (Dh < (wide_uint) 1 << (W_TYPE_SIZE - 2)); - - S = isqrt2 (Dh, Dl); - - Q1 = 1; - P = S; - - /* Square root remainder fits in one word, so ignore high part. */ - Q = Dl - P * P; - /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))? */ - L = isqrt (2 * S); - B = 2 * L; - L1 = mu * 2 * L; - - /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) = - 4 D. */ - - for (i = 0; i <= B; i++) - { - wide_uint q, P1, t, rem; - - div_smallq (q, rem, S + P, Q); - P1 = S - rem; /* P1 = q*Q - P */ - - affirm (q > 0 && Q > 0); - -# if STAT_SQUFOF - q_freq[0]++; - q_freq[MIN (q, Q_FREQ_SIZE)]++; -# endif - - if (Q <= L1) - { - wide_uint g = Q; - - if ((Q & 1) == 0) - g /= 2; - - g /= gcd_odd (g, mu); - - if (g <= L) - { - if (qpos >= QUEUE_SIZE) - error (EXIT_FAILURE, 0, _("squfof queue overflow")); - queue[qpos].Q = g; - queue[qpos].P = P % g; - qpos++; - } - } - - /* I think the difference can be either sign, but mod - 2^W_TYPE_SIZE arithmetic should be fine. */ - t = Q1 + q * (P - P1); - Q1 = Q; - Q = t; - P = P1; - - if ((i & 1) == 0) - { - wide_uint r = is_square (Q); - if (r) - { - for (int j = 0; j < qpos; j++) - { - if (queue[j].Q == r) - { - if (r == 1) - /* Traversed entire cycle. */ - goto next_multiplier; - - /* Need the absolute value for divisibility test. */ - if (P >= queue[j].P) - t = P - queue[j].P; - else - t = queue[j].P - P; - if (t % r == 0) - { - /* Delete entries up to and including entry - j, which matched. */ - memmove (queue, queue + j + 1, - (qpos - j - 1) * sizeof (queue[0])); - qpos -= (j + 1); - } - goto next_i; - } - } - - /* We have found a square form, which should give a - factor. */ - Q1 = r; - affirm (S >= P); /* What signs are possible? */ - P += r * ((S - P) / r); - - /* Note: Paper says (N - P*P) / Q1, that seems incorrect - for the case D = 2N. */ - /* Compute Q = (D - P*P) / Q1, but we need double - precision. */ - wide_uint hi, lo; - umul_ppmm (hi, lo, P, P); - sub_ddmmss (hi, lo, Dh, Dl, hi, lo); - udiv_qrnnd (Q, rem, hi, lo, Q1); - affirm (rem == 0); - - for (;;) - { - /* Note: There appears to by a typo in the paper, - Step 4a in the algorithm description says q <-- - floor([S+P]/\hat Q), but looking at the equations - in Sec. 3.1, it should be q <-- floor([S+P] / Q). - (In this code, \hat Q is Q1). */ - div_smallq (q, rem, S + P, Q); - P1 = S - rem; /* P1 = q*Q - P */ - -# if STAT_SQUFOF - q_freq[0]++; - q_freq[MIN (q, Q_FREQ_SIZE)]++; -# endif - if (P == P1) - break; - t = Q1 + q * (P - P1); - Q1 = Q; - Q = t; - P = P1; - } - - if ((Q & 1) == 0) - Q /= 2; - Q /= gcd_odd (Q, mu); - - affirm (Q > 1 && (n1 || Q < n0)); - - if (prime_p (Q)) - factor_insert (factors, Q); - else if (!factor_using_squfof (0, Q, factors)) - factor_using_pollard_rho (Q, 2, factors); - - divexact_21 (n1, n0, n1, n0, Q); - - if (prime2_p (n1, n0)) - factor_insert_large (factors, n1, n0); - else - { - if (!factor_using_squfof (n1, n0, factors)) - { - if (n1 == 0) - factor_using_pollard_rho (n0, 1, factors); - else - factor_using_pollard_rho2 (n1, n0, 1, factors); - } - } - - return true; - } - } - next_i:; - } - next_multiplier:; - } - return false; -} -#endif - /* Compute the prime factors of the 128-bit number (T1,T0), and put the results in FACTORS. */ static void @@ -2230,11 +1770,6 @@ factor (wide_uint t1, wide_uint t0, struct factors *factors) factor_insert_large (factors, t1, t0); else { -#if USE_SQUFOF - if (factor_using_squfof (t1, t0, factors)) - return; -#endif - if (t1 == 0) factor_using_pollard_rho (t0, 1, factors); else @@ -2681,10 +2216,6 @@ main (int argc, char **argv) atexit (lbuf_flush); -#if STAT_SQUFOF - memset (q_freq, 0, sizeof (q_freq)); -#endif - bool ok; if (argc <= optind) ok = do_stdin (); @@ -2696,20 +2227,5 @@ main (int argc, char **argv) ok = false; } -#if STAT_SQUFOF - if (q_freq[0] > 0) - { - double acc_f; - printf ("q freq. cum. freq.(total: %d)\n", q_freq[0]); - for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++) - { - double f = (double) q_freq[i] / q_freq[0]; - acc_f += f; - printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i, - 100.0 * f, 100.0 * acc_f); - } - } -#endif - return ok ? EXIT_SUCCESS : EXIT_FAILURE; }