From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sat, 24 May 2025 17:29:06 +0000 (-0700)
Subject: factor: squfof cleanup
X-Git-Tag: v9.8~243
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=8ff11d626b0a2635b9246c2448a157621f8ee828;p=thirdparty%2Fcoreutils.git

factor: squfof cleanup

* src/factor.c (USE_SQUFOF, STAT_SQUFOF):
Assume these are always false, and simplify the code accordingly.
We can bring it back later if needed be.
---

diff --git a/src/factor.c b/src/factor.c
index ff9d3c0be3..95383556a5 100644
--- a/src/factor.c
+++ b/src/factor.c
@@ -43,8 +43,7 @@
         inverses, but instead relies on GMP for fast divisibility testing.)
     (2) Check the nature of any non-factored part using Miller-Rabin for
         detecting composites, and Lucas for detecting primes.
-    (3) Factor any remaining composite part using the Pollard-Brent rho
-        algorithm or if USE_SQUFOF is defined to 1, try that first.
+    (3) Factor any remaining composite part using Pollard-Brent rho.
         Status of found factors are checked again using Miller-Rabin and Lucas.
 
     We prefer using Hensel norm in the divisions, not the more familiar
@@ -90,16 +89,6 @@
 # define PROVE_PRIMALITY 1
 #endif
 
-/* Faster for certain ranges but less general.  */
-#ifndef USE_SQUFOF
-# define USE_SQUFOF 0
-#endif
-
-/* Output SQUFOF statistics.  */
-#ifndef STAT_SQUFOF
-# define STAT_SQUFOF 0
-#endif
-
 
 #include <config.h>
 #include <getopt.h>
@@ -1761,455 +1750,6 @@ mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
   mpz_clears (P, t2, t, z, x, y, nullptr);
 }
 
-#if USE_SQUFOF
-/* FIXME: Maybe better to use an iteration converging to 1/sqrt(n)?  If
-   algorithm is replaced, consider also returning the remainder.  */
-ATTRIBUTE_CONST
-static wide_uint
-isqrt (wide_uint n)
-{
-  if (n == 0)
-    return 0;
-
-  int c = stdc_leading_zeros (n);
-
-  /* Make x > sqrt(n).  This will be invariant through the loop.  */
-  wide_uint x = (wide_uint) 1 << ((W_TYPE_SIZE + 1 - c) >> 1);
-
-  for (;;)
-    {
-      wide_uint y = (x + n / x) / 2;
-      if (y >= x)
-        return x;
-
-      x = y;
-    }
-}
-
-ATTRIBUTE_CONST
-static wide_uint
-isqrt2 (wide_uint nh, wide_uint nl)
-{
-  /* Ensures the remainder fits in a wide_uint.  */
-  affirm (nh < ((wide_uint) 1 << (W_TYPE_SIZE - 2)));
-
-  if (nh == 0)
-    return isqrt (nl);
-
-  int shift = stdc_leading_zeros (nh) & ~1;
-
-  /* Make x > sqrt (n).  */
-  wide_uint x = isqrt ((nh << shift) + (nl >> (W_TYPE_SIZE - shift))) + 1;
-  x <<= (W_TYPE_SIZE - shift) >> 1;
-
-  /* Do we need more than one iteration?  */
-  for (;;)
-    {
-      MAYBE_UNUSED wide_uint r;
-      wide_uint q, y;
-      udiv_qrnnd (q, r, nh, nl, x);
-      y = (x + q) / 2;
-
-      if (y >= x)
-        {
-          wide_uint hi, lo;
-          umul_ppmm (hi, lo, x + 1, x + 1);
-          affirm (gt2 (hi, lo, nh, nl));
-
-          umul_ppmm (hi, lo, x, x);
-          affirm (ge2 (nh, nl, hi, lo));
-          sub_ddmmss (hi, lo, nh, nl, hi, lo);
-          affirm (hi == 0);
-
-          return x;
-        }
-
-      x = y;
-    }
-}
-
-/* MAGIC[N] has a bit i set iff i is a quadratic residue mod N.  */
-# define MAGIC64 0x0202021202030213ULL
-# define MAGIC63 0x0402483012450293ULL
-# define MAGIC65 0x218a019866014613ULL
-# define MAGIC11 0x23b
-
-/* Return the square root if the input is a square, otherwise 0.  */
-ATTRIBUTE_CONST
-static wide_uint
-is_square (wide_uint x)
-{
-  /* Uses the tests suggested by Cohen.  Excludes 99% of the non-squares before
-     computing the square root.  */
-  if (((MAGIC64 >> (x & 63)) & 1)
-      && ((MAGIC63 >> (x % 63)) & 1)
-      /* Both 0 and 64 are squares mod (65).  */
-      && ((MAGIC65 >> ((x % 65) & 63)) & 1)
-      && ((MAGIC11 >> (x % 11) & 1)))
-    {
-      wide_uint r = isqrt (x);
-      if (r * r == x)
-        return r;
-    }
-  return 0;
-}
-
-/* invtab[i] = floor (0x10000 / (0x100 + i) */
-static short const invtab[0x81] =
-  {
-    0x200,
-    0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1,
-    0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7,
-    0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af,
-    0x1ac, 0x1a9, 0x1a6, 0x1a4, 0x1a1, 0x19e, 0x19c, 0x199,
-    0x197, 0x194, 0x192, 0x18f, 0x18d, 0x18a, 0x188, 0x186,
-    0x183, 0x181, 0x17f, 0x17d, 0x17a, 0x178, 0x176, 0x174,
-    0x172, 0x170, 0x16e, 0x16c, 0x16a, 0x168, 0x166, 0x164,
-    0x162, 0x160, 0x15e, 0x15c, 0x15a, 0x158, 0x157, 0x155,
-    0x153, 0x151, 0x150, 0x14e, 0x14c, 0x14a, 0x149, 0x147,
-    0x146, 0x144, 0x142, 0x141, 0x13f, 0x13e, 0x13c, 0x13b,
-    0x139, 0x138, 0x136, 0x135, 0x133, 0x132, 0x130, 0x12f,
-    0x12e, 0x12c, 0x12b, 0x129, 0x128, 0x127, 0x125, 0x124,
-    0x123, 0x121, 0x120, 0x11f, 0x11e, 0x11c, 0x11b, 0x11a,
-    0x119, 0x118, 0x116, 0x115, 0x114, 0x113, 0x112, 0x111,
-    0x10f, 0x10e, 0x10d, 0x10c, 0x10b, 0x10a, 0x109, 0x108,
-    0x107, 0x106, 0x105, 0x104, 0x103, 0x102, 0x101, 0x100,
-  };
-
-/* Compute q = [u/d], r = u mod d.  Avoids slow hardware division for the case
-   that q < 0x40; here it instead uses a table of (Euclidean) inverses.  */
-# define div_smallq(q, r, u, d)                                          \
-  do {                                                                  \
-    if ((u) / 0x40 < (d))                                               \
-      {                                                                 \
-        wide_uint _dinv, _mask, _q, _r;                                 \
-        int _cnt = stdc_leading_zeros (d);				\
-        _r = (u);                                                       \
-        if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8)))                        \
-          {                                                             \
-            _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80];   \
-            _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt);                \
-          }                                                             \
-        else                                                            \
-          {                                                             \
-            _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f];   \
-            _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11;        \
-          }                                                             \
-        _r -= _q * (d);                                                 \
-                                                                        \
-        _mask = -(wide_uint) (_r >= (d));                               \
-        (r) = _r - (_mask & (d));                                       \
-        (q) = _q - _mask;                                               \
-        affirm ((q) * (d) + (r) == u);					\
-      }                                                                 \
-    else                                                                \
-      {                                                                 \
-        wide_uint _q = (u) / (d);                                       \
-        (r) = (u) - _q * (d);                                           \
-        (q) = _q;                                                       \
-      }                                                                 \
-  } while (0)
-
-/* Notes: Example N = 22117019.  After first phase we find Q1 = 6314, Q
-   = 3025, P = 1737, representing F_{18} = (-6314, 2 * 1737, 3025),
-   with 3025 = 55^2.
-
-   Constructing the square root, we get Q1 = 55, Q = 8653, P = 4652,
-   representing G_0 = (-55, 2 * 4652, 8653).
-
-   In the notation of the paper:
-
-   S_{-1} = 55, S_0 = 8653, R_0 = 4652
-
-   Put
-
-     t_0 = floor([q_0 + R_0] / S0) = 1
-     R_1 = t_0 * S_0 - R_0 = 4001
-     S_1 = S_{-1} +t_0 (R_0 - R_1) = 706
-*/
-
-/* Multipliers, in order of efficiency:
-   0.7268  3*5*7*11 = 1155 = 3 (mod 4)
-   0.7317  3*5*7    =  105 = 1
-   0.7820  3*5*11   =  165 = 1
-   0.7872  3*5      =   15 = 3
-   0.8101  3*7*11   =  231 = 3
-   0.8155  3*7      =   21 = 1
-   0.8284  5*7*11   =  385 = 1
-   0.8339  5*7      =   35 = 3
-   0.8716  3*11     =   33 = 1
-   0.8774  3        =    3 = 3
-   0.8913  5*11     =   55 = 3
-   0.8972  5        =    5 = 1
-   0.9233  7*11     =   77 = 1
-   0.9295  7        =    7 = 3
-   0.9934  11       =   11 = 3
-*/
-# define QUEUE_SIZE 50
-#endif
-
-#if STAT_SQUFOF
-# define Q_FREQ_SIZE 50
-/* Element 0 keeps the total */
-static int q_freq[Q_FREQ_SIZE + 1];
-#endif
-
-#if USE_SQUFOF
-/* Return true on success.  Expected to fail only for numbers
-   >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit.  */
-static bool
-factor_using_squfof (wide_uint n1, wide_uint n0, struct factors *factors)
-{
-  /* Uses algorithm and notation from
-
-     SQUARE FORM FACTORIZATION
-     JASON E. GOWER AND SAMUEL S. WAGSTAFF, JR.
-
-     https://homes.cerias.purdue.edu/~ssw/squfof.pdf
-   */
-
-  static short const multipliers_1[] =
-    { /* = 1 (mod 4) */
-      105, 165, 21, 385, 33, 5, 77, 1, 0
-    };
-  static short const multipliers_3[] =
-    { /* = 3 (mod 4) */
-      1155, 15, 231, 35, 3, 55, 7, 11, 0
-    };
-
-  struct { wide_uint Q; wide_uint P; } queue[QUEUE_SIZE];
-
-  if (n1 >= ((wide_uint) 1 << (W_TYPE_SIZE - 2)))
-    return false;
-
-  wide_uint sqrt_n = isqrt2 (n1, n0);
-
-  if (n0 == sqrt_n * sqrt_n)
-    {
-      wide_uint p1, p0;
-
-      umul_ppmm (p1, p0, sqrt_n, sqrt_n);
-      affirm (p0 == n0);
-
-      if (n1 == p1)
-        {
-          if (prime_p (sqrt_n))
-            factor_insert_multiplicity (factors, sqrt_n, 2);
-          else
-            {
-              struct factors f;
-
-              f.nfactors = 0;
-              if (!factor_using_squfof (0, sqrt_n, &f))
-                {
-                  /* Try pollard rho instead */
-                  factor_using_pollard_rho (sqrt_n, 1, &f);
-                }
-              /* Duplicate the new factors */
-              for (unsigned int i = 0; i < f.nfactors; i++)
-                factor_insert_multiplicity (factors, f.p[i], 2 * f.e[i]);
-            }
-          return true;
-        }
-    }
-
-  /* Select multipliers so we always get n * mu = 3 (mod 4) */
-  for (short const *m = (n0 % 4 == 1) ? multipliers_3 : multipliers_1;
-       *m; m++)
-    {
-      wide_uint S, Dh, Dl, Q1, Q, P, L, L1, B;
-      unsigned int i;
-      unsigned int mu = *m;
-      int qpos = 0;
-
-      affirm (mu * n0 % 4 == 3);
-
-      /* In the notation of the paper, with mu * n == 3 (mod 4), we
-         get \Delta = 4 mu * n, and the paper's \mu is 2 mu.  As far as
-         I understand it, the necessary bound is 4 \mu^3 < n, or 32
-         mu^3 < n.
-
-         However, this seems insufficient: With n = 37243139 and mu =
-         105, we get a trivial factor, from the square 38809 = 197^2,
-         without any corresponding Q earlier in the iteration.
-
-         Requiring 64 mu^3 < n seems sufficient.  */
-      if (n1 == 0)
-        {
-          if ((wide_uint) mu * mu * mu >= n0 / 64)
-            continue;
-        }
-      else
-        {
-          if (n1 > ((wide_uint) 1 << (W_TYPE_SIZE - 2)) / mu)
-            continue;
-        }
-      umul_ppmm (Dh, Dl, n0, mu);
-      Dh += n1 * mu;
-
-      affirm (Dl % 4 != 1);
-      affirm (Dh < (wide_uint) 1 << (W_TYPE_SIZE - 2));
-
-      S = isqrt2 (Dh, Dl);
-
-      Q1 = 1;
-      P = S;
-
-      /* Square root remainder fits in one word, so ignore high part.  */
-      Q = Dl - P * P;
-      /* FIXME: When can this differ from floor (sqrt (2 * sqrt (D)))?  */
-      L = isqrt (2 * S);
-      B = 2 * L;
-      L1 = mu * 2 * L;
-
-      /* The form is (+/- Q1, 2P, -/+ Q), of discriminant 4 (P^2 + Q Q1) =
-         4 D.  */
-
-      for (i = 0; i <= B; i++)
-        {
-          wide_uint q, P1, t, rem;
-
-          div_smallq (q, rem, S + P, Q);
-          P1 = S - rem; /* P1 = q*Q - P */
-
-          affirm (q > 0 && Q > 0);
-
-# if STAT_SQUFOF
-          q_freq[0]++;
-          q_freq[MIN (q, Q_FREQ_SIZE)]++;
-# endif
-
-          if (Q <= L1)
-            {
-              wide_uint g = Q;
-
-              if ((Q & 1) == 0)
-                g /= 2;
-
-              g /= gcd_odd (g, mu);
-
-              if (g <= L)
-                {
-                  if (qpos >= QUEUE_SIZE)
-                    error (EXIT_FAILURE, 0, _("squfof queue overflow"));
-                  queue[qpos].Q = g;
-                  queue[qpos].P = P % g;
-                  qpos++;
-                }
-            }
-
-          /* I think the difference can be either sign, but mod
-             2^W_TYPE_SIZE arithmetic should be fine.  */
-          t = Q1 + q * (P - P1);
-          Q1 = Q;
-          Q = t;
-          P = P1;
-
-          if ((i & 1) == 0)
-            {
-              wide_uint r = is_square (Q);
-              if (r)
-                {
-                  for (int j = 0; j < qpos; j++)
-                    {
-                      if (queue[j].Q == r)
-                        {
-                          if (r == 1)
-                            /* Traversed entire cycle.  */
-                            goto next_multiplier;
-
-                          /* Need the absolute value for divisibility test.  */
-                          if (P >= queue[j].P)
-                            t = P - queue[j].P;
-                          else
-                            t = queue[j].P - P;
-                          if (t % r == 0)
-                            {
-                              /* Delete entries up to and including entry
-                                 j, which matched.  */
-                              memmove (queue, queue + j + 1,
-                                       (qpos - j - 1) * sizeof (queue[0]));
-                              qpos -= (j + 1);
-                            }
-                          goto next_i;
-                        }
-                    }
-
-                  /* We have found a square form, which should give a
-                     factor.  */
-                  Q1 = r;
-                  affirm (S >= P); /* What signs are possible?  */
-                  P += r * ((S - P) / r);
-
-                  /* Note: Paper says (N - P*P) / Q1, that seems incorrect
-                     for the case D = 2N.  */
-                  /* Compute Q = (D - P*P) / Q1, but we need double
-                     precision.  */
-                  wide_uint hi, lo;
-                  umul_ppmm (hi, lo, P, P);
-                  sub_ddmmss (hi, lo, Dh, Dl, hi, lo);
-                  udiv_qrnnd (Q, rem, hi, lo, Q1);
-                  affirm (rem == 0);
-
-                  for (;;)
-                    {
-                      /* Note: There appears to by a typo in the paper,
-                         Step 4a in the algorithm description says q <--
-                         floor([S+P]/\hat Q), but looking at the equations
-                         in Sec. 3.1, it should be q <-- floor([S+P] / Q).
-                         (In this code, \hat Q is Q1).  */
-                      div_smallq (q, rem, S + P, Q);
-                      P1 = S - rem;     /* P1 = q*Q - P */
-
-# if STAT_SQUFOF
-                      q_freq[0]++;
-                      q_freq[MIN (q, Q_FREQ_SIZE)]++;
-# endif
-                      if (P == P1)
-                        break;
-                      t = Q1 + q * (P - P1);
-                      Q1 = Q;
-                      Q = t;
-                      P = P1;
-                    }
-
-                  if ((Q & 1) == 0)
-                    Q /= 2;
-                  Q /= gcd_odd (Q, mu);
-
-                  affirm (Q > 1 && (n1 || Q < n0));
-
-                  if (prime_p (Q))
-                    factor_insert (factors, Q);
-                  else if (!factor_using_squfof (0, Q, factors))
-                    factor_using_pollard_rho (Q, 2, factors);
-
-                  divexact_21 (n1, n0, n1, n0, Q);
-
-                  if (prime2_p (n1, n0))
-                    factor_insert_large (factors, n1, n0);
-                  else
-                    {
-                      if (!factor_using_squfof (n1, n0, factors))
-                        {
-                          if (n1 == 0)
-                            factor_using_pollard_rho (n0, 1, factors);
-                          else
-                            factor_using_pollard_rho2 (n1, n0, 1, factors);
-                        }
-                    }
-
-                  return true;
-                }
-            }
-        next_i:;
-        }
-    next_multiplier:;
-    }
-  return false;
-}
-#endif
-
 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
    results in FACTORS.  */
 static void
@@ -2230,11 +1770,6 @@ factor (wide_uint t1, wide_uint t0, struct factors *factors)
     factor_insert_large (factors, t1, t0);
   else
     {
-#if USE_SQUFOF
-      if (factor_using_squfof (t1, t0, factors))
-        return;
-#endif
-
       if (t1 == 0)
         factor_using_pollard_rho (t0, 1, factors);
       else
@@ -2681,10 +2216,6 @@ main (int argc, char **argv)
 
   atexit (lbuf_flush);
 
-#if STAT_SQUFOF
-  memset (q_freq, 0, sizeof (q_freq));
-#endif
-
   bool ok;
   if (argc <= optind)
     ok = do_stdin ();
@@ -2696,20 +2227,5 @@ main (int argc, char **argv)
           ok = false;
     }
 
-#if STAT_SQUFOF
-  if (q_freq[0] > 0)
-    {
-      double acc_f;
-      printf ("q  freq.  cum. freq.(total: %d)\n", q_freq[0]);
-      for (int i = 1, acc_f = 0.0; i <= Q_FREQ_SIZE; i++)
-        {
-          double f = (double) q_freq[i] / q_freq[0];
-          acc_f += f;
-          printf ("%s%d %.2f%% %.2f%%\n", i == Q_FREQ_SIZE ? ">=" : "", i,
-                  100.0 * f, 100.0 * acc_f);
-        }
-    }
-#endif
-
   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
 }