From: Paul Eggert <eggert@cs.ucla.edu>
Date: Wed, 11 Jun 2025 00:40:34 +0000 (-0700)
Subject: factor: speed up multiprecision Pollard’s rho
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=442c164ae043b58c1aeced3816dbed55a67ca99c;p=thirdparty%2Fcoreutils.git

factor: speed up multiprecision Pollard’s rho

These changes are taken from a proposal by Torbjörn Granlund in:
https://lists.gnu.org/r/coreutils/2025-01/msg00000.html
On my x86-64 platform, they improve speed by more than 8× when
factoring 340282366920938463463374607431768211457.
* src/factor.c (mp_modadd, mp_modsub, mp_modadd_1, mp_mulredc):
New functions.
(MP_FACTOR_USING_POLLARD_RHO_N_MAX): New macro.
(mp_factor_using_pollard_rho): Act on mpn not mpz, and on
mp_limb_t not unsigned long int.  Reorder args.  All uses changed.
---

diff --git a/src/factor.c b/src/factor.c
index 3a8cc60bb9..4049297298 100644
--- a/src/factor.c
+++ b/src/factor.c
@@ -1707,100 +1707,156 @@ factor_using_pollard_rho2 (mp_limb_t n1, mp_limb_t n0, unsigned long int a,
     }
 }
 
+/* Set RP = (AP + BP) mod MP.  All values are nonnegative and take up
+   N>0 words, and AP and BP are both less than MP.  */
 static void
-mp_factor_using_pollard_rho (mpz_t n, unsigned long int a,
-                             struct mp_factors *factors)
+mp_modadd (mp_limb_t *rp, mp_limb_t const *ap, mp_limb_t const *bp,
+           mp_limb_t const *mp, mp_size_t n)
 {
-  mpz_t x, z, y, P;
-  mpz_t t, t2;
+  mp_limb_t cy = mpn_add_n (rp, ap, bp, n);
+  if (cy || mpn_cmp (rp, mp, n) >= 0)
+    mpn_sub_n (rp, rp, mp, n);
+}
 
-  devmsg ("[pollard-rho (%lu)] ", a);
+/* Set RP = (AP - BP) mod MP.  All values are nonnegative and take up
+   N>0 words, and AP and BP are both less than MP.  */
+static void
+mp_modsub (mp_limb_t *rp, mp_limb_t const *ap, mp_limb_t const *bp,
+           mp_limb_t const *mp, mp_size_t n)
+{
+  mp_limb_t cy = mpn_sub_n (rp, ap, bp, n);
+  if (cy)
+    mpn_add_n (rp, rp, mp, n);
+}
 
-  mpz_inits (t, t2, nullptr);
-  mpz_init_set_si (y, 2);
-  mpz_init_set_si (x, 2);
-  mpz_init_set_si (z, 2);
-  mpz_init_set_ui (P, 1);
+/* Set RP = (AP - B0) mod MP.  All values are nonnegative, AP and MP
+   both take up N>0 words, and AP < MP.  */
+static void
+mp_modadd_1 (mp_limb_t *rp, mp_limb_t const *ap, mp_limb_t b0,
+             mp_limb_t const *mp, mp_size_t n)
+{
+  mp_limb_t cy = mpn_add_1 (rp, ap, n, b0);
+  if (cy || mpn_cmp (rp, mp, n) >= 0)
+    mpn_sub_n (rp, rp, mp, n);
+}
 
-  unsigned long long int k = 1;
-  unsigned long long int l = 1;
+static void
+mp_mulredc (mp_limb_t *rp, mp_limb_t const *ap, mp_limb_t const *bp,
+            mp_limb_t const *mp, mp_size_t n, mp_limb_t m0inv, mp_limb_t *tp)
+{
+  tp[n] = mpn_mul_1 (tp, ap, n, bp[0]);
+  tp[0] = mpn_addmul_1 (tp, mp, n, tp[0] * m0inv);
 
-  while (mpz_cmp_ui (n, 1) != 0)
+  for (mp_size_t i = 1; i < n; i++)
     {
-      for (;;)
+      tp[n + i] = mpn_addmul_1 (tp + i, ap, n, bp[i]);
+      tp[i] = mpn_addmul_1 (tp + i, mp, n, tp[i] * m0inv);
+    }
+  mp_modadd (rp, tp, tp + n, mp, n);
+}
+
+/* Maximum value for N in mp_factor_using_pollard_rho,
+   to avoid integer overflow when it calls xinmalloc.  */
+#define MP_FACTOR_USING_POLLARD_RHO_N_MAX \
+  ((MIN (IDX_MAX, TYPE_MAXIMUM (mp_size_t)) - 3) / 10)
+
+static void
+mp_factor_using_pollard_rho (struct mp_factors *factors,
+                             mp_limb_t const *mp, mp_size_t n,
+                             mp_limb_t a)
+{
+  devmsg ("[pollard-rho (%lu)] ", (unsigned long int) a);
+
+  static_assert (10 * MP_FACTOR_USING_POLLARD_RHO_N_MAX + 3
+                 <= MIN (IDX_MAX, TYPE_MAXIMUM (mp_size_t)));
+  mp_limb_t *scratch = xinmalloc (10 * n + 3, sizeof *scratch);
+  mp_limb_t *qp = scratch + 2 * n + 1, *pp = qp + n + 2,
+    *xp = pp + n, *yp = xp + n, *zp = yp + n,
+    *tp = zp + n, *sp = tp + n, *gp = sp + n;
+  mp_size_t gn;
+
+  mpn_zero (scratch, n);
+  scratch[n] = 1;
+  mpn_tdiv_qr (qp, pp, 0, scratch, n + 1, mp, n);
+
+  mp_modadd (xp, pp, pp, mp, n);
+  mpn_copyi (yp, xp, n);
+  mpn_copyi (zp, xp, n);
+
+  mp_limb_t m0inv = binv_limb (-mp[0]);
+
+  for (unsigned long int k = 1; ; k *= 2)
+    {
+      for (unsigned long int i = k; i != 0; i--)
         {
-          do
-            {
-              mpz_mul (t, x, x);
-              mpz_mod (x, t, n);
-              mpz_add_ui (x, x, a);
+          mp_mulredc (tp, xp, xp, mp, n, m0inv, scratch);
+          mp_modadd_1 (xp, tp, a, mp, n);
 
-              mpz_sub (t, z, x);
-              mpz_mul (t2, P, t);
-              mpz_mod (P, t2, n);
+          mp_modsub (tp, zp, xp, mp, n);
+          mp_mulredc (pp, pp, tp, mp, n, m0inv, scratch);
 
-              if (k % 32 == 1)
+          if (i % 128 == 1)
+            {
+              if (mpn_zero_p (pp, n))
                 {
-                  mpz_gcd (t, P, n);
-                  if (mpz_cmp_ui (t, 1) != 0)
-                    goto factor_found;
-                  mpz_set (y, x);
+                  mp_factor_using_pollard_rho (factors, mp, n, a + 1);
+                  goto finish;
                 }
+              mpn_copyi (tp, pp, n);
+              mpn_copyi (sp, mp, n);
+              gn = mpn_gcd (gp, tp, n, sp, n);
+              if (gn != 1 || gp[0] != 1)
+                goto factor_found;
+              mpn_copyi (yp, xp, n);
             }
-          while (--k != 0);
-
-          mpz_set (z, x);
-          k = l;
-          l = 2 * l;
-          for (unsigned long long int i = 0; i < k; i++)
-            {
-              mpz_mul (t, x, x);
-              mpz_mod (x, t, n);
-              mpz_add_ui (x, x, a);
-            }
-          mpz_set (y, x);
         }
 
-    factor_found:
-      do
+      mpn_copyi (zp, xp, n);
+      for (unsigned long int i = 2 * k; i != 0; i--)
         {
-          mpz_mul (t, y, y);
-          mpz_mod (y, t, n);
-          mpz_add_ui (y, y, a);
-
-          mpz_sub (t, z, y);
-          mpz_gcd (t, t, n);
+          mp_mulredc (tp, xp, xp, mp, n, m0inv, scratch);
+          mp_modadd_1 (xp, tp, a, mp, n);
         }
-      while (mpz_cmp_ui (t, 1) == 0);
+      mpn_copyi (yp, xp, n);
+    }
 
-      mpz_divexact (n, n, t);   /* divide by t, before t is overwritten */
+ factor_found:
+  do
+    {
+      mp_mulredc (tp, yp, yp, mp, n, m0inv, scratch);
+      mp_modadd_1 (yp, tp, a, mp, n);
+      mp_modsub (tp, zp, yp, mp, n);
+      mpn_copyi (sp, mp, n);
+      gn = mpn_gcd (gp, tp, n, sp, n);
+    }
+  while (gn == 1 && gp[0] == 1);
 
-      if (!mp_finish_in_single (t, factors))
-        {
-          if (mp_prime_p (t))
-            mp_factor_insert (factors, t, 1);
-          else
-            {
-              devmsg ("[composite factor--restarting pollard-rho] ");
-              mp_factor_using_pollard_rho (t, a + 1, factors);
-            }
-        }
+  mpz_t g = MPZ_ROINIT_N (gp, gn);
+  if (!mp_finish_in_single (g, factors))
+    {
+      if (mp_prime_p (g))
+        mp_factor_insert (factors, g, 1);
+      else
+        mp_factor_using_pollard_rho (factors, gp, gn, a + 1);
+    }
 
-      if (mp_finish_in_single (n, factors))
-        break;
+  mpn_tdiv_qr (qp, tp, 0, mp, n, gp, gn);	/* could use divexact */
+  mp_size_t qn = n - gn + (qp[n - 1] != 0);
+  mpz_t q = MPZ_ROINIT_N (qp, qn);
 
-      if (mp_prime_p (n))
+  if (!mp_finish_in_single (q, factors))
+    {
+      if (mp_prime_p (q))
+        mp_factor_insert (factors, q, 1);
+      else
         {
-          mp_factor_insert (factors, n, 1);
-          break;
+          devmsg ("[composite factor--restarting pollard-rho] ");
+          mp_factor_using_pollard_rho (factors, qp, qn, a + 1);
         }
-
-      mpz_mod (x, x, n);
-      mpz_mod (z, z, n);
-      mpz_mod (y, y, n);
     }
 
-  mpz_clears (P, t2, t, z, x, y, nullptr);
+ finish:
+  free (scratch);
 }
 
 /* Compute the prime factors of the 128-bit number (T1,T0), and put the
@@ -1853,7 +1909,8 @@ mp_factor (mpz_t t)
           if (mp_prime_p (t))
             mp_factor_insert (&factors, t, 1);
           else
-            mp_factor_using_pollard_rho (t, 1, &factors);
+            mp_factor_using_pollard_rho (&factors, mpz_limbs_read (t),
+                                         mp_size (t), 1);
         }
     }
 
@@ -2168,6 +2225,8 @@ print_factors (char const *input)
   devmsg ("[using arbitrary-precision arithmetic] ");
   mpz_t t;
   mpz_init_set_str (t, str, 10);
+  if (MP_FACTOR_USING_POLLARD_RHO_N_MAX < mp_size (t))
+    xalloc_die ();
 
   lbuf_putmpz (t);
   lbuf_putc (':');