From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sat, 21 Jun 2025 18:41:11 +0000 (-0700)
Subject: factor: don’t prove primality
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5037ce930c243e6fc06f5b879daece31d4628da4;p=thirdparty%2Fcoreutils.git

factor: don’t prove primality

Suggested for consideration by Torbjörn Granlund in:
https://lists.gnu.org/r/coreutils/2025-01/msg00000.html
* src/factor.c (PROVE_PRIMALITY): Now defaults to false.
(mp_prime_p): Help the compiler by telling it mpz_probab_prime_p
returns nonnegative.
* tests/factor/create-test.sh (bigprime): Test 2^400 - 593,
since that’s now practical.
* tests/local.mk (factor_tests): Add new test.
---

diff --git a/NEWS b/NEWS
index 87ba2659cf..eda971bb6a 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** Changes in behavior
+
+  'factor' is now much faster at identifying large prime numbers.
+
 ** Bug fixes
 
   cksum was not compilable by Apple LLVM 10.0.0 x86-64, which
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 89a0b59723..0648463dfb 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -19180,11 +19180,21 @@ takes about 14 seconds, and the slower algorithm would have taken
 about 750 ms to factor @math{2^{127} - 3} instead of the 50 ms needed by
 the faster algorithm.
 
-Factoring large numbers is, in general, hard.  The Pollard-Brent rho
+Factoring large numbers is, in general, hard.  The Pollard--Brent rho
 algorithm used by @command{factor} is particularly effective for
-numbers with relatively small factors.  If you wish to factor large
-numbers which do not have small factors (for example, numbers which
-are the product of two large primes), other methods are far better.
+numbers with relatively small factors.  Other methods are far better
+for factoring large composite numbers that lack relatively small
+factors, such as numbers that are the product of two large primes.
+
+When testing whether a number is prime, @command{factor} uses the
+Baillie--PSW primality heuristic for speed.  Although Baillie--PSW has
+not been proven to reject all composite numbers, the probability of a
+Baillie--PSW pseudoprime is astronomically small and no such
+pseudoprime has been discovered despite decades of searching by
+mathematicians.  Any such pseudoprime is greater than @math{2^{64}}.
+For more, see: Baillie R, Fiori A, Wagstaff Jr SS.@: Strengthening the
+Baillie--PSW primality test.@: @i{Math Comp}.@: 2021;90:1931--1955.@:
+@uref{https://doi.org/10.1090/mcom/3616}.
 
 @exitstatus
 
diff --git a/src/factor.c b/src/factor.c
index b5656f00e9..19f75802e1 100644
--- a/src/factor.c
+++ b/src/factor.c
@@ -80,13 +80,15 @@
       pre-inversion (such as GMP's invert_limb) and udiv_qrnnd_preinv (from
       GMP's gmp-impl.h).  The redcify2 function could be vastly improved using
       similar methods.  These functions currently dominate run time when
-      PROVE_PRIMALITY is nonzero (the default).
+      PROVE_PRIMALITY is true (which is not the default).
 */
 
 /* Whether to recursively factor to prove primality,
-   or run faster probabilistic tests.  */
+   or run faster probabilistic tests.
+   FIXME: Simplify the code by assuming PROVE_PRIMALITY is false,
+   and remove PROVE_PRIMALITY.  */
 #ifndef PROVE_PRIMALITY
-# define PROVE_PRIMALITY 1
+# define PROVE_PRIMALITY false
 #endif
 
 
@@ -1523,6 +1525,7 @@ mp_prime_p (mpz_t n)
     return true;
 
   int probab_prime = mpz_probab_prime_p (n, MR_REPS);
+  assume (0 <= probab_prime);
   if (probab_prime == 0)
     return false;
   if (flag_prove_primality < probab_prime)
diff --git a/tests/factor/create-test.sh b/tests/factor/create-test.sh
index fc84b14843..7fad40cdcc 100755
--- a/tests/factor/create-test.sh
+++ b/tests/factor/create-test.sh
@@ -27,6 +27,10 @@ t2=170141183460469229545748130981302223887
 # https://bugs.gnu.org/73474
 bug73474=22222222222222222202111121111
 
+# 2^400 - 593
+bigprime=25822498780869085896559191720030118743297057928292235128306593565406\
+47622016841194629645353280137831435903171972747492783
+
 # Each test is a triple: lo, hi, sha1 of result.
 # The test script, run.sh, runs seq lo hi|factor|sha1sum
 # and verifies that the actual and expected checksums are the same.
@@ -70,6 +74,7 @@ case $t in
   t35) set   ${q}958336   ${q}960335 2374919a89196e1fce93adfe779cb4664556d4b6 ;;
   t36) set   ${q}960336   ${q}962335 569e4363e8d9e8830a187d9ab27365eef08abde1 ;;
   t37) set    $bug73474    $bug73474 61d04aaf757acc5a37eb1d5581a98eea78ef50e8 ;;
+  t38) set    $bigprime    $bigprime 02f3c51a2896ff4524fd76de5f5854029879a179 ;;
   *)
     echo "$0: error: unknown test: '$test_name' -> '$t'" >&2
     exit 1
diff --git a/tests/local.mk b/tests/local.mk
index 03114f7592..dd07032dab 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -765,7 +765,7 @@ factor_tests = \
   $(tf)/t20.sh $(tf)/t21.sh $(tf)/t22.sh $(tf)/t23.sh $(tf)/t24.sh \
   $(tf)/t25.sh $(tf)/t26.sh $(tf)/t27.sh $(tf)/t28.sh $(tf)/t29.sh \
   $(tf)/t30.sh $(tf)/t31.sh $(tf)/t32.sh $(tf)/t33.sh $(tf)/t34.sh \
-  $(tf)/t35.sh $(tf)/t36.sh $(tf)/t37.sh
+  $(tf)/t35.sh $(tf)/t36.sh $(tf)/t37.sh $(tf)/t38.sh
 
 $(factor_tests): $(tf)/run.sh $(tf)/create-test.sh
 	$(AM_V_GEN)$(MKDIR_P) $(tf)