From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sun, 18 May 2025 04:42:37 +0000 (-0700)
Subject: factor: fix bug with 128-bit uintmax_t
X-Git-Tag: v9.8~321
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fe51b33859d2e7286425d6d5600288ce9ae43bd1;p=thirdparty%2Fcoreutils.git

factor: fix bug with 128-bit uintmax_t

On so-far-only-theoretical platforms with 128-bit uintmax_t,
'factor' would misbehave by not factoring enough.
Work around the bug (at a performance cost)
and document the issue.  I hope someone with
more time and expertise can fix the performance cost
that this introduces.
To reproduce the correctness bug, build with
'gcc -DUSE_INT128 -DEXHIBIT_INT128_BUG';
'make check' should fail due to the new test case.
* src/factor.c (USE_INT128): New macro.
(wide_uint, wide_int, W_TYPE_SIZE, WIDE_UINT_MAX):
Define to proper values if USE_INT128.
(prime_p) [!EXHIBIT_INT128_BUG]: Work around bug
with 128-bit wide_uint, at some performance cost.
* tests/factor/factor.pl (bug-with-128-bit-uintmax_t):
New test.
---

diff --git a/src/factor.c b/src/factor.c
index ce6158eebf..50a9803fa5 100644
--- a/src/factor.c
+++ b/src/factor.c
@@ -126,13 +126,24 @@
 /* Token delimiters when reading from a file.  */
 #define DELIM "\n\t "
 
+/* __int128 is experimental; to use it, compile with -DUSE_INT128.  */
+#ifndef USE_INT128
+# define USE_INT128 false
+#endif
+
 /* Typedefs and macros related to an unsigned type that is no narrower
    than 32 bits and no narrower than unsigned int.  For efficiency,
    use the widest hardware-supported type.  */
+#if USE_INT128
+typedef unsigned __int128 wide_uint;
+typedef __int128 wide_int;
+# define W_TYPE_SIZE 128
+#else
 typedef uintmax_t wide_uint;
 typedef intmax_t wide_int;
-#define WIDE_UINT_MAX UINTMAX_MAX
 #define W_TYPE_SIZE UINTMAX_WIDTH
+#endif
+#define WIDE_UINT_MAX ((wide_uint) -1)
 
 #ifndef USE_LONGLONG_H
 /* With the way we use longlong.h, it's only safe to use
@@ -252,7 +263,8 @@ make_uuint (wide_uint hi, wide_uint lo)
 static wide_uint const BIG_POWER_OF_10 = 10000000000000000000llu;
 enum { LOG_BIG_POWER_OF_10 = 19 };
 #else
-/* A so-far-only-theoretical platform with at-least-128-bit uintmax_t.
+/* If built with -DUSE_INT128, a platform that supports __int128; otherwise,
+   a so-far-only-theoretical platform with at-least-128-bit uintmax_t.
    This is for performance; the 64-bit mainstream code will still work.  */
 static wide_uint const BIG_POWER_OF_10 =
   (wide_uint) 10000000000000000000llu * 10000000000000000000llu;
@@ -1186,8 +1198,21 @@ prime_p (wide_uint n)
   if (n <= 1)
     return false;
 
+  wide_uint cast_out_limit
+    = (wide_uint) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME;
+
+#ifndef EXHIBIT_INT128_BUG
+  /* FIXME: Do the small-prime performance improvement only if
+     wide_uint is exactly 64 bits wide.  We don't know why the code
+     misbehaves when wide_uint is wider; e.g., when compiled with
+     'gcc -DUSE_INT128 -DEXHIBIT_INT128_BUG', 'factor' mishandles
+     340282366920938463463374607431768211355.  */
+  if (W_TYPE_SIZE != 64)
+    cast_out_limit = 2;
+#endif
+
   /* We have already cast out small primes.  */
-  if (n < (wide_uint) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
+  if (n < cast_out_limit)
     return true;
 
   /* Precomputation for Miller-Rabin.  */
diff --git a/tests/factor/factor.pl b/tests/factor/factor.pl
index c3056dc67d..a4edb7acb0 100755
--- a/tests/factor/factor.pl
+++ b/tests/factor/factor.pl
@@ -89,6 +89,9 @@ my @Tests =
      ['bug-gmp-plus_2_sup_128_plus_1',
       '+170141183460469231731687303715884105729',
       {OUT => '3 56713727820156410577229101238628035243'}],
+     ['bug-with-128-bit-uintmax_t',
+      '340282366920938463463374607431768211355',
+      {OUT => '5 31 2195370109167344925570158757624311041'}],
      ['h-1', '-h 3000', {OUT => '2^3 3 5^3'}],
      ['h-2', '3000 --exponents', {OUT => '2^3 3 5^3'}],
     );