From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sun, 1 Jun 2025 23:02:21 +0000 (-0700)
Subject: factor: use same word size as GMP
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d6ee61e407b0cfd6252a1b0bea3aef4d887dba5f;p=thirdparty%2Fcoreutils.git

factor: use same word size as GMP

Remove experimental code for 128-bit words as it does not work and
we lack time to figure out why.  Instead, ensure that words are
the same size as with GMP.
* src/factor.c (USE_INT128): Remove.  All uses removed.
(wide_uint, W_TYPE_SIZE): Define to be the same as GMP.
(MP_LIMB_MAX): New macro.  Check that it matches W_TYPE_SIZE.
(USE_LONGLONG_H): Default to true.
(UHWtype) [USE_LONGLONG_H]: Define to unsigned int, same as GMP.
(prime_p): Go back to not worrying about 128-bit words,
since GMP doesn’t worry and doesn’t use them.
(lbuf_putbitcnt): New function, since we cannot assume
that bitcnt_t fits into mp_limb_t.
(print_factors): Use it.
* src/make-prime-list.c (output_primes):
Don’t assume that wide_uint’s maximum is UINTMAX_MAX.
---

diff --git a/src/factor.c b/src/factor.c
index 50d65e4f05..fc1b6b4598 100644
--- a/src/factor.c
+++ b/src/factor.c
@@ -115,36 +115,42 @@
 /* Token delimiters when reading from a file.  */
 #define DELIM "\n\t "
 
-/* __int128 is experimental; to use it, compile with -DUSE_INT128.  */
-#ifndef USE_INT128
-# define USE_INT128 false
-#endif
-
-/* Typedefs and macros related to an unsigned type that is no narrower
-   than 32 bits and no narrower than unsigned int.  For efficiency,
-   use the widest hardware-supported type.  */
-#if USE_INT128
-typedef unsigned __int128 wide_uint;
-# define W_TYPE_SIZE 128
+/* GMP uses the unsigned integer type mp_limb_t as its word in
+   multiprecision arithmetic.  This code uses the same word for single
+   and double precision integer arithmetic.  Although previous
+   versions of this code used uintmax_t for single and double
+   precision, that introduced opportunities for bugs and was not worth
+   the hassle, as mp_limb_t and uintmax_t are invariably the same on
+   64-bit platforms, and 32-bit platforms are less important now.
+
+   Although GMP can be built with GMP_NUMB_BITS < GMP_LIMB_BITS,
+   so that some high-order bits of a word are not used, do not
+   do this in single and double precision integer arithmetic.
+   Instead, always use the full word.  */
+
+/* A word and its size in bits.  */
+typedef mp_limb_t wide_uint;
+#ifdef GMP_LIMB_BITS
+# define W_TYPE_SIZE GMP_LIMB_BITS
 #else
-typedef uintmax_t wide_uint;
-# define W_TYPE_SIZE UINTMAX_WIDTH
+/* An older GMP, or mini-gmp; guess the usual value.  */
+# define W_TYPE_SIZE ULONG_WIDTH
 #endif
-#define WIDE_UINT_MAX ((wide_uint) -1)
 
-/* Check that we are not on a theoretical (but allowed by
-   POSIX) platform where WIDE_UINT_MAX <= INT_MAX.
+/* The maximum value of a word.  */
+#define MP_LIMB_MAX ((mp_limb_t) -1)
+
+/* Check W_TYPE_SIZE's value, as it might be a guess.  */
+static_assert (MP_LIMB_MAX >> (W_TYPE_SIZE - 1) == 1);
+
+/* Check that the builder didn't specify something perverse like
+   "-DMINI_GMP_LIMB_TYPE=short -DW_TYPE_SIZE=USHRT_WIDTH".
    This could result in undefined behavior due to signed integer
    overflow if a word promotes to int.  */
-static_assert (INT_MAX < WIDE_UINT_MAX);
+static_assert (INT_MAX < MP_LIMB_MAX);
 
 #ifndef USE_LONGLONG_H
-/* With the way we use longlong.h, it's only safe to use
-   when UWtype = UHWtype, as there were various cases
-   (as can be seen in the history for longlong.h) where
-   for example, _LP64 was required to enable W_TYPE_SIZE==64 code,
-   to avoid compile time or run time issues.  */
-# define USE_LONGLONG_H (W_TYPE_SIZE == ULONG_WIDTH)
+# define USE_LONGLONG_H true
 #endif
 
 #if USE_LONGLONG_H
@@ -152,7 +158,7 @@ static_assert (INT_MAX < WIDE_UINT_MAX);
 /* Make definitions for longlong.h to make it do what it can do for us */
 
 # define UWtype  wide_uint
-# define UHWtype unsigned long int
+# define UHWtype unsigned int
 # undef UDWtype
 # if HAVE_ATTRIBUTE_MODE
 typedef unsigned int UQItype    __attribute__ ((mode (QI)));
@@ -1189,21 +1195,8 @@ prime_p (wide_uint n)
   if (n <= 1)
     return false;
 
-  wide_uint cast_out_limit
-    = (wide_uint) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME;
-
-#ifndef EXHIBIT_INT128_BUG
-  /* FIXME: Do the small-prime performance improvement only if
-     wide_uint is exactly 64 bits wide.  We don't know why the code
-     misbehaves when wide_uint is wider; e.g., when compiled with
-     'gcc -DUSE_INT128 -DEXHIBIT_INT128_BUG', 'factor' mishandles
-     340282366920938463463374607431768211355.  */
-  if (W_TYPE_SIZE != 64)
-    cast_out_limit = 2;
-#endif
-
   /* We have already cast out small primes.  */
-  if (n < cast_out_limit)
+  if (n < (wide_uint) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME)
     return true;
 
   /* Precomputation for Miller-Rabin.  */
@@ -1978,6 +1971,14 @@ lbuf_putint (wide_uint i)
 {
   lbuf_putint_append (i, lbuf_buf + sizeof lbuf_buf);
 }
+static void
+lbuf_putbitcnt (mp_bitcnt_t i) /* Output I in decimal; I may exceed a word.  */
+{
+  char *bufend = lbuf_buf + sizeof lbuf_buf; /* One past lbuf_buf's end.  */
+  for (; MP_LIMB_MAX < i; i /= 10) /* While I is too big for mp_limb_t...  */
+    *--bufend = '0' + i % 10; /* ...store its low digit, right to left.  */
+  lbuf_putint_append (i, bufend); /* I <= MP_LIMB_MAX now; helper not shown.  */
+}
 
 /* Append the string representation of T to lbuf_buf.  */
 static void
@@ -2124,7 +2125,7 @@ print_factors (char const *input)
         if (print_exponents && factors.e[j] > 1)
           {
             lbuf_putc ('^');
-            lbuf_putint (factors.e[j]);
+            lbuf_putbitcnt (factors.e[j]);
             break;
           }
       }
diff --git a/src/make-prime-list.c b/src/make-prime-list.c
index 6b53a624ee..c79eb5878a 100644
--- a/src/make-prime-list.c
+++ b/src/make-prime-list.c
@@ -146,7 +146,7 @@ output_primes (const struct prime *primes, unsigned nprimes)
         abort ();
       printf ("P (%u, %u,\n   (", primes[i].p - p, d8);
       print_wide_uint (primes[i].pinv, 0, wide_uint_bits);
-      printf ("),\n   UINTMAX_MAX / %u)\n", primes[i].p);
+      printf ("),\n   (wide_uint) -1 / %u)\n", primes[i].p);
       p = primes[i].p;
     }