gh-118164: Break a loop between _pydecimal and _pylong and optimize int to str conver...

author Serhiy Storchaka <storchaka@gmail.com>

Sun, 5 May 2024 05:20:06 +0000 (08:20 +0300)

committer GitHub <noreply@github.com>

Sun, 5 May 2024 05:20:06 +0000 (08:20 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Sun, 5 May 2024 05:20:06 +0000 (08:20 +0300)
committer GitHub <noreply@github.com>
Sun, 5 May 2024 05:20:06 +0000 (08:20 +0300)
diff --git a/Lib/_pylong.py b/Lib/_pylong.py

index 936346e187ff69956cf6730b35937b36b47371ea..30bee6fc9ef54ca6f76c15d09bffd59f59a91e09 100644 (file)
--- a/Lib/_pylong.py
+++ b/Lib/_pylong.py
@@ -14,6 +14,10 @@ maximum performance, they should use something like gmpy2."""
  
  import re
  import decimal
+try:
+    import _decimal
+except ImportError:
+    _decimal = None
  
  
  def int_to_decimal(n):
@@ -82,7 +86,47 @@ def int_to_decimal(n):
  
  def int_to_decimal_string(n):
      """Asymptotically fast conversion of an 'int' to a decimal string."""
-    return str(int_to_decimal(n))
+    w = n.bit_length()
+    if w > 450_000 and _decimal is not None:
+        # It is only usable with the C decimal implementation.
+        # _pydecimal.py calls str() on very large integers, which in its
+        # turn calls int_to_decimal_string(), causing very deep recursion.
+        return str(int_to_decimal(n))
+
+    # Fallback algorithm for the case when the C decimal module isn't
+    # available.  This algorithm is asymptotically worse than the algorithm
+    # using the decimal module, but better than the quadratic time
+    # implementation in longobject.c.
+    def inner(n, w):
+        if w <= 1000:
+            return str(n)
+        w2 = w >> 1
+        d = pow10_cache.get(w2)
+        if d is None:
+            d = pow10_cache[w2] = 5**w2 << w2 # 10**i = (5*2)**i = 5**i * 2**i
+        hi, lo = divmod(n, d)
+        return inner(hi, w - w2) + inner(lo, w2).zfill(w2)
+
+    # The estimation of the number of decimal digits.
+    # There is no harm in small error.  If we guess too large, there may
+    # be leading 0's that need to be stripped.  If we guess too small, we
+    # may need to call str() recursively for the remaining highest digits,
+    # which can still potentially be a large integer. This is manifested
+    # only if the number has way more than 10**15 digits, that exceeds
+    # the 52-bit physical address limit in both Intel64 and AMD64.
+    w = int(w * 0.3010299956639812 + 1)  # log10(2)
+    pow10_cache = {}
+    if n < 0:
+        n = -n
+        sign = '-'
+    else:
+        sign = ''
+    s = inner(n, w)
+    if s[0] == '0' and n:
+        # If our guess of w is too large, there may be leading 0's that
+        # need to be stripped.
+        s = s.lstrip('0')
+    return sign + s
  
  
  def _str_to_int_inner(s):
diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py

index 47fc50a0e2034972569f3b9288afd634a2d0696d..c8626398b35b896adfa8740b307e27e75fbbfcd1 100644 (file)
--- a/Lib/test/test_int.py
+++ b/Lib/test/test_int.py
@@ -829,17 +829,28 @@ class PyLongModuleTests(unittest.TestCase):
          sys.set_int_max_str_digits(self._previous_limit)
          super().tearDown()
  
-    def test_pylong_int_to_decimal(self):
-        n = (1 << 100_000) - 1
-        suffix = '9883109375'
+    def _test_pylong_int_to_decimal(self, n, suffix):
          s = str(n)
-        assert s[-10:] == suffix
-        s = str(-n)
-        assert s[-10:] == suffix
-        s = '%d' % n
-        assert s[-10:] == suffix
-        s = b'%d' % n
-        assert s[-10:] == suffix.encode('ascii')
+        self.assertEqual(s[-10:], suffix)
+        s2 = str(-n)
+        self.assertEqual(s2, '-' + s)
+        s3 = '%d' % n
+        self.assertEqual(s3, s)
+        s4 = b'%d' % n
+        self.assertEqual(s4, s.encode('ascii'))
+
+    def test_pylong_int_to_decimal(self):
+        self._test_pylong_int_to_decimal((1 << 100_000), '9883109376')
+        self._test_pylong_int_to_decimal((1 << 100_000) - 1, '9883109375')
+        self._test_pylong_int_to_decimal(10**30_000, '0000000000')
+        self._test_pylong_int_to_decimal(10**30_000 - 1, '9999999999')
+        self._test_pylong_int_to_decimal(3**60_000, '9313200001')
+
+    @support.requires_resource('cpu')
+    def test_pylong_int_to_decimal_2(self):
+        self._test_pylong_int_to_decimal(2**1_000_000, '2747109376')
+        self._test_pylong_int_to_decimal(10**300_000, '0000000000')
+        self._test_pylong_int_to_decimal(3**600_000, '3132000001')
  
      def test_pylong_int_divmod(self):
          n = (1 << 100_000)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst

new file mode 100644 (file)

index 0000000..5eb3b6f
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst
@@ -0,0 +1,4 @@
+Break a loop between the Python implementation of the :mod:`decimal` module
+and the Python code for integer to string conversion. Also optimize integer
+to string conversion for values in the range from 9_000 to 135_000 decimal
+digits.
author	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 5 May 2024 05:20:06 +0000 (08:20 +0300)
committer	GitHub <noreply@github.com>
	Sun, 5 May 2024 05:20:06 +0000 (08:20 +0300)
Lib/_pylong.py		patch \| blob \| blame \| history
Lib/test/test_int.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Core and Builtins/2024-05-02-15-57-07.gh-issue-118164.AF6kwI.rst	[new file with mode: 0644]	patch \| blob