gh-95865: Further reduce quote_from_bytes memory consumption (#96860)

author Gregory P. Smith <greg@krypto.org>

Mon, 19 Sep 2022 23:06:25 +0000 (16:06 -0700)

committer GitHub <noreply@github.com>

Mon, 19 Sep 2022 23:06:25 +0000 (16:06 -0700)
author Gregory P. Smith <greg@krypto.org>
Mon, 19 Sep 2022 23:06:25 +0000 (16:06 -0700)
committer GitHub <noreply@github.com>
Mon, 19 Sep 2022 23:06:25 +0000 (16:06 -0700)
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py

index 2f629c72ae784e6f8c32ef166dc911f65d089833..81d6018bd1a4953f0510f98758bcb1ccdd06bc7b 100644 (file)
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -985,6 +985,10 @@ class UrlParseTestCase(unittest.TestCase):
          self.assertEqual(result, 'archaeological%20arcana')
          result = urllib.parse.quote_from_bytes(b'')
          self.assertEqual(result, '')
+        result = urllib.parse.quote_from_bytes(b'A'*10_000)
+        self.assertEqual(result, 'A'*10_000)
+        result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183)
+        self.assertEqual(result, 'z%01/%20'*253_183)
  
      def test_unquote_to_bytes(self):
          result = urllib.parse.unquote_to_bytes('abc%20def')
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py

index f25c770068bdd1cf4071dace6d559d4118938a0e..3734c73948c69549afa491b79a26996490f767ca 100644 (file)
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -29,6 +29,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
  
  from collections import namedtuple
  import functools
+import math
  import re
  import types
  import warnings
@@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'):
      if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
          return bs.decode()
      quoter = _byte_quoter_factory(safe)
-    return ''.join(map(quoter, bs))
+    if (bs_len := len(bs)) < 200_000:
+        return ''.join(map(quoter, bs))
+    else:
+        # This saves memory - https://github.com/python/cpython/issues/95865
+        chunk_size = math.isqrt(bs_len)
+        chunks = [''.join(map(quoter, bs[i:i+chunk_size]))
+                  for i in range(0, bs_len, chunk_size)]
+        return ''.join(chunks)
  
  def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
                quote_via=quote_plus):
diff --git a/Misc/NEWS.d/next/Library/2022-09-16-07-53-29.gh-issue-95865.oHjX0A.rst b/Misc/NEWS.d/next/Library/2022-09-16-07-53-29.gh-issue-95865.oHjX0A.rst

new file mode 100644 (file)

index 0000000..03a5be7
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-09-16-07-53-29.gh-issue-95865.oHjX0A.rst
@@ -0,0 +1,3 @@
+Reduce the memory used by :func:`urllib.parse.quote_from_bytes` when quoting large inputs.
+
+Contributed by Dennis Sweeney.
author	Gregory P. Smith <greg@krypto.org>
	Mon, 19 Sep 2022 23:06:25 +0000 (16:06 -0700)
committer	GitHub <noreply@github.com>
	Mon, 19 Sep 2022 23:06:25 +0000 (16:06 -0700)
Lib/test/test_urlparse.py		patch | blob | blame | history
Lib/urllib/parse.py		patch | blob | blame | history
Misc/NEWS.d/next/Library/2022-09-16-07-53-29.gh-issue-95865.oHjX0A.rst	[new file with mode: 0644]	patch | blob