git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] gh-143925: Reject control characters in data: URL mediatypes (#144111)
author: Seth Michael Larson <seth@python.org>
Sun, 25 Jan 2026 17:06:01 +0000 (11:06 -0600)
committer: GitHub <noreply@github.com>
Sun, 25 Jan 2026 17:06:01 +0000 (17:06 +0000)
(cherry picked from commit f25509e78e8be6ea73c811ac2b8c928c28841b9f)

Lib/test/test_urllib.py
Lib/urllib/request.py
Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst [new file with mode: 0644]

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 7e3607842fdbdda89f9666a24a9a824152fa097c..76a878f6f533facb05e0cd097849d7c460738736 100644 (file)
@@ -12,6 +12,7 @@ from test import support
 from test.support import os_helper
 from test.support import socket_helper
 from test.support import warnings_helper
+from test.support import control_characters_c0
 from test.support.testcase import ExtraAssertions
 import os
 try:
@@ -677,6 +678,13 @@ class urlopen_DataTests(unittest.TestCase, ExtraAssertions):
         # missing padding character
         self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
 
+    def test_invalid_mediatype(self):
+        for c0 in control_characters_c0():
+            self.assertRaises(ValueError,urllib.request.urlopen,
+                              f'data:text/html;{c0},data')
+        for c0 in control_characters_c0():
+            self.assertRaises(ValueError,urllib.request.urlopen,
+                              f'data:text/html{c0};base64,ZGF0YQ==')
 
 class urlretrieve_FileTests(unittest.TestCase):
     """Test urllib.urlretrieve() on local files"""
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 21d76913febfa1743ed22e6c010239d75c869845..3d864f1d92fe7c06130775e924c0a60cf9d3d7a5 100644 (file)
@@ -1636,6 +1636,11 @@ class DataHandler(BaseHandler):
         scheme, data = url.split(":",1)
         mediatype, data = data.split(",",1)
 
+        # Disallow control characters within mediatype.
+        if re.search(r"[\x00-\x1F\x7F]", mediatype):
+            raise ValueError(
+                "Control characters not allowed in data: mediatype")
+
         # even base64 encoded data URLs might be quoted so unquote in any case:
         data = unquote_to_bytes(data)
         if mediatype.endswith(";base64"):
diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
new file mode 100644 (file)
index 0000000..46109df
--- /dev/null
@@ -0,0 +1 @@
+Reject control characters in ``data:`` URL media types.