From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Date: Fri, 23 Jan 2026 11:45:00 +0000 (+0100)
Subject: [3.14] gh-143925: Reject control characters in data: URL mediatypes (#144084)
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=05356b1cc153108aaf27f3b72ce438af4aa218c0;p=thirdparty%2FPython%2Fcpython.git

[3.14] gh-143925: Reject control characters in data: URL mediatypes (#144084)

Co-authored-by: Seth Michael Larson
---

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index ae524c5ffba6..2dd739b77b8e 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -10,6 +10,7 @@ import unittest
 from test import support
 from test.support import os_helper
 from test.support import socket_helper
+from test.support import control_characters_c0
 import os
 import socket
 try:
@@ -590,6 +591,13 @@ class urlopen_DataTests(unittest.TestCase):
         # missing padding character
         self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
 
+    def test_invalid_mediatype(self):
+        for c0 in control_characters_c0():
+            self.assertRaises(ValueError,urllib.request.urlopen,
+                              f'data:text/html;{c0},data')
+        for c0 in control_characters_c0():
+            self.assertRaises(ValueError,urllib.request.urlopen,
+                              f'data:text/html{c0};base64,ZGF0YQ==')
 
 class urlretrieve_FileTests(unittest.TestCase):
     """Test urllib.urlretrieve() on local files"""
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 566b8087aec2..8d7470a22739 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1634,6 +1634,11 @@ class DataHandler(BaseHandler):
         scheme, data = url.split(":",1)
         mediatype, data = data.split(",",1)
 
+        # Disallow control characters within mediatype.
+        if re.search(r"[\x00-\x1F\x7F]", mediatype):
+            raise ValueError(
+                "Control characters not allowed in data: mediatype")
+
         # even base64 encoded data URLs might be quoted so unquote in any case:
         data = unquote_to_bytes(data)
         if mediatype.endswith(";base64"):
diff --git a/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
new file mode 100644
index 000000000000..46109dfbef3e
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2026-01-16-11-51-19.gh-issue-143925.mrtcHW.rst
@@ -0,0 +1 @@
+Reject control characters in ``data:`` URL media types.