From: Victor Stinner Date: Tue, 11 Jun 2019 10:45:35 +0000 (+0200) Subject: [2.7] bpo-36742: Fix urlparse.urlsplit() error message for Unicode URL (GH-13937) X-Git-Tag: v2.7.17rc1~58 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2b578479b96aa3deeeb8bac313a02b5cf3cb1aff;p=thirdparty%2FPython%2Fcpython.git [2.7] bpo-36742: Fix urlparse.urlsplit() error message for Unicode URL (GH-13937) If urlparse.urlsplit() detects an invalid netloc according to NFKC normalization, the error message type is now str rather than unicode, and repr() is used to format the URL, to prevent an error when displaying the error message. --- diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 857ed96d92fe..86c4a0595c4f 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -656,6 +656,15 @@ class UrlParseTestCase(unittest.TestCase): with self.assertRaises(ValueError): urlparse.urlsplit(url) + # check error message: invalid netloc must be formated with repr() + # to get an ASCII error message + with self.assertRaises(ValueError) as cm: + urlparse.urlsplit(u'http://example.com\uFF03@bing.com') + self.assertEqual(str(cm.exception), + "netloc u'example.com\\uff03@bing.com' contains invalid characters " + "under NFKC normalization") + self.assertIsInstance(cm.exception.args[0], str) + def test_main(): test_support.run_unittest(UrlParseTestCase) diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 6834f3c1798b..798b467b605f 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -180,8 +180,9 @@ def _checknetloc(netloc): return for c in '/?#@:': if c in netloc2: - raise ValueError(u"netloc '" + netloc + u"' contains invalid " + - u"characters under NFKC normalization") + raise ValueError("netloc %r contains invalid characters " + "under NFKC normalization" + % netloc) def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: diff --git a/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst 
b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst new file mode 100644 index 000000000000..3ba774056f15 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst @@ -0,0 +1,3 @@ +:func:`urlparse.urlsplit` error message for invalid ``netloc`` according to +NFKC normalization is now a :class:`str` string, rather than a +:class:`unicode` string, to prevent error when displaying the error.