From: Victor Stinner Date: Tue, 20 Jun 2017 14:20:36 +0000 (+0200) Subject: bpo-30500: urllib: Simplify splithost by calling into urlparse. (#1849) (#2294) X-Git-Tag: v2.7.14rc1~89 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d4324baca4c03eb8d55446cd1b74b32ec5633af5;p=thirdparty%2FPython%2Fcpython.git bpo-30500: urllib: Simplify splithost by calling into urlparse. (#1849) (#2294) The current regex based splitting produces a wrong result. For example:: http://abc#@def Web browsers parse that URL as ``http://abc/#@def``, that is, the host is ``abc``, the path is ``/``, and the fragment is ``#@def``. (cherry picked from commit 90e01e50ef8a9e6c91f30d965563c378a4ad26de) --- diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 14de91e13dad..1ce9201c0693 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -879,6 +879,26 @@ class Utility_Tests(unittest.TestCase): self.assertEqual(splithost('/foo/bar/baz.html'), (None, '/foo/bar/baz.html')) + # bpo-30500: # starts a fragment. + self.assertEqual(splithost('//127.0.0.1#@host.com'), + ('127.0.0.1', '/#@host.com')) + self.assertEqual(splithost('//127.0.0.1#@host.com:80'), + ('127.0.0.1', '/#@host.com:80')) + self.assertEqual(splithost('//127.0.0.1:80#@host.com'), + ('127.0.0.1:80', '/#@host.com')) + + # Empty host is returned as empty string. + self.assertEqual(splithost("///file"), + ('', '/file')) + + # Trailing semicolon, question mark and hash symbol are kept. 
+ self.assertEqual(splithost("//example.net/file;"), + ('example.net', '/file;')) + self.assertEqual(splithost("//example.net/file?"), + ('example.net', '/file?')) + self.assertEqual(splithost("//example.net/file#"), + ('example.net', '/file#')) + def test_splituser(self): splituser = urllib.splituser self.assertEqual(splituser('User:Pass@www.python.org:080'), diff --git a/Lib/urllib.py b/Lib/urllib.py index c3c8ef4b6004..d85504a5cb7e 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -1093,8 +1093,7 @@ def splithost(url): """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" global _hostprog if _hostprog is None: - import re - _hostprog = re.compile('^//([^/?]*)(.*)$') + _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL) match = _hostprog.match(url) if match: diff --git a/Misc/ACKS b/Misc/ACKS index a411bc5ffc8f..35c77b67a6c8 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -993,6 +993,7 @@ Chad Netzer Max Neunhöffer George Neville-Neil Hieu Nguyen +Nam Nguyen Johannes Nicolai Samuel Nicolary Jonathan Niehof diff --git a/Misc/NEWS b/Misc/NEWS index 361a9d3c762a..f85e829db101 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -52,6 +52,11 @@ Extension Modules Library ------- +- [Security] bpo-30500: Fix urllib.splithost() to correctly parse + fragments. For example, ``splithost('//127.0.0.1#@evil.com/')`` now + correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com`` + as the host in an authentication (``login@host``). + - [Security] bpo-29591: Update expat copy from 2.1.1 to 2.2.0 to get fixes of CVE-2016-0718 and CVE-2016-4472. See https://sourceforge.net/p/expat/bugs/537/ for more information.