.. versionadded:: 3.13
.. versionchanged:: next
- If a URL authority (e.g. a hostname) is present and resolves to a local
- address, it is discarded. If an authority is present and *doesn't*
- resolve to a local address, then on Windows a UNC path is returned (as
- before), and on other platforms a :exc:`ValueError` is raised.
+ The URL authority is discarded if it matches the local hostname.
+ Otherwise, if the authority isn't empty or ``localhost``, then on
+ Windows a UNC path is returned (as before), and on other platforms a
+ :exc:`ValueError` is raised.
.. method:: Path.as_uri()
the URL ``///etc/hosts``.
.. versionchanged:: next
- The *add_scheme* argument was added.
+ The *add_scheme* parameter was added.
-.. function:: url2pathname(url, *, require_scheme=False)
+.. function:: url2pathname(url, *, require_scheme=False, resolve_host=False)
Convert the given ``file:`` URL to a local path. This function uses
:func:`~urllib.parse.unquote` to decode the URL.
value should include the prefix; a :exc:`~urllib.error.URLError` is raised
if it doesn't.
+ The URL authority is discarded if it is empty, ``localhost``, or the local
+ hostname. Otherwise, if *resolve_host* is set to true, the authority is
+ resolved using :func:`socket.gethostbyname` and discarded if it matches a
+ local IP address (as per :rfc:`RFC 8089 §3 <8089#section-3>`). If the
+ authority is still unhandled, then on Windows a UNC path is returned, and
+ on other platforms a :exc:`~urllib.error.URLError` is raised.
+
This example shows the function being used on Windows::
>>> from urllib.request import url2pathname
:exc:`OSError` exception to be raised on Windows.
.. versionchanged:: next
- This function calls :func:`socket.gethostbyname` if the URL authority
- isn't empty, ``localhost``, or the machine hostname. If the authority
- resolves to a local IP address then it is discarded; otherwise, on
+ The URL authority is discarded if it matches the local hostname.
+ Otherwise, if the authority isn't empty or ``localhost``, then on
Windows a UNC path is returned (as before), and on other platforms a
:exc:`~urllib.error.URLError` is raised.
.. versionchanged:: next
- The *require_scheme* argument was added.
+ The *require_scheme* and *resolve_host* parameters were added.
.. function:: getproxies()
- Accept a complete URL when the new *require_scheme* argument is set to
true.
- - Discard URL authorities that resolve to a local IP address.
- - Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve
- to a local IP address, except on Windows where we return a UNC path.
+ - Discard URL authority if it matches the local hostname.
+ - Discard URL authority if it resolves to a local IP address when the new
+ *resolve_host* argument is set to true.
+ - Raise :exc:`~urllib.error.URLError` if a URL authority isn't local,
+ except on Windows where we return a UNC path as before.
In :func:`urllib.request.pathname2url`:
self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar'))
self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar'))
if not is_wasi:
- self.assertEqual(P.from_uri('file://127.0.0.1/foo/bar'), P('/foo/bar'))
self.assertEqual(P.from_uri(f'file://{socket.gethostname()}/foo/bar'),
P('/foo/bar'))
self.assertRaises(ValueError, P.from_uri, 'foo/bar')
urllib.request.url2pathname(url, require_scheme=True),
expected_path)
- error_subtests = [
+ def test_url2pathname_require_scheme_errors(self):
+ subtests = [
'',
':',
'foo',
'data:file:foo',
'data:file://foo',
]
- for url in error_subtests:
+ for url in subtests:
with self.subTest(url=url):
self.assertRaises(
urllib.error.URLError,
urllib.request.url2pathname,
url, require_scheme=True)
+ def test_url2pathname_resolve_host(self):
+ fn = urllib.request.url2pathname
+ sep = os.path.sep
+ self.assertEqual(fn('//127.0.0.1/foo/bar', resolve_host=True), f'{sep}foo{sep}bar')
+ self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar'), f'{sep}foo{sep}bar')
+ self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar', resolve_host=True), f'{sep}foo{sep}bar')
+
@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
def test_url2pathname_win(self):
self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
self.assertEqual(fn('/////server/path/to/file'), '\\\\server\\path\\to\\file')
+ self.assertEqual(fn('//127.0.0.1/path/to/file'), '\\\\127.0.0.1\\path\\to\\file')
# Localhost paths
self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file')
self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file')
self.assertRaises(urllib.error.URLError, fn, '//:80/foo/bar')
self.assertRaises(urllib.error.URLError, fn, '//:/foo/bar')
self.assertRaises(urllib.error.URLError, fn, '//c:80/foo/bar')
- self.assertEqual(fn('//127.0.0.1/foo/bar'), '/foo/bar')
- self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar'), '/foo/bar')
+ self.assertRaises(urllib.error.URLError, fn, '//127.0.0.1/foo/bar')
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
def test_url2pathname_nonascii(self):
def open_local_file(self, req):
import email.utils
import mimetypes
- localfile = url2pathname(req.full_url, require_scheme=True)
+ localfile = url2pathname(req.full_url, require_scheme=True, resolve_host=True)
try:
stats = os.stat(localfile)
size = stats.st_size
file_open = open_local_file
-def _is_local_authority(authority):
+def _is_local_authority(authority, resolve):
# Compare hostnames
if not authority or authority == 'localhost':
return True
if authority == hostname:
return True
# Compare IP addresses
+ if not resolve:
+ return False
try:
address = socket.gethostbyname(authority)
- except (socket.gaierror, AttributeError):
+ except (socket.gaierror, AttributeError, UnicodeEncodeError):
return False
return address in FileHandler().get_names()
return addinfourl(io.BytesIO(data), headers, url)
-# Code move from the old urllib module
+# Code moved from the old urllib module
-def url2pathname(url, *, require_scheme=False):
+def url2pathname(url, *, require_scheme=False, resolve_host=False):
"""Convert the given file URL to a local file system path.
The 'file:' scheme prefix must be omitted unless *require_scheme*
is set to true.
+
+ The URL authority may be resolved with gethostbyname() if
+ *resolve_host* is set to true.
"""
if require_scheme:
scheme, url = _splittype(url)
raise URLError("URL is missing a 'file:' scheme")
authority, url = _splithost(url)
if os.name == 'nt':
- if not _is_local_authority(authority):
+ if not _is_local_authority(authority, resolve_host):
# e.g. file://server/share/file.txt
url = '//' + authority + url
elif url[:3] == '///':
# Older URLs use a pipe after a drive letter
url = url[:1] + ':' + url[2:]
url = url.replace('/', '\\')
- elif not _is_local_authority(authority):
+ elif not _is_local_authority(authority, resolve_host):
raise URLError("file:// scheme is supported only on localhost")
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
-Fix issue where :func:`urllib.request.url2pathname` mishandled file URLs with
-authorities. If an authority is present and resolves to ``localhost``, it is
-now discarded. If an authority is present but *doesn't* resolve to
-``localhost``, then on Windows a UNC path is returned (as before), and on
-other platforms a :exc:`urllib.error.URLError` is now raised.
+Add *resolve_host* keyword-only parameter to
+:func:`urllib.request.url2pathname`, and fix handling of file URLs with
+authorities.