.. versionadded:: 3.13
+ .. versionchanged:: next
+ If a URL authority (e.g. a hostname) is present and resolves to a local
+ address, it is discarded. If an authority is present and *doesn't*
+ resolve to a local address, then on Windows a UNC path is returned (as
+ before), and on other platforms a :exc:`ValueError` is raised.
+
.. method:: Path.as_uri()
>>> 'file:' + pathname2url(path)
'file:///C:/Program%20Files'
- .. versionchanged:: 3.14
- Paths beginning with a slash are converted to URLs with authority
- sections. For example, the path ``/etc/hosts`` is converted to
- the URL ``///etc/hosts``.
-
.. versionchanged:: 3.14
Windows drive letters are no longer converted to uppercase, and ``:``
characters not following a drive letter no longer cause an
:exc:`OSError` exception to be raised on Windows.
+ .. versionchanged:: 3.14
+ Paths beginning with a slash are converted to URLs with authority
+ sections. For example, the path ``/etc/hosts`` is converted to
+ the URL ``///etc/hosts``.
+
.. function:: url2pathname(url)
characters not following a drive letter no longer cause an
:exc:`OSError` exception to be raised on Windows.
+ .. versionchanged:: next
+ This function calls :func:`socket.gethostbyname` if the URL authority
+ is neither empty nor ``localhost``. If the authority resolves to a local IP
+ address then it is discarded; otherwise, on Windows a UNC path is
+ returned (as before), and on other platforms a
+ :exc:`~urllib.error.URLError` is raised.
+
.. function:: getproxies()
supporting SHA-256 digest authentication as specified in :rfc:`7616`.
(Contributed by Calvin Bui in :gh:`128193`.)
+* Improve standards compliance when parsing and emitting ``file:`` URLs.
+
+ In :func:`urllib.request.url2pathname`:
+
+ - Discard URL authorities that resolve to a local IP address.
+ - Raise :exc:`~urllib.error.URLError` if a URL authority doesn't resolve
+ to a local IP address, except on Windows where we return a UNC path.
+
+ In :func:`urllib.request.pathname2url`:
+
+ - Include an empty URL authority when a path begins with a slash. For
+ example, the path ``/etc/hosts`` is converted to the URL ``///etc/hosts``.
+
+ On Windows, drive letters are no longer converted to uppercase, and ``:``
+ characters not following a drive letter no longer cause an :exc:`OSError`
+ exception to be raised.
+
+ (Contributed by Barney Gale in :gh:`125866`.)
+
uuid
----
"""Return a new path from the given 'file' URI."""
if not uri.startswith('file:'):
raise ValueError(f"URI does not start with 'file:': {uri!r}")
+ from urllib.error import URLError
from urllib.request import url2pathname
- path = cls(url2pathname(uri.removeprefix('file:')))
+ try:
+ path = cls(url2pathname(uri.removeprefix('file:')))
+ except URLError as exc:
+ raise ValueError(exc.reason) from None
if not path.is_absolute():
raise ValueError(f"URI is not absolute: {uri!r}")
return path
def test_from_uri_posix(self):
P = self.cls
self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar'))
- self.assertEqual(P.from_uri('file://foo/bar'), P('//foo/bar'))
+ self.assertRaises(ValueError, P.from_uri, 'file://foo/bar')
self.assertEqual(P.from_uri('file:///foo/bar'), P('/foo/bar'))
self.assertEqual(P.from_uri('file:////foo/bar'), P('//foo/bar'))
self.assertEqual(P.from_uri('file://localhost/foo/bar'), P('/foo/bar'))
+ if not is_wasi:
+ self.assertEqual(P.from_uri('file://127.0.0.1/foo/bar'), P('/foo/bar'))
+ self.assertEqual(P.from_uri(f'file://{socket.gethostname()}/foo/bar'),
+ P('/foo/bar'))
self.assertRaises(ValueError, P.from_uri, 'foo/bar')
self.assertRaises(ValueError, P.from_uri, '/foo/bar')
self.assertRaises(ValueError, P.from_uri, '//foo/bar')
from test.support import os_helper
from test.support import socket_helper
import os
+import socket
try:
import ssl
except ImportError:
"url2pathname() failed; %s != %s" %
(expect, result))
+ def test_pathname2url(self):
+ # Test cases common to Windows and POSIX.
+ fn = urllib.request.pathname2url
+ sep = os.path.sep
+ self.assertEqual(fn(''), '')
+ # A path starting with a separator gains an empty authority ('//').
+ self.assertEqual(fn(sep), '///')
+ self.assertEqual(fn('a'), 'a')
+ self.assertEqual(fn(f'a{sep}b.c'), 'a/b.c')
+ self.assertEqual(fn(f'{sep}a{sep}b.c'), '///a/b.c')
+ # '%' and '#' in the path are percent-encoded.
+ self.assertEqual(fn(f'{sep}a{sep}b%#c'), '///a/b%25%23c')
+
@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
def test_pathname2url_win(self):
'test specific to POSIX pathnames')
def test_pathname2url_posix(self):
fn = urllib.request.pathname2url
- self.assertEqual(fn('/'), '///')
- self.assertEqual(fn('/a/b.c'), '///a/b.c')
self.assertEqual(fn('//a/b.c'), '////a/b.c')
self.assertEqual(fn('///a/b.c'), '/////a/b.c')
self.assertEqual(fn('////a/b.c'), '//////a/b.c')
- self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c')
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
def test_pathname2url_nonascii(self):
url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
+ def test_url2pathname(self):
+ # Test cases common to Windows and POSIX.
+ fn = urllib.request.url2pathname
+ sep = os.path.sep
+ self.assertEqual(fn(''), '')
+ self.assertEqual(fn('/'), f'{sep}')
+ # An empty authority ('//' followed directly by the path) is dropped.
+ self.assertEqual(fn('///'), f'{sep}')
+ self.assertEqual(fn('////'), f'{sep}{sep}')
+ self.assertEqual(fn('foo'), 'foo')
+ self.assertEqual(fn('foo/bar'), f'foo{sep}bar')
+ self.assertEqual(fn('/foo/bar'), f'{sep}foo{sep}bar')
+ # A 'localhost' authority is dropped as well.
+ self.assertEqual(fn('//localhost/foo/bar'), f'{sep}foo{sep}bar')
+ self.assertEqual(fn('///foo/bar'), f'{sep}foo{sep}bar')
+ self.assertEqual(fn('////foo/bar'), f'{sep}{sep}foo{sep}bar')
+
@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
def test_url2pathname_win(self):
fn = urllib.request.url2pathname
- self.assertEqual(fn('/'), '\\')
self.assertEqual(fn('/C:/'), 'C:\\')
self.assertEqual(fn("///C|"), 'C:')
self.assertEqual(fn("///C:"), 'C:')
'test specific to POSIX pathnames')
def test_url2pathname_posix(self):
fn = urllib.request.url2pathname
- self.assertEqual(fn('/foo/bar'), '/foo/bar')
- self.assertEqual(fn('//foo/bar'), '//foo/bar')
- self.assertEqual(fn('///foo/bar'), '/foo/bar')
- self.assertEqual(fn('////foo/bar'), '//foo/bar')
- self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar')
+ self.assertRaises(urllib.error.URLError, fn, '//foo/bar')
+ self.assertRaises(urllib.error.URLError, fn, '//localhost:/foo/bar')
+ self.assertRaises(urllib.error.URLError, fn, '//:80/foo/bar')
+ self.assertRaises(urllib.error.URLError, fn, '//:/foo/bar')
+ self.assertRaises(urllib.error.URLError, fn, '//c:80/foo/bar')
+ self.assertEqual(fn('//127.0.0.1/foo/bar'), '/foo/bar')
+ self.assertEqual(fn(f'//{socket.gethostname()}/foo/bar'), '/foo/bar')
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
def test_url2pathname_nonascii(self):
return [part.strip() for part in res]
class FileHandler(BaseHandler):
- # Use local file or FTP depending on form of URL
- def file_open(self, req):
- url = req.selector
- if url[:2] == '//' and url[2:3] != '/' and (req.host and
- req.host != 'localhost'):
- if not req.host in self.get_names():
- raise URLError("file:// scheme is supported only on localhost")
- else:
- return self.open_local_file(req)
-
# names for the localhost
names = None
def get_names(self):
def open_local_file(self, req):
import email.utils
import mimetypes
- host = req.host
- filename = req.selector
+ filename = _splittype(req.full_url)[1]
localfile = url2pathname(filename)
try:
stats = os.stat(localfile)
headers = email.message_from_string(
'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
(mtype or 'text/plain', size, modified))
- if host:
- host, port = _splitport(host)
- if not host or \
- (not port and _safe_gethostbyname(host) in self.get_names()):
- origurl = 'file:' + pathname2url(localfile)
- return addinfourl(open(localfile, 'rb'), headers, origurl)
+ origurl = f'file:{pathname2url(localfile)}'
+ return addinfourl(open(localfile, 'rb'), headers, origurl)
except OSError as exp:
raise URLError(exp, exp.filename)
- raise URLError('file not on local host')
-def _safe_gethostbyname(host):
+ file_open = open_local_file
+
+def _is_local_authority(authority):
+ # Report whether a file: URL authority refers to this machine.
+ # An empty authority or a literal 'localhost' is local without a lookup.
+ if not authority or authority == 'localhost':
+ return True
try:
- return socket.gethostbyname(host)
- except socket.gaierror:
- return None
+ address = socket.gethostbyname(authority)
+ # NOTE(review): AttributeError presumably covers platforms whose socket
+ # module lacks gethostbyname (e.g. WASI) -- confirm.
+ except (socket.gaierror, AttributeError):
+ return False
+ # Local only if the resolved address is among FileHandler's localhost names.
+ return address in FileHandler().get_names()
class FTPHandler(BaseHandler):
def ftp_open(self, req):
def url2pathname(url):
"""OS-specific conversion from a relative URL of the 'file' scheme
to a file system path; not recommended for general use."""
- if url[:3] == '///':
- # Empty authority section, so the path begins on the third character.
- url = url[2:]
- elif url[:12] == '//localhost/':
- # Skip past 'localhost' authority.
- url = url[11:]
-
+ # Presumably splits '//authority/path' into (authority, path); the
+ # authority is then checked for locality below.
+ authority, url = _splithost(url)
if os.name == 'nt':
- if url[:3] == '///':
- # Skip past extra slash before UNC drive in URL path.
+ if not _is_local_authority(authority):
+ # e.g. file://server/share/file.txt
+ url = '//' + authority + url
+ elif url[:3] == '///':
+ # e.g. file://///server/share/file.txt
url = url[1:]
else:
if url[:1] == '/' and url[2:3] in (':', '|'):
# Older URLs use a pipe after a drive letter
url = url[:1] + ':' + url[2:]
url = url.replace('/', '\\')
+ # Outside Windows a non-local authority is rejected rather than mapped
+ # to a path.
+ elif not _is_local_authority(authority):
+ raise URLError("file:// scheme is supported only on localhost")
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
return unquote(url, encoding=encoding, errors=errors)
--- /dev/null
+Fix issue where :func:`urllib.request.url2pathname` mishandled file URLs with
+authorities. If an authority is present and resolves to a local address, it is
+now discarded. If an authority is present but *doesn't* resolve to a local
+address, then on Windows a UNC path is returned (as before), and on
+other platforms a :exc:`urllib.error.URLError` is now raised.