]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-85168: Use filesystem encoding when converting to/from `file` URIs (#126852)
authorBarney Gale <barney.gale@gmail.com>
Tue, 19 Nov 2024 21:19:30 +0000 (21:19 +0000)
committerGitHub <noreply@github.com>
Tue, 19 Nov 2024 21:19:30 +0000 (21:19 +0000)
Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the
filesystem encoding when quoting and unquoting file URIs, rather than
forcing use of UTF-8.

No changes are needed in the `nturl2path` module because Windows always
uses UTF-8, per PEP 529.

Lib/test/test_urllib.py
Lib/test/test_urllib2.py
Lib/urllib/request.py
Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst [new file with mode: 0644]

index 71084a462d0af55890c9cbde91426a98bd3dc5e2..c66b1c49c316e6cbf89e2c315455a4f0edab7190 100644 (file)
@@ -609,10 +609,6 @@ class urlretrieve_FileTests(unittest.TestCase):
 
     def constructLocalFileUrl(self, filePath):
         filePath = os.path.abspath(filePath)
-        try:
-            filePath.encode("utf-8")
-        except UnicodeEncodeError:
-            raise unittest.SkipTest("filePath is not encodable to utf8")
         return "file://%s" % urllib.request.pathname2url(filePath)
 
     def createNewTempFile(self, data=b""):
@@ -1462,6 +1458,13 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('/a/b.c'), '/a/b.c')
         self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
 
+    @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+    def test_pathname2url_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
+        self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
+
     @unittest.skipUnless(sys.platform == 'win32',
                          'test specific to Windows pathnames.')
     def test_url2pathname_win(self):
@@ -1512,6 +1515,15 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('////foo/bar'), '//foo/bar')
         self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
 
+    @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+    def test_url2pathname_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        url = os_helper.FS_NONASCII
+        self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
+        url = urllib.parse.quote(url, encoding=encoding, errors=errors)
+        self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
+
 class Utility_Tests(unittest.TestCase):
     """Testcase to test the various utility functions in the urllib."""
 
index b90ccc2f125b93f2ad4fe66de17f9de92d6e2e60..99ad11cf0552ebd7802de8b6e7b628a66ace8f3c 100644 (file)
@@ -718,10 +718,6 @@ class OpenerDirectorTests(unittest.TestCase):
 
 
 def sanepathname2url(path):
-    try:
-        path.encode("utf-8")
-    except UnicodeEncodeError:
-        raise unittest.SkipTest("path is not encodable to utf8")
     urlpath = urllib.request.pathname2url(path)
     if os.name == "nt" and urlpath.startswith("///"):
         urlpath = urlpath[2:]
index 5c061a245179e0a8dda971bacacc997a86a55c26..bcfdcc51fac369d23fd411fe43ec4223ebca45c2 100644 (file)
@@ -1657,12 +1657,16 @@ else:
             # URL has an empty authority section, so the path begins on the
             # third character.
             pathname = pathname[2:]
-        return unquote(pathname)
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        return unquote(pathname, encoding=encoding, errors=errors)
 
     def pathname2url(pathname):
         """OS-specific conversion from a file system path to a relative URL
         of the 'file' scheme; not recommended for general use."""
-        return quote(pathname)
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        return quote(pathname, encoding=encoding, errors=errors)
 
 
 # Utility functions
diff --git a/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst
new file mode 100644 (file)
index 0000000..abceda8
--- /dev/null
@@ -0,0 +1,4 @@
+Fix issue where :func:`urllib.request.url2pathname` and
+:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
+unquoting file URIs. They now use the :term:`filesystem encoding and error
+handler`.