git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.13] GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852)
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Wed, 20 Nov 2024 20:10:29 +0000 (21:10 +0100)
committer: GitHub <noreply@github.com>
Wed, 20 Nov 2024 20:10:29 +0000 (20:10 +0000)
GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852)

Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the
filesystem encoding when quoting and unquoting file URIs, rather than
forcing use of UTF-8.

No changes are needed in the `nturl2path` module because Windows always
uses UTF-8, per PEP 529.
(cherry picked from commit c9b399fbdb01584dcfff0d7f6ad484644ff269c3)

Co-authored-by: Barney Gale <barney.gale@gmail.com>
Lib/test/test_urllib.py
Lib/test/test_urllib2.py
Lib/urllib/request.py
Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst [new file with mode: 0644]

diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 2c53ce3f99e675a9b01b4554b9722b3357746916..ab18e80663e3bc4b41752e577a7f34c74e06f4bf 100644 (file)
@@ -709,10 +709,6 @@ class urlretrieve_FileTests(unittest.TestCase):
 
     def constructLocalFileUrl(self, filePath):
         filePath = os.path.abspath(filePath)
-        try:
-            filePath.encode("utf-8")
-        except UnicodeEncodeError:
-            raise unittest.SkipTest("filePath is not encodable to utf8")
         return "file://%s" % urllib.request.pathname2url(filePath)
 
     def createNewTempFile(self, data=b""):
@@ -1562,6 +1558,13 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('/a/b.c'), '/a/b.c')
         self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
 
+    @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+    def test_pathname2url_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
+        self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
+
     @unittest.skipUnless(sys.platform == 'win32',
                          'test specific to Windows pathnames.')
     def test_url2pathname_win(self):
@@ -1612,6 +1615,15 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('////foo/bar'), '//foo/bar')
         self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
 
+    @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+    def test_url2pathname_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        url = os_helper.FS_NONASCII
+        self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
+        url = urllib.parse.quote(url, encoding=encoding, errors=errors)
+        self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
+
 class Utility_Tests(unittest.TestCase):
     """Testcase to test the various utility functions in the urllib."""
 
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index eed0599642edfb2fe2572fad8f49bf3a60fe19dd..068dd859f27220625b99465004b283988075ae15 100644 (file)
@@ -717,10 +717,6 @@ class OpenerDirectorTests(unittest.TestCase):
 
 
 def sanepathname2url(path):
-    try:
-        path.encode("utf-8")
-    except UnicodeEncodeError:
-        raise unittest.SkipTest("path is not encodable to utf8")
     urlpath = urllib.request.pathname2url(path)
     if os.name == "nt" and urlpath.startswith("///"):
         urlpath = urlpath[2:]
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 9eb819ca53229fd4928890f1f2a0b0d09462d566..f0321814c695098a2d50206b64586d923aed6503 100644 (file)
@@ -1660,12 +1660,16 @@ else:
             # URL has an empty authority section, so the path begins on the
             # third character.
             pathname = pathname[2:]
-        return unquote(pathname)
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        return unquote(pathname, encoding=encoding, errors=errors)
 
     def pathname2url(pathname):
         """OS-specific conversion from a file system path to a relative URL
         of the 'file' scheme; not recommended for general use."""
-        return quote(pathname)
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        return quote(pathname, encoding=encoding, errors=errors)
 
 
 ftpcache = {}
diff --git a/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst
new file mode 100644 (file)
index 0000000..abceda8
--- /dev/null
@@ -0,0 +1,4 @@
+Fix issue where :func:`urllib.request.url2pathname` and
+:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
+unquoting file URIs. They now use the :term:`filesystem encoding and error
+handler`.