git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.12] GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH...
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Tue, 19 Nov 2024 21:55:10 +0000 (22:55 +0100)
committer: GitHub <noreply@github.com>
Tue, 19 Nov 2024 21:55:10 +0000 (21:55 +0000)
GH-85168: Use filesystem encoding when converting to/from `file` URIs (GH-126852)

Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the
filesystem encoding when quoting and unquoting file URIs, rather than
forcing use of UTF-8.

No changes are needed in the `nturl2path` module because Windows always
uses UTF-8, per PEP 529.
(cherry picked from commit c9b399fbdb01584dcfff0d7f6ad484644ff269c3)

Co-authored-by: Barney Gale <barney.gale@gmail.com>
Lib/test/test_urllib.py
Lib/test/test_urllib2.py
Lib/urllib/request.py
Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst [new file with mode: 0644]

index 15a698cc10f21734c2d85e322a4a009f00548105..b06855e7b3a930f6fc13d34a4e14456d3bb61604 100644 (file)
@@ -718,10 +718,6 @@ class urlretrieve_FileTests(unittest.TestCase):
 
     def constructLocalFileUrl(self, filePath):
         filePath = os.path.abspath(filePath)
-        try:
-            filePath.encode("utf-8")
-        except UnicodeEncodeError:
-            raise unittest.SkipTest("filePath is not encodable to utf8")
         return "file://%s" % urllib.request.pathname2url(filePath)
 
     def createNewTempFile(self, data=b""):
@@ -1571,6 +1567,13 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('/a/b.c'), '/a/b.c')
         self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
 
+    @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+    def test_pathname2url_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
+        self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
+
     @unittest.skipUnless(sys.platform == 'win32',
                          'test specific to Windows pathnames.')
     def test_url2pathname_win(self):
@@ -1621,6 +1624,15 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('////foo/bar'), '//foo/bar')
         self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
 
+    @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+    def test_url2pathname_nonascii(self):
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        url = os_helper.FS_NONASCII
+        self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
+        url = urllib.parse.quote(url, encoding=encoding, errors=errors)
+        self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
+
 class Utility_Tests(unittest.TestCase):
     """Testcase to test the various utility functions in the urllib."""
 
index 69cf1dc7aef19fa52274ad2a125e3fc07b9beb0c..12b1053aa23bab52a75e4c7b94284804ba6e337f 100644 (file)
@@ -716,10 +716,6 @@ class OpenerDirectorTests(unittest.TestCase):
 
 
 def sanepathname2url(path):
-    try:
-        path.encode("utf-8")
-    except UnicodeEncodeError:
-        raise unittest.SkipTest("path is not encodable to utf8")
     urlpath = urllib.request.pathname2url(path)
     if os.name == "nt" and urlpath.startswith("///"):
         urlpath = urlpath[2:]
index 178c9795e19c6e48cff8472d0afeb9af56506413..c89e217b9de1b7b0f8262cec54a41c1361ae5f8f 100644 (file)
@@ -1685,12 +1685,16 @@ else:
             # URL has an empty authority section, so the path begins on the
             # third character.
             pathname = pathname[2:]
-        return unquote(pathname)
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        return unquote(pathname, encoding=encoding, errors=errors)
 
     def pathname2url(pathname):
         """OS-specific conversion from a file system path to a relative URL
         of the 'file' scheme; not recommended for general use."""
-        return quote(pathname)
+        encoding = sys.getfilesystemencoding()
+        errors = sys.getfilesystemencodeerrors()
+        return quote(pathname, encoding=encoding, errors=errors)
 
 
 ftpcache = {}
diff --git a/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst
new file mode 100644 (file)
index 0000000..abceda8
--- /dev/null
@@ -0,0 +1,4 @@
+Fix issue where :func:`urllib.request.url2pathname` and
+:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
+unquoting file URIs. They now use the :term:`filesystem encoding and error
+handler`.