git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-126601: `pathname2url()`: handle NTFS alternate data streams (#126760)
author Barney Gale <barney.gale@gmail.com>
Fri, 22 Nov 2024 00:29:05 +0000 (00:29 +0000)
committer GitHub <noreply@github.com>
Fri, 22 Nov 2024 00:29:05 +0000 (00:29 +0000)
Adjust `pathname2url()` to encode embedded colon characters in Windows
paths, rather than bailing out with an `OSError`.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
Doc/library/urllib.request.rst
Lib/nturl2path.py
Lib/test/test_urllib.py
Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst [new file with mode: 0644]

index cdd58b84a995b7d667250587551eb24d0eba3771..e0831bf7e65ad28f1da1dbfdda8f33862187ef1c 100644 (file)
@@ -152,6 +152,11 @@ The :mod:`urllib.request` module defines the following functions:
    the path component of a URL.  This does not produce a complete URL.  The return
    value will already be quoted using the :func:`~urllib.parse.quote` function.
 
+   .. versionchanged:: 3.14
+      On Windows, ``:`` characters not following a drive letter are quoted. In
+      previous versions, :exc:`OSError` was raised if a colon character was
+      found in any position other than the second character.
+
 
 .. function:: url2pathname(path)
 
index 255eb2f547c2ce1b66c5d13d2d1e8bb239cc1c8d..ed7880fd1a775f25bdaf97de73ae3aa706f9e082 100644 (file)
@@ -40,6 +40,7 @@ def pathname2url(p):
     #   C:\foo\bar\spam.foo
     # becomes
     #   ///C:/foo/bar/spam.foo
+    import ntpath
     import urllib.parse
     # First, clean up some special forms. We are going to sacrifice
     # the additional information anyway
@@ -48,16 +49,13 @@ def pathname2url(p):
         p = p[4:]
         if p[:4].upper() == 'UNC/':
             p = '//' + p[4:]
-        elif p[1:2] != ':':
-            raise OSError('Bad path: ' + p)
-    if not ':' in p:
-        # No DOS drive specified, just quote the pathname
-        return urllib.parse.quote(p)
-    comp = p.split(':', maxsplit=2)
-    if len(comp) != 2 or len(comp[0]) > 1:
-        error = 'Bad path: ' + p
-        raise OSError(error)
+    drive, tail = ntpath.splitdrive(p)
+    if drive[1:] == ':':
+        # DOS drive specified. Add three slashes to the start, producing
+        # an authority section with a zero-length authority, and a path
+        # section starting with a single slash.
+        drive = f'///{drive.upper()}'
 
-    drive = urllib.parse.quote(comp[0].upper())
-    tail = urllib.parse.quote(comp[1])
-    return '///' + drive + ':' + tail
+    drive = urllib.parse.quote(drive, safe='/:')
+    tail = urllib.parse.quote(tail)
+    return drive + tail
index c66b1c49c316e6cbf89e2c315455a4f0edab7190..3e5dc256d317a7c7237f291754f5888a45f1d34f 100644 (file)
@@ -1429,8 +1429,9 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
         self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
         self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
-        # Long drive letter
-        self.assertRaises(IOError, fn, "XX:\\")
+        # NTFS alternate data streams
+        self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
+        self.assertEqual(fn('foo:bar'), 'foo%3Abar')
         # No drive letter
         self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
         self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst
new file mode 100644 (file)
index 0000000..11e2b73
--- /dev/null
@@ -0,0 +1,3 @@
+Fix issue where :func:`urllib.request.pathname2url` raised :exc:`OSError`
+when given a Windows path containing a colon character not following a
+drive letter, such as before an NTFS alternate data stream.