git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-81790: support "UNC" device paths in `ntpath.splitdrive()` (GH-91882)
author Barney Gale <barney.gale@gmail.com>
Fri, 10 Jun 2022 15:59:55 +0000 (16:59 +0100)
committer GitHub <noreply@github.com>
Fri, 10 Jun 2022 15:59:55 +0000 (16:59 +0100)
Doc/library/os.path.rst
Lib/ntpath.py
Lib/pathlib.py
Lib/test/test_ntpath.py
Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst [new file with mode: 0644]

index ce7913e3712d7304f5ae4c667e926b0f8917e8ef..85989ef32d4911912d18c25a03296e53cae0d0bf 100644 (file)
@@ -469,7 +469,7 @@ the :mod:`glob` module.)
       ("c:", "/dir")
 
    If the path contains a UNC path, drive will contain the host name
-   and share, up to but not including the fourth separator::
+   and share::
 
       >>> splitdrive("//host/computer/dir")
       ("//host/computer", "/dir")
index 73b1bd12ddca76709b7188db2968b61027b2d896..959bcd0983118678f63916bec932c88b59806585 100644 (file)
@@ -172,17 +172,23 @@ def splitdrive(p):
             sep = b'\\'
             altsep = b'/'
             colon = b':'
+            unc_prefix = b'\\\\?\\UNC'
         else:
             sep = '\\'
             altsep = '/'
             colon = ':'
+            unc_prefix = '\\\\?\\UNC'
         normp = p.replace(altsep, sep)
         if (normp[0:2] == sep*2) and (normp[2:3] != sep):
             # is a UNC path:
             # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
             # \\machine\mountpoint\directory\etc\...
             #           directory ^^^^^^^^^^^^^^^
-            index = normp.find(sep, 2)
+            if normp[:8].upper().rstrip(sep) == unc_prefix:
+                start = 8
+            else:
+                start = 2
+            index = normp.find(sep, start)
             if index == -1:
                 return p[:0], p
             index2 = normp.find(sep, index + 1)
index 26da4eda92719d55a1cfd672656e29216d8de1fb..bb440c9d57216a1af0c754df1e31fe1c6af078c4 100644 (file)
@@ -120,68 +120,18 @@ class _WindowsFlavour(_Flavour):
 
     is_supported = (os.name == 'nt')
 
-    drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
-    ext_namespace_prefix = '\\\\?\\'
-
     reserved_names = (
         {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
         {'COM%s' % c for c in '123456789\xb9\xb2\xb3'} |
         {'LPT%s' % c for c in '123456789\xb9\xb2\xb3'}
         )
 
-    # Interesting findings about extended paths:
-    # * '\\?\c:\a' is an extended path, which bypasses normal Windows API
-    #   path processing. Thus relative paths are not resolved and slash is not
-    #   translated to backslash. It has the native NT path limit of 32767
-    #   characters, but a bit less after resolving device symbolic links,
-    #   such as '\??\C:' => '\Device\HarddiskVolume2'.
-    # * '\\?\c:/a' looks for a device named 'C:/a' because slash is a
-    #   regular name character in the object namespace.
-    # * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems.
-    #   The only path separator at the filesystem level is backslash.
-    # * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and
-    #   thus limited to MAX_PATH.
-    # * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH,
-    #   even with the '\\?\' prefix.
-
     def splitroot(self, part, sep=sep):
-        first = part[0:1]
-        second = part[1:2]
-        if (second == sep and first == sep):
-            # XXX extended paths should also disable the collapsing of "."
-            # components (according to MSDN docs).
-            prefix, part = self._split_extended_path(part)
-            first = part[0:1]
-            second = part[1:2]
+        drv, rest = self.pathmod.splitdrive(part)
+        if drv[:1] == sep or rest[:1] == sep:
+            return drv, sep, rest.lstrip(sep)
         else:
-            prefix = ''
-        third = part[2:3]
-        if (second == sep and first == sep and third != sep):
-            # is a UNC path:
-            # vvvvvvvvvvvvvvvvvvvvv root
-            # \\machine\mountpoint\directory\etc\...
-            #            directory ^^^^^^^^^^^^^^
-            index = part.find(sep, 2)
-            if index != -1:
-                index2 = part.find(sep, index + 1)
-                # a UNC path can't have two slashes in a row
-                # (after the initial two)
-                if index2 != index + 1:
-                    if index2 == -1:
-                        index2 = len(part)
-                    if prefix:
-                        return prefix + part[1:index2], sep, part[index2+1:]
-                    else:
-                        return part[:index2], sep, part[index2+1:]
-        drv = root = ''
-        if second == ':' and first in self.drive_letters:
-            drv = part[:2]
-            part = part[2:]
-            first = third
-        if first == sep:
-            root = first
-            part = part.lstrip(sep)
-        return prefix + drv, root, part
+            return drv, '', rest
 
     def casefold(self, s):
         return s.lower()
@@ -192,16 +142,6 @@ class _WindowsFlavour(_Flavour):
     def compile_pattern(self, pattern):
         return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch
 
-    def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix):
-        prefix = ''
-        if s.startswith(ext_prefix):
-            prefix = s[:4]
-            s = s[4:]
-            if s.startswith('UNC\\'):
-                prefix += s[:3]
-                s = '\\' + s[3:]
-        return prefix, s
-
     def is_reserved(self, parts):
         # NOTE: the rules for reserved names seem somewhat complicated
         # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
index ab3603bdd73015b51e7eb9a1d92c89ee3056903e..d51946322c80560755b26bc43e0c4b3727025cf7 100644 (file)
@@ -117,6 +117,31 @@ class TestNtpath(NtpathTestCase):
         # Issue #19911: UNC part containing U+0130
         self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'),
                          ('//conky/MOUNTPOİNT', '/foo/bar'))
+        # gh-81790: support device namespace, including UNC drives.
+        tester('ntpath.splitdrive("//?/c:")', ("//?/c:", ""))
+        tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/"))
+        tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir"))
+        tester('ntpath.splitdrive("//?/UNC")', ("", "//?/UNC"))
+        tester('ntpath.splitdrive("//?/UNC/")', ("", "//?/UNC/"))
+        tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", ""))
+        tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", ""))
+        tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir"))
+        tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")',
+               ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam'))
+        tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/"))
+
+        tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", ""))
+        tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\"))
+        tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir"))
+        tester('ntpath.splitdrive("\\\\?\\UNC")', ("", "\\\\?\\UNC"))
+        tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("", "\\\\?\\UNC\\"))
+        tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", ""))
+        tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", ""))
+        tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")',
+               ("\\\\?\\UNC\\server\\share", "\\dir"))
+        tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")',
+               ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam'))
+        tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\"))
 
     def test_split(self):
         tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar'))
diff --git a/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst
new file mode 100644 (file)
index 0000000..8894493
--- /dev/null
@@ -0,0 +1,2 @@
+:func:`os.path.splitdrive` now understands DOS device paths with UNC
+links (beginning ``\\?\UNC\``). Contributed by Barney Gale.