git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Add `pathlib._abc.PathModuleBase` (#113893)
author: Barney Gale <barney.gale@gmail.com>
Sun, 14 Jan 2024 21:49:53 +0000 (21:49 +0000)
committer: GitHub <noreply@github.com>
Sun, 14 Jan 2024 21:49:53 +0000 (21:49 +0000)
Path modules provide a subset of the `os.path` API, specifically those
functions needed to provide `PurePathBase` functionality. Each
`PurePathBase` subclass references its path module via a `pathmod` class
attribute.

This commit adds a new `PathModuleBase` class, which provides abstract
methods that unconditionally raise `UnsupportedOperation`. An instance of
this class is assigned to `PurePathBase.pathmod`, replacing `posixpath`.
As a result, `PurePathBase` is no longer POSIX-y by default, and
all its methods raise `UnsupportedOperation` courtesy of `pathmod`.

Users who subclass `PurePathBase` or `PathBase` should choose the path
syntax by setting `pathmod` to `posixpath`, `ntpath`, `os.path`, or their
own subclass of `PathModuleBase`, as circumstances demand.

Lib/pathlib/__init__.py
Lib/pathlib/_abc.py
Lib/test/test_pathlib/test_pathlib.py
Lib/test/test_pathlib/test_pathlib_abc.py

index e70cfe91d322bc3541bcfdf9dcde133066049d02..f14d35bb0038d011732f302a03a58bcdb840be85 100644 (file)
@@ -33,6 +33,15 @@ __all__ = [
     ]
 
 
+# Reference for Windows paths can be found at
+# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
+_WIN_RESERVED_NAMES = frozenset(
+    {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
+    {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
+    {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
+)
+
+
 class _PathParents(Sequence):
     """This object provides sequence-like access to the logical ancestors
     of a path.  Don't try to construct it yourself."""
@@ -76,6 +85,10 @@ class PurePath(_abc.PurePathBase):
     """
 
     __slots__ = (
+        # The `_raw_paths` slot stores unnormalized string paths. This is set
+        # in the `__init__()` method.
+        '_raw_paths',
+
         # The `_drv`, `_root` and `_tail_cached` slots store parsed and
         # normalized parts of the path. They are set when any of the `drive`,
         # `root` or `_tail` properties are accessed for the first time. The
@@ -141,6 +154,26 @@ class PurePath(_abc.PurePathBase):
         # Avoid calling super().__init__, as an optimisation
         self._raw_paths = paths
 
+    def joinpath(self, *pathsegments):
+        """Combine this path with one or several arguments, and return a
+        new path representing either a subpath (if all arguments are relative
+        paths) or a totally different path (if one of the arguments is
+        anchored).
+        """
+        return self.with_segments(self, *pathsegments)
+
+    def __truediv__(self, key):
+        try:
+            return self.with_segments(self, key)
+        except TypeError:
+            return NotImplemented
+
+    def __rtruediv__(self, key):
+        try:
+            return self.with_segments(key, self)
+        except TypeError:
+            return NotImplemented
+
     def __reduce__(self):
         # Using the parts tuple helps share interned path parts
         # when pickling related paths.
@@ -386,6 +419,33 @@ class PurePath(_abc.PurePathBase):
             other = self.with_segments(other)
         return other == self or other in self.parents
 
+    def is_absolute(self):
+        """True if the path is absolute (has both a root and, if applicable,
+        a drive)."""
+        if self.pathmod is posixpath:
+            # Optimization: work with raw paths on POSIX.
+            for path in self._raw_paths:
+                if path.startswith('/'):
+                    return True
+            return False
+        return self.pathmod.isabs(self)
+
+    def is_reserved(self):
+        """Return True if the path contains one of the special names reserved
+        by the system, if any."""
+        if self.pathmod is not ntpath or not self.name:
+            return False
+
+        # NOTE: the rules for reserved names seem somewhat complicated
+        # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
+        # exist). We err on the side of caution and return True for paths
+        # which are not considered reserved by Windows.
+        if self.drive.startswith('\\\\'):
+            # UNC paths are never reserved.
+            return False
+        name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
+        return name.upper() in _WIN_RESERVED_NAMES
+
     def as_uri(self):
         """Return the path as a URI."""
         if not self.is_absolute():
index a6956f251f8b7ea6a92a6e4274079bfacbdd1bb1..1fdca004d6b31f4606b5a7d02ca3f281226964c1 100644 (file)
@@ -12,7 +12,6 @@ resemble pathlib's PurePath and Path respectively.
 """
 
 import functools
-import posixpath
 from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
 from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
 
@@ -20,14 +19,6 @@ from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
 # Internals
 #
 
-# Reference for Windows paths can be found at
-# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
-_WIN_RESERVED_NAMES = frozenset(
-    {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
-    {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
-    {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
-)
-
 _WINERROR_NOT_READY = 21  # drive exists but is not accessible
 _WINERROR_INVALID_NAME = 123  # fix for bpo-35306
 _WINERROR_CANT_RESOLVE_FILENAME = 1921  # broken symlink pointing to itself
@@ -144,6 +135,53 @@ class UnsupportedOperation(NotImplementedError):
     pass
 
 
+class PathModuleBase:
+    """Base class for path modules, which do low-level path manipulation.
+
+    Path modules provide a subset of the os.path API, specifically those
+    functions needed to provide PurePathBase functionality. Each PurePathBase
+    subclass references its path module via a 'pathmod' class attribute.
+
+    Every method in this base class raises an UnsupportedOperation exception.
+    """
+
+    @classmethod
+    def _unsupported(cls, attr):
+        raise UnsupportedOperation(f"{cls.__name__}.{attr} is unsupported")
+
+    @property
+    def sep(self):
+        """The character used to separate path components."""
+        self._unsupported('sep')
+
+    def join(self, path, *paths):
+        """Join path segments."""
+        self._unsupported('join()')
+
+    def split(self, path):
+        """Split the path into a pair (head, tail), where *head* is everything
+        before the final path separator, and *tail* is everything after.
+        Either part may be empty.
+        """
+        self._unsupported('split()')
+
+    def splitroot(self, path):
+        """Split the pathname path into a 3-item tuple (drive, root, tail),
+        where *drive* is a device name or mount point, *root* is a string of
+        separators after the drive, and *tail* is everything after the root.
+        Any part may be empty."""
+        self._unsupported('splitroot()')
+
+    def normcase(self, path):
+        """Normalize the case of the path."""
+        self._unsupported('normcase()')
+
+    def isabs(self, path):
+        """Returns whether the path is absolute, i.e. unaffected by the
+        current directory or drive."""
+        self._unsupported('isabs()')
+
+
 class PurePathBase:
     """Base class for pure path objects.
 
@@ -154,19 +192,19 @@ class PurePathBase:
     """
 
     __slots__ = (
-        # The `_raw_paths` slot stores unnormalized string paths. This is set
-        # in the `__init__()` method.
-        '_raw_paths',
+        # The `_raw_path` slot stores a joined string path. This is set in the
+        # `__init__()` method.
+        '_raw_path',
 
         # The '_resolving' slot stores a boolean indicating whether the path
         # is being processed by `PathBase.resolve()`. This prevents duplicate
         # work from occurring when `resolve()` calls `stat()` or `readlink()`.
         '_resolving',
     )
-    pathmod = posixpath
+    pathmod = PathModuleBase()
 
-    def __init__(self, *paths):
-        self._raw_paths = paths
+    def __init__(self, path, *paths):
+        self._raw_path = self.pathmod.join(path, *paths) if paths else path
         self._resolving = False
 
     def with_segments(self, *pathsegments):
@@ -176,11 +214,6 @@ class PurePathBase:
         """
         return type(self)(*pathsegments)
 
-    @property
-    def _raw_path(self):
-        """The joined but unnormalized path."""
-        return self.pathmod.join(*self._raw_paths)
-
     def __str__(self):
         """Return the string representation of the path, suitable for
         passing to system calls."""
@@ -194,7 +227,7 @@ class PurePathBase:
     @property
     def drive(self):
         """The drive prefix (letter or UNC path), if any."""
-        return self.pathmod.splitdrive(self._raw_path)[0]
+        return self.pathmod.splitroot(self._raw_path)[0]
 
     @property
     def root(self):
@@ -210,7 +243,7 @@ class PurePathBase:
     @property
     def name(self):
         """The final path component, if any."""
-        return self.pathmod.basename(self._raw_path)
+        return self.pathmod.split(self._raw_path)[1]
 
     @property
     def suffix(self):
@@ -251,10 +284,10 @@ class PurePathBase:
 
     def with_name(self, name):
         """Return a new path with the file name changed."""
-        dirname = self.pathmod.dirname
-        if dirname(name):
+        split = self.pathmod.split
+        if split(name)[0]:
             raise ValueError(f"Invalid name {name!r}")
-        return self.with_segments(dirname(self._raw_path), name)
+        return self.with_segments(split(self._raw_path)[0], name)
 
     def with_stem(self, stem):
         """Return a new path with the stem changed."""
@@ -336,17 +369,17 @@ class PurePathBase:
         paths) or a totally different path (if one of the arguments is
         anchored).
         """
-        return self.with_segments(*self._raw_paths, *pathsegments)
+        return self.with_segments(self._raw_path, *pathsegments)
 
     def __truediv__(self, key):
         try:
-            return self.joinpath(key)
+            return self.with_segments(self._raw_path, key)
         except TypeError:
             return NotImplemented
 
     def __rtruediv__(self, key):
         try:
-            return self.with_segments(key, *self._raw_paths)
+            return self.with_segments(key, self._raw_path)
         except TypeError:
             return NotImplemented
 
@@ -371,7 +404,7 @@ class PurePathBase:
     def parent(self):
         """The logical parent of the path."""
         path = self._raw_path
-        parent = self.pathmod.dirname(path)
+        parent = self.pathmod.split(path)[0]
         if path != parent:
             parent = self.with_segments(parent)
             parent._resolving = self._resolving
@@ -381,43 +414,20 @@ class PurePathBase:
     @property
     def parents(self):
         """A sequence of this path's logical parents."""
-        dirname = self.pathmod.dirname
+        split = self.pathmod.split
         path = self._raw_path
-        parent = dirname(path)
+        parent = split(path)[0]
         parents = []
         while path != parent:
             parents.append(self.with_segments(parent))
             path = parent
-            parent = dirname(path)
+            parent = split(path)[0]
         return tuple(parents)
 
     def is_absolute(self):
         """True if the path is absolute (has both a root and, if applicable,
         a drive)."""
-        if self.pathmod is posixpath:
-            # Optimization: work with raw paths on POSIX.
-            for path in self._raw_paths:
-                if path.startswith('/'):
-                    return True
-            return False
-        else:
-            return self.pathmod.isabs(self._raw_path)
-
-    def is_reserved(self):
-        """Return True if the path contains one of the special names reserved
-        by the system, if any."""
-        if self.pathmod is posixpath or not self.name:
-            return False
-
-        # NOTE: the rules for reserved names seem somewhat complicated
-        # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
-        # exist). We err on the side of caution and return True for paths
-        # which are not considered reserved by Windows.
-        if self.drive.startswith('\\\\'):
-            # UNC paths are never reserved.
-            return False
-        name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
-        return name.upper() in _WIN_RESERVED_NAMES
+        return self.pathmod.isabs(self._raw_path)
 
     def match(self, path_pattern, *, case_sensitive=None):
         """
@@ -726,7 +736,7 @@ class PathBase(PurePathBase):
             raise ValueError("Unacceptable pattern: {!r}".format(pattern))
 
         pattern_parts = list(path_pattern.parts)
-        if not self.pathmod.basename(pattern):
+        if not self.pathmod.split(pattern)[1]:
             # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
             pattern_parts.append('')
 
index 1b560adfc3b57af33f10504d75c534eb87cc6761..61d7939ad140b2c4db817cea5f91677d3318064a 100644 (file)
@@ -1151,6 +1151,7 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest):
 
     def test_matches_pathbase_api(self):
         our_names = {name for name in dir(self.cls) if name[0] != '_'}
+        our_names.remove('is_reserved')  # only present in PurePath
         path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'}
         self.assertEqual(our_names, path_names)
         for attr_name in our_names:
index 14df1e69db1f96a98327b222f3830fd4e2a64f32..c3c568c296e25c5912caed9adc6468c8a0ce8c30 100644 (file)
@@ -5,7 +5,7 @@ import errno
 import stat
 import unittest
 
-from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase
+from pathlib._abc import UnsupportedOperation, PathModuleBase, PurePathBase, PathBase
 import posixpath
 
 from test.support.os_helper import TESTFN
@@ -17,6 +17,20 @@ class UnsupportedOperationTest(unittest.TestCase):
         self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError))
 
 
+class PathModuleBaseTest(unittest.TestCase):
+    cls = PathModuleBase
+
+    def test_unsupported_operation(self):
+        m = self.cls()
+        e = UnsupportedOperation
+        with self.assertRaises(e):
+            m.sep
+        self.assertRaises(e, m.join, 'foo')
+        self.assertRaises(e, m.split, 'foo')
+        self.assertRaises(e, m.splitroot, 'foo')
+        self.assertRaises(e, m.normcase, 'foo')
+        self.assertRaises(e, m.isabs, 'foo')
+
 #
 # Tests for the pure classes.
 #
@@ -25,6 +39,42 @@ class UnsupportedOperationTest(unittest.TestCase):
 class PurePathBaseTest(unittest.TestCase):
     cls = PurePathBase
 
+    def test_unsupported_operation_pure(self):
+        p = self.cls('foo')
+        e = UnsupportedOperation
+        with self.assertRaises(e):
+            p.drive
+        with self.assertRaises(e):
+            p.root
+        with self.assertRaises(e):
+            p.anchor
+        with self.assertRaises(e):
+            p.parts
+        with self.assertRaises(e):
+            p.parent
+        with self.assertRaises(e):
+            p.parents
+        with self.assertRaises(e):
+            p.name
+        with self.assertRaises(e):
+            p.stem
+        with self.assertRaises(e):
+            p.suffix
+        with self.assertRaises(e):
+            p.suffixes
+        with self.assertRaises(e):
+            p / 'bar'
+        with self.assertRaises(e):
+            'bar' / p
+        self.assertRaises(e, p.joinpath, 'bar')
+        self.assertRaises(e, p.with_name, 'bar')
+        self.assertRaises(e, p.with_stem, 'bar')
+        self.assertRaises(e, p.with_suffix, '.txt')
+        self.assertRaises(e, p.relative_to, '')
+        self.assertRaises(e, p.is_relative_to, '')
+        self.assertRaises(e, p.is_absolute)
+        self.assertRaises(e, p.match, '*')
+
     def test_magic_methods(self):
         P = self.cls
         self.assertFalse(hasattr(P, '__fspath__'))
@@ -39,11 +89,12 @@ class PurePathBaseTest(unittest.TestCase):
         self.assertIs(P.__ge__, object.__ge__)
 
     def test_pathmod(self):
-        self.assertIs(self.cls.pathmod, posixpath)
+        self.assertIsInstance(self.cls.pathmod, PathModuleBase)
 
 
 class DummyPurePath(PurePathBase):
     __slots__ = ()
+    pathmod = posixpath
 
     def __eq__(self, other):
         if not isinstance(other, DummyPurePath):
@@ -669,6 +720,7 @@ class DummyPath(PathBase):
     memory.
     """
     __slots__ = ()
+    pathmod = posixpath
 
     _files = {}
     _directories = {}