git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-110109: Add `pathlib._PurePathBase` (#110670)
author: Barney Gale <barney.gale@gmail.com>
Fri, 8 Dec 2023 17:39:04 +0000 (17:39 +0000)
committer: GitHub <noreply@github.com>
Fri, 8 Dec 2023 17:39:04 +0000 (17:39 +0000)
Add the `pathlib._PurePathBase` class: a private superclass of both `PurePath` and `_PathBase`. Unlike `PurePath`, it does not define any of these special methods: `__fspath__`, `__bytes__`, `__reduce__`, `__hash__`, `__eq__`, `__lt__`, `__le__`, `__gt__`, `__ge__`. Its initializer and path joining methods accept only strings, not os.PathLike objects more broadly.

This is important for supporting *virtual paths*: user subclasses of `_PathBase` that provide access to archive files, FTP servers, etc. In these classes, the above methods should be implemented by users only as appropriate, with due consideration for the hash/equality of any backing objects, such as file objects or sockets.

Lib/pathlib.py
Lib/test/test_pathlib.py
Misc/NEWS.d/next/Library/2023-10-11-02-34-01.gh-issue-110109.RFCmHs.rst [new file with mode: 0644]

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index c48cff307083a81f0cd718a4a68262e7f21a5867..87d1f6b58ec52ed99c3d16e474f7c4afc4002b22 100644 (file)
@@ -198,14 +198,13 @@ class _PathParents(Sequence):
         return "<{}.parents>".format(type(self._path).__name__)
 
 
-class PurePath:
-    """Base class for manipulating paths without I/O.
+class _PurePathBase:
+    """Base class for pure path objects.
 
-    PurePath represents a filesystem path and offers operations which
-    don't imply any actual filesystem I/O.  Depending on your system,
-    instantiating a PurePath will return either a PurePosixPath or a
-    PureWindowsPath object.  You can also instantiate either of these classes
-    directly, regardless of your system.
+    This class *does not* provide several magic methods that are defined in
+    its subclass PurePath. They are: __fspath__, __bytes__, __reduce__,
+    __hash__, __eq__, __lt__, __le__, __gt__, __ge__. Its initializer and path
+    joining methods accept only strings, not os.PathLike objects more broadly.
     """
 
     __slots__ = (
@@ -227,22 +226,6 @@ class PurePath:
         # for the first time. It's used to implement `_str_normcase`
         '_str',
 
-        # The `_str_normcase_cached` slot stores the string path with
-        # normalized case. It is set when the `_str_normcase` property is
-        # accessed for the first time. It's used to implement `__eq__()`
-        # `__hash__()`, and `_parts_normcase`
-        '_str_normcase_cached',
-
-        # The `_parts_normcase_cached` slot stores the case-normalized
-        # string path after splitting on path separators. It's set when the
-        # `_parts_normcase` property is accessed for the first time. It's used
-        # to implement comparison methods like `__lt__()`.
-        '_parts_normcase_cached',
-
-        # The `_hash` slot stores the hash of the case-normalized string
-        # path. It's set when `__hash__()` is called for the first time.
-        '_hash',
-
         # The '_resolving' slot stores a boolean indicating whether the path
         # is being processed by `_PathBase.resolve()`. This prevents duplicate
         # work from occurring when `resolve()` calls `stat()` or `readlink()`.
@@ -250,6 +233,10 @@ class PurePath:
     )
     pathmod = os.path
 
+    def __init__(self, *paths):
+        self._raw_paths = paths
+        self._resolving = False
+
     def with_segments(self, *pathsegments):
         """Construct a new path object from any number of path-like objects.
         Subclasses may override this method to customize how new path objects
@@ -444,7 +431,7 @@ class PurePath:
             warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg,
                                  remove=(3, 14))
             other = self.with_segments(other, *_deprecated)
-        elif not isinstance(other, PurePath):
+        elif not isinstance(other, _PurePathBase):
             other = self.with_segments(other)
         for step, path in enumerate(chain([other], other.parents)):
             if path == self or path in self.parents:
@@ -468,7 +455,7 @@ class PurePath:
             warnings._deprecated("pathlib.PurePath.is_relative_to(*args)",
                                  msg, remove=(3, 14))
             other = self.with_segments(other, *_deprecated)
-        elif not isinstance(other, PurePath):
+        elif not isinstance(other, _PurePathBase):
             other = self.with_segments(other)
         return other == self or other in self.parents
 
@@ -487,7 +474,7 @@ class PurePath:
         paths) or a totally different path (if one of the arguments is
         anchored).
         """
-        return self.with_segments(self, *pathsegments)
+        return self.with_segments(*self._raw_paths, *pathsegments)
 
     def __truediv__(self, key):
         try:
@@ -497,7 +484,7 @@ class PurePath:
 
     def __rtruediv__(self, key):
         try:
-            return self.with_segments(key, self)
+            return self.with_segments(key, *self._raw_paths)
         except TypeError:
             return NotImplemented
 
@@ -555,7 +542,7 @@ class PurePath:
         """
         Return True if this path matches the given pattern.
         """
-        if not isinstance(path_pattern, PurePath):
+        if not isinstance(path_pattern, _PurePathBase):
             path_pattern = self.with_segments(path_pattern)
         if case_sensitive is None:
             case_sensitive = _is_case_sensitive(self.pathmod)
@@ -570,6 +557,35 @@ class PurePath:
         match = _compile_pattern(pattern_str, sep, case_sensitive)
         return match(str(self)) is not None
 
+
+class PurePath(_PurePathBase):
+    """Base class for manipulating paths without I/O.
+
+    PurePath represents a filesystem path and offers operations which
+    don't imply any actual filesystem I/O.  Depending on your system,
+    instantiating a PurePath will return either a PurePosixPath or a
+    PureWindowsPath object.  You can also instantiate either of these classes
+    directly, regardless of your system.
+    """
+
+    __slots__ = (
+        # The `_str_normcase_cached` slot stores the string path with
+        # normalized case. It is set when the `_str_normcase` property is
+        # accessed for the first time. It's used to implement `__eq__()`
+        # `__hash__()`, and `_parts_normcase`
+        '_str_normcase_cached',
+
+        # The `_parts_normcase_cached` slot stores the case-normalized
+        # string path after splitting on path separators. It's set when the
+        # `_parts_normcase` property is accessed for the first time. It's used
+        # to implement comparison methods like `__lt__()`.
+        '_parts_normcase_cached',
+
+        # The `_hash` slot stores the hash of the case-normalized string
+        # path. It's set when `__hash__()` is called for the first time.
+        '_hash',
+    )
+
     def __new__(cls, *args, **kwargs):
         """Construct a PurePath from one or several strings and or existing
         PurePath objects.  The strings and path objects are combined so as
@@ -600,8 +616,7 @@ class PurePath:
                         "object where __fspath__ returns a str, "
                         f"not {type(path).__name__!r}")
                 paths.append(path)
-        self._raw_paths = paths
-        self._resolving = False
+        super().__init__(*paths)
 
     def __reduce__(self):
         # Using the parts tuple helps share interned path parts
@@ -719,7 +734,7 @@ class PureWindowsPath(PurePath):
 # Filesystem-accessing classes
 
 
-class _PathBase(PurePath):
+class _PathBase(_PurePathBase):
     """Base class for concrete path objects.
 
     This class provides dummy implementations for many methods that derived
@@ -733,8 +748,6 @@ class _PathBase(PurePath):
     such as paths in archive files or on remote storage systems.
     """
     __slots__ = ()
-    __bytes__ = None
-    __fspath__ = None  # virtual paths have no local file system representation
 
     @classmethod
     def _unsupported(cls, method_name):
@@ -1341,7 +1354,7 @@ class _PathBase(PurePath):
         self._unsupported("as_uri")
 
 
-class Path(_PathBase):
+class Path(_PathBase, PurePath):
     """PurePath subclass that can make system calls.
 
     Path represents a filesystem path but unlike PurePath, also offers
@@ -1351,8 +1364,6 @@ class Path(_PathBase):
     but cannot instantiate a WindowsPath on a POSIX system or vice versa.
     """
     __slots__ = ()
-    __bytes__ = PurePath.__bytes__
-    __fspath__ = PurePath.__fspath__
     as_uri = PurePath.as_uri
 
     def __init__(self, *args, **kwargs):
diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index ea922143e36e4853c4fa714cf7760f767de0c3e2..d35516a5c8fd9bef79c2ed781c2856a7727efd35 100644 (file)
@@ -49,8 +49,35 @@ if hasattr(os, 'geteuid'):
 # Tests for the pure classes.
 #
 
-class PurePathTest(unittest.TestCase):
-    cls = pathlib.PurePath
+
+class PurePathBaseTest(unittest.TestCase):
+    cls = pathlib._PurePathBase
+
+    def test_magic_methods(self):
+        P = self.cls
+        self.assertFalse(hasattr(P, '__fspath__'))
+        self.assertFalse(hasattr(P, '__bytes__'))
+        self.assertIs(P.__reduce__, object.__reduce__)
+        self.assertIs(P.__hash__, object.__hash__)
+        self.assertIs(P.__eq__, object.__eq__)
+        self.assertIs(P.__lt__, object.__lt__)
+        self.assertIs(P.__le__, object.__le__)
+        self.assertIs(P.__gt__, object.__gt__)
+        self.assertIs(P.__ge__, object.__ge__)
+
+
+class DummyPurePath(pathlib._PurePathBase):
+    def __eq__(self, other):
+        if not isinstance(other, DummyPurePath):
+            return NotImplemented
+        return str(self) == str(other)
+
+    def __hash__(self):
+        return hash(str(self))
+
+
+class DummyPurePathTest(unittest.TestCase):
+    cls = DummyPurePath
 
     # Keys are canonical paths, values are list of tuples of arguments
     # supposed to produce equal paths.
@@ -82,12 +109,6 @@ class PurePathTest(unittest.TestCase):
         P('/a', 'b', 'c')
         P('a/b/c')
         P('/a/b/c')
-        P(FakePath("a/b/c"))
-        self.assertEqual(P(P('a')), P('a'))
-        self.assertEqual(P(P('a'), 'b'), P('a/b'))
-        self.assertEqual(P(P('a'), P('b')), P('a/b'))
-        self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
-        self.assertEqual(P(P('./a:b')), P('./a:b'))
 
     def test_concrete_class(self):
         if self.cls is pathlib.PurePath:
@@ -193,8 +214,6 @@ class PurePathTest(unittest.TestCase):
         self.assertIs(type(pp), type(p))
         pp = p.joinpath('c', 'd')
         self.assertEqual(pp, P('a/b/c/d'))
-        pp = p.joinpath(P('c'))
-        self.assertEqual(pp, P('a/b/c'))
         pp = p.joinpath('/c')
         self.assertEqual(pp, P('/c'))
 
@@ -211,8 +230,6 @@ class PurePathTest(unittest.TestCase):
         self.assertEqual(pp, P('a/b/c/d'))
         pp = 'c' / p / 'd'
         self.assertEqual(pp, P('c/a/b/d'))
-        pp = p / P('c')
-        self.assertEqual(pp, P('a/b/c'))
         pp = p/ '/c'
         self.assertEqual(pp, P('/c'))
 
@@ -678,6 +695,29 @@ class PurePathTest(unittest.TestCase):
         self.assertFalse(p.is_relative_to(''))
         self.assertFalse(p.is_relative_to(P('a')))
 
+
+class PurePathTest(DummyPurePathTest):
+    cls = pathlib.PurePath
+
+    def test_constructor_nested(self):
+        P = self.cls
+        P(FakePath("a/b/c"))
+        self.assertEqual(P(P('a')), P('a'))
+        self.assertEqual(P(P('a'), 'b'), P('a/b'))
+        self.assertEqual(P(P('a'), P('b')), P('a/b'))
+        self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
+        self.assertEqual(P(P('./a:b')), P('./a:b'))
+
+    def test_join_nested(self):
+        P = self.cls
+        p = P('a/b').joinpath(P('c'))
+        self.assertEqual(p, P('a/b/c'))
+
+    def test_div_nested(self):
+        P = self.cls
+        p = P('a/b') / P('c')
+        self.assertEqual(p, P('a/b/c'))
+
     def test_pickling_common(self):
         P = self.cls
         p = P('/a/b')
@@ -1545,7 +1585,7 @@ class PurePathSubclassTest(PurePathTest):
 # Tests for the virtual classes.
 #
 
-class PathBaseTest(PurePathTest):
+class PathBaseTest(PurePathBaseTest):
     cls = pathlib._PathBase
 
     def test_unsupported_operation(self):
@@ -1636,6 +1676,14 @@ class DummyPath(pathlib._PathBase):
     _directories = {}
     _symlinks = {}
 
+    def __eq__(self, other):
+        if not isinstance(other, DummyPath):
+            return NotImplemented
+        return str(self) == str(other)
+
+    def __hash__(self):
+        return hash(str(self))
+
     def stat(self, *, follow_symlinks=True):
         if follow_symlinks:
             path = str(self.resolve())
@@ -1707,7 +1755,7 @@ class DummyPath(pathlib._PathBase):
             self.mkdir(mode, parents=False, exist_ok=exist_ok)
 
 
-class DummyPathTest(unittest.TestCase):
+class DummyPathTest(DummyPurePathTest):
     """Tests for PathBase methods that use stat(), open() and iterdir()."""
 
     cls = DummyPath
@@ -2014,7 +2062,7 @@ class DummyPathTest(unittest.TestCase):
 
     def test_rglob_common(self):
         def _check(glob, expected):
-            self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
+            self.assertEqual(set(glob), {P(BASE, q) for q in expected})
         P = self.cls
         p = P(BASE)
         it = p.rglob("fileA")
@@ -2198,7 +2246,7 @@ class DummyPathTest(unittest.TestCase):
         # directory_depth > recursion_limit
         directory_depth = recursion_limit + 10
         base = self.cls(BASE, 'deep')
-        path = self.cls(base, *(['d'] * directory_depth))
+        path = base.joinpath(*(['d'] * directory_depth))
         path.mkdir(parents=True)
 
         with set_recursion_limit(recursion_limit):
@@ -2741,7 +2789,7 @@ class DummyPathTest(unittest.TestCase):
         # directory_depth > recursion_limit
         directory_depth = recursion_limit + 10
         base = self.cls(BASE, 'deep')
-        path = self.cls(base, *(['d'] * directory_depth))
+        path = base.joinpath(*(['d'] * directory_depth))
         path.mkdir(parents=True)
 
         with set_recursion_limit(recursion_limit):
diff --git a/Misc/NEWS.d/next/Library/2023-10-11-02-34-01.gh-issue-110109.RFCmHs.rst b/Misc/NEWS.d/next/Library/2023-10-11-02-34-01.gh-issue-110109.RFCmHs.rst
new file mode 100644 (file)
index 0000000..4f12d12
--- /dev/null
@@ -0,0 +1,3 @@
+Add private ``pathlib._PurePathBase`` class: a base class for
+:class:`pathlib.PurePath` that omits certain magic methods. It may be made
+public (along with ``_PathBase``) in future.