git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-76846, GH-85281: Call `__new__()` and `__init__()` on pathlib subclasses (GH-102789)
author: Barney Gale <barney.gale@gmail.com>
Mon, 3 Apr 2023 18:57:11 +0000 (19:57 +0100)
committer: GitHub <noreply@github.com>
Mon, 3 Apr 2023 18:57:11 +0000 (19:57 +0100)
Fix an issue where `__new__()` and `__init__()` were not called on subclasses of `pathlib.PurePath` and `Path` in some circumstances.

Paths are now normalized on-demand. This speeds up path construction, `p.joinpath(q)`, and `p / q`.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
Lib/pathlib.py
Lib/test/test_pathlib.py
Misc/NEWS.d/next/Library/2023-03-17-19-14-26.gh-issue-76846.KEamjK.rst [new file with mode: 0644]

diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index a126bf2fe5570a7d1ccc66dd415e8eb29d68d581..490f89f39d26d1bd0376442d4dfd5d8a744baebf 100644 (file)
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -16,7 +16,6 @@ import sys
 import warnings
 from _collections_abc import Sequence
 from errno import ENOENT, ENOTDIR, EBADF, ELOOP
-from operator import attrgetter
 from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
 from urllib.parse import quote_from_bytes as urlquote_from_bytes
 
@@ -216,8 +215,8 @@ class _PathParents(Sequence):
     def __init__(self, path):
         # We don't store the instance to avoid reference cycles
         self._pathcls = type(path)
-        self._drv = path._drv
-        self._root = path._root
+        self._drv = path.drive
+        self._root = path.root
         self._parts = path._parts
 
     def __len__(self):
@@ -251,12 +250,12 @@ class PurePath(object):
     directly, regardless of your system.
     """
     __slots__ = (
-        '_drv', '_root', '_parts',
+        '_raw_path', '_drv', '_root', '_parts_cached',
         '_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
     )
     _flavour = os.path
 
-    def __new__(cls, *args):
+    def __new__(cls, *args, **kwargs):
         """Construct a PurePath from one or several strings and or existing
         PurePath objects.  The strings and path objects are combined so as
         to yield a canonicalized path, which is incorporated into the
@@ -264,23 +263,20 @@ class PurePath(object):
         """
         if cls is PurePath:
             cls = PureWindowsPath if os.name == 'nt' else PurePosixPath
-        return cls._from_parts(args)
+        return object.__new__(cls)
 
     def __reduce__(self):
         # Using the parts tuple helps share interned path parts
         # when pickling related paths.
-        return (self.__class__, tuple(self._parts))
+        return (self.__class__, self.parts)
 
-    @classmethod
-    def _parse_parts(cls, parts):
-        if not parts:
-            return '', '', []
-        elif len(parts) == 1:
-            path = os.fspath(parts[0])
+    def __init__(self, *args):
+        if not args:
+            path = ''
+        elif len(args) == 1:
+            path = os.fspath(args[0])
         else:
-            path = cls._flavour.join(*parts)
-        sep = cls._flavour.sep
-        altsep = cls._flavour.altsep
+            path = self._flavour.join(*args)
         if isinstance(path, str):
             # Force-cast str subclasses to str (issue #21127)
             path = str(path)
@@ -289,6 +285,14 @@ class PurePath(object):
                 "argument should be a str or an os.PathLike "
                 "object where __fspath__ returns a str, "
                 f"not {type(path).__name__!r}")
+        self._raw_path = path
+
+    @classmethod
+    def _parse_path(cls, path):
+        if not path:
+            return '', '', []
+        sep = cls._flavour.sep
+        altsep = cls._flavour.altsep
         if altsep:
             path = path.replace(altsep, sep)
         drv, root, rel = cls._flavour.splitroot(path)
@@ -299,21 +303,20 @@ class PurePath(object):
         parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
         return drv, root, parsed
 
-    @classmethod
-    def _from_parts(cls, args):
-        self = object.__new__(cls)
-        drv, root, parts = self._parse_parts(args)
+    def _load_parts(self):
+        drv, root, parts = self._parse_path(self._raw_path)
         self._drv = drv
         self._root = root
-        self._parts = parts
-        return self
+        self._parts_cached = parts
 
     @classmethod
     def _from_parsed_parts(cls, drv, root, parts):
-        self = object.__new__(cls)
+        path = cls._format_parsed_parts(drv, root, parts)
+        self = cls(path)
+        self._str = path or '.'
         self._drv = drv
         self._root = root
-        self._parts = parts
+        self._parts_cached = parts
         return self
 
     @classmethod
@@ -330,7 +333,7 @@ class PurePath(object):
         try:
             return self._str
         except AttributeError:
-            self._str = self._format_parsed_parts(self._drv, self._root,
+            self._str = self._format_parsed_parts(self.drive, self.root,
                                                   self._parts) or '.'
             return self._str
 
@@ -356,7 +359,7 @@ class PurePath(object):
         if not self.is_absolute():
             raise ValueError("relative path can't be expressed as a file URI")
 
-        drive = self._drv
+        drive = self.drive
         if len(drive) == 2 and drive[1] == ':':
             # It's a path on a local drive => 'file:///c:/a/b'
             prefix = 'file:///' + drive
@@ -412,23 +415,43 @@ class PurePath(object):
             return NotImplemented
         return self._parts_normcase >= other._parts_normcase
 
-    drive = property(attrgetter('_drv'),
-                     doc="""The drive prefix (letter or UNC path), if any.""")
+    @property
+    def drive(self):
+        """The drive prefix (letter or UNC path), if any."""
+        try:
+            return self._drv
+        except AttributeError:
+            self._load_parts()
+            return self._drv
+
+    @property
+    def root(self):
+        """The root of the path, if any."""
+        try:
+            return self._root
+        except AttributeError:
+            self._load_parts()
+            return self._root
 
-    root = property(attrgetter('_root'),
-                    doc="""The root of the path, if any.""")
+    @property
+    def _parts(self):
+        try:
+            return self._parts_cached
+        except AttributeError:
+            self._load_parts()
+            return self._parts_cached
 
     @property
     def anchor(self):
         """The concatenation of the drive and root, or ''."""
-        anchor = self._drv + self._root
+        anchor = self.drive + self.root
         return anchor
 
     @property
     def name(self):
         """The final path component, if any."""
         parts = self._parts
-        if len(parts) == (1 if (self._drv or self._root) else 0):
+        if len(parts) == (1 if (self.drive or self.root) else 0):
             return ''
         return parts[-1]
 
@@ -477,7 +500,7 @@ class PurePath(object):
         drv, root, tail = f.splitroot(name)
         if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
             raise ValueError("Invalid name %r" % (name))
-        return self._from_parsed_parts(self._drv, self._root,
+        return self._from_parsed_parts(self.drive, self.root,
                                        self._parts[:-1] + [name])
 
     def with_stem(self, stem):
@@ -502,7 +525,7 @@ class PurePath(object):
             name = name + suffix
         else:
             name = name[:-len(old_suffix)] + suffix
-        return self._from_parsed_parts(self._drv, self._root,
+        return self._from_parsed_parts(self.drive, self.root,
                                        self._parts[:-1] + [name])
 
     def relative_to(self, other, /, *_deprecated, walk_up=False):
@@ -561,22 +584,7 @@ class PurePath(object):
         paths) or a totally different path (if one of the arguments is
         anchored).
         """
-        drv1, root1, parts1 = self._drv, self._root, self._parts
-        drv2, root2, parts2 = self._parse_parts(args)
-        if root2:
-            if not drv2 and drv1:
-                return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
-            else:
-                return self._from_parsed_parts(drv2, root2, parts2)
-        elif drv2:
-            if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1):
-                # Same drive => second path is relative to the first.
-                return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:])
-            else:
-                return self._from_parsed_parts(drv2, root2, parts2)
-        else:
-            # Second path is non-anchored (common case).
-            return self._from_parsed_parts(drv1, root1, parts1 + parts2)
+        return self.__class__(self._raw_path, *args)
 
     def __truediv__(self, key):
         try:
@@ -586,15 +594,15 @@ class PurePath(object):
 
     def __rtruediv__(self, key):
         try:
-            return self._from_parts([key] + self._parts)
+            return type(self)(key, self._raw_path)
         except TypeError:
             return NotImplemented
 
     @property
     def parent(self):
         """The logical parent of the path."""
-        drv = self._drv
-        root = self._root
+        drv = self.drive
+        root = self.root
         parts = self._parts
         if len(parts) == 1 and (drv or root):
             return self
@@ -610,7 +618,7 @@ class PurePath(object):
         a drive)."""
         # ntpath.isabs() is defective - see GH-44626 .
         if self._flavour is ntpath:
-            return bool(self._drv and self._root)
+            return bool(self.drive and self.root)
         return self._flavour.isabs(self)
 
     def is_reserved(self):
@@ -634,7 +642,7 @@ class PurePath(object):
         Return True if this path matches the given pattern.
         """
         path_pattern = self._flavour.normcase(path_pattern)
-        drv, root, pat_parts = self._parse_parts((path_pattern,))
+        drv, root, pat_parts = self._parse_path(path_pattern)
         if not pat_parts:
             raise ValueError("empty pattern")
         parts = self._parts_normcase
@@ -687,20 +695,23 @@ class Path(PurePath):
     """
     __slots__ = ()
 
-    def __new__(cls, *args, **kwargs):
+    def __init__(self, *args, **kwargs):
         if kwargs:
             msg = ("support for supplying keyword arguments to pathlib.PurePath "
                    "is deprecated and scheduled for removal in Python {remove}")
             warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14))
+        super().__init__(*args)
+
+    def __new__(cls, *args, **kwargs):
         if cls is Path:
             cls = WindowsPath if os.name == 'nt' else PosixPath
-        return cls._from_parts(args)
+        return object.__new__(cls)
 
     def _make_child_relpath(self, part):
         # This is an optimization used for dir walking.  `part` must be
         # a single part relative to this path.
         parts = self._parts + [part]
-        return self._from_parsed_parts(self._drv, self._root, parts)
+        return self._from_parsed_parts(self.drive, self.root, parts)
 
     def __enter__(self):
         # In previous versions of pathlib, __exit__() marked this path as
@@ -770,7 +781,7 @@ class Path(PurePath):
         sys.audit("pathlib.Path.glob", self, pattern)
         if not pattern:
             raise ValueError("Unacceptable pattern: {!r}".format(pattern))
-        drv, root, pattern_parts = self._parse_parts((pattern,))
+        drv, root, pattern_parts = self._parse_path(pattern)
         if drv or root:
             raise NotImplementedError("Non-relative patterns are unsupported")
         if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
@@ -785,7 +796,7 @@ class Path(PurePath):
         this subtree.
         """
         sys.audit("pathlib.Path.rglob", self, pattern)
-        drv, root, pattern_parts = self._parse_parts((pattern,))
+        drv, root, pattern_parts = self._parse_path(pattern)
         if drv or root:
             raise NotImplementedError("Non-relative patterns are unsupported")
         if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
@@ -802,12 +813,12 @@ class Path(PurePath):
         """
         if self.is_absolute():
             return self
-        elif self._drv:
+        elif self.drive:
             # There is a CWD on each drive-letter drive.
-            cwd = self._flavour.abspath(self._drv)
+            cwd = self._flavour.abspath(self.drive)
         else:
             cwd = os.getcwd()
-        return self._from_parts([cwd] + self._parts)
+        return type(self)(cwd, self._raw_path)
 
     def resolve(self, strict=False):
         """
@@ -825,7 +836,7 @@ class Path(PurePath):
         except OSError as e:
             check_eloop(e)
             raise
-        p = self._from_parts((s,))
+        p = type(self)(s)
 
         # In non-strict mode, realpath() doesn't raise on symlink loops.
         # Ensure we get an exception by calling stat()
@@ -915,7 +926,7 @@ class Path(PurePath):
         """
         if not hasattr(os, "readlink"):
             raise NotImplementedError("os.readlink() not available on this system")
-        return self._from_parts((os.readlink(self),))
+        return type(self)(os.readlink(self))
 
     def touch(self, mode=0o666, exist_ok=True):
         """
@@ -1184,12 +1195,12 @@ class Path(PurePath):
         """ Return a new path with expanded ~ and ~user constructs
         (as returned by os.path.expanduser)
         """
-        if (not (self._drv or self._root) and
+        if (not (self.drive or self.root) and
             self._parts and self._parts[0][:1] == '~'):
             homedir = self._flavour.expanduser(self._parts[0])
             if homedir[:1] == "~":
                 raise RuntimeError("Could not determine home directory.")
-            drv, root, parts = self._parse_parts((homedir,))
+            drv, root, parts = self._parse_path(homedir)
             return self._from_parsed_parts(drv, root, parts + self._parts[1:])
 
         return self
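diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py
index 3041630da678998a1dd738b3ee21f829b3ae86e2..8b6e012b730d752ce96e056dd42337321d10df7c 100644 (file)
--- a/Lib/test/test_pathlib.py
+++ b/Lib/test/test_pathlib.py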
@@ -27,7 +27,9 @@ except ImportError:
 class _BaseFlavourTest(object):
 
     def _check_parse_parts(self, arg, expected):
-        f = self.cls._parse_parts
+        def f(parts):
+            path = self.cls(*parts)._raw_path
+            return self.cls._parse_path(path)
         sep = self.flavour.sep
         altsep = self.flavour.altsep
         actual = f([x.replace('/', sep) for x in arg])
@@ -136,6 +138,14 @@ class NTFlavourTest(_BaseFlavourTest, unittest.TestCase):
 # Tests for the pure classes.
 #
 
+class _BasePurePathSubclass(object):
+    init_called = False
+
+    def __init__(self, *args):
+        super().__init__(*args)
+        self.init_called = True
+
+
 class _BasePurePathTest(object):
 
     # Keys are canonical paths, values are list of tuples of arguments
@@ -221,6 +231,21 @@ class _BasePurePathTest(object):
         self._check_str_subclass('a/b.txt')
         self._check_str_subclass('/a/b.txt')
 
+    def test_init_called_common(self):
+        class P(_BasePurePathSubclass, self.cls):
+            pass
+        p = P('foo', 'bar')
+        self.assertTrue((p / 'foo').init_called)
+        self.assertTrue(('foo' / p).init_called)
+        self.assertTrue(p.joinpath('foo').init_called)
+        self.assertTrue(p.with_name('foo').init_called)
+        self.assertTrue(p.with_stem('foo').init_called)
+        self.assertTrue(p.with_suffix('.foo').init_called)
+        self.assertTrue(p.relative_to('foo').init_called)
+        self.assertTrue(p.parent.init_called)
+        for parent in p.parents:
+            self.assertTrue(parent.init_called)
+
     def test_join_common(self):
         P = self.cls
         p = P('a/b')
diff --git a/Misc/NEWS.d/next/Library/2023-03-17-19-14-26.gh-issue-76846.KEamjK.rst b/Misc/NEWS.d/next/Library/2023-03-17-19-14-26.gh-issue-76846.KEamjK.rst
new file mode 100644 (file)
index 0000000..9fba11f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-03-17-19-14-26.gh-issue-76846.KEamjK.rst
@@ -0,0 +1,3 @@
+Fix issue where ``__new__()`` and ``__init__()`` methods of
+:class:`pathlib.PurePath` and :class:`~pathlib.Path` subclasses were not
+called in some circumstances.