git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-101362: Optimise PurePath(PurePath(...)) (GH-101667)
author: Barney Gale <barney.gale@gmail.com>
Sun, 5 Mar 2023 23:50:21 +0000 (23:50 +0000)
committer: GitHub <noreply@github.com>
Sun, 5 Mar 2023 23:50:21 +0000 (15:50 -0800)
The previous `_parse_args()` method pulled the `_parts` out of any supplied `PurePath` objects; these were subsequently joined in `_from_parts()` using `os.path.join()`. This is actually a slower form of joining than calling `fspath()` on the path object, because `os.path.join()` doesn't take advantage of the fact that the contents of `_parts` are normalized!

This reduces the time taken to run `PurePath("foo", "bar")` by ~20%, and the time taken to run `PurePath(p, "cheese")`, where `p = PurePath("/foo", "bar", "baz")`, by ~40%.

Automerge-Triggered-By: GH:AlexWaygood
Doc/library/pathlib.rst
Lib/pathlib.py
Lib/test/test_pathlib.py
Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst [new file with mode: 0644]

index c8a734ecad8e7ba399c4365e4f099a422963c83c..8e91936680fab895b302a3f8b3c56ecdb73c7a96 100644 (file)
@@ -105,8 +105,9 @@ we also call *flavours*:
       PurePosixPath('setup.py')
 
    Each element of *pathsegments* can be either a string representing a
-   path segment, an object implementing the :class:`os.PathLike` interface
-   which returns a string, or another path object::
+   path segment, or an object implementing the :class:`os.PathLike` interface
+   where the :meth:`~os.PathLike.__fspath__` method returns a string,
+   such as another path object::
 
       >>> PurePath('foo', 'some/path', 'bar')
       PurePosixPath('foo/some/path/bar')
index c37ff21c0352d89c815cfc7f648b4e29c58fdfd7..d375529ff5f767629500f224f9470751b1916618 100644 (file)
@@ -281,6 +281,14 @@ class PurePath(object):
             path = cls._flavour.join(*parts)
         sep = cls._flavour.sep
         altsep = cls._flavour.altsep
+        if isinstance(path, str):
+            # Force-cast str subclasses to str (issue #21127)
+            path = str(path)
+        else:
+            raise TypeError(
+                "argument should be a str or an os.PathLike "
+                "object where __fspath__ returns a str, "
+                f"not {type(path).__name__!r}")
         if altsep:
             path = path.replace(altsep, sep)
         drv, root, rel = cls._flavour.splitroot(path)
@@ -291,32 +299,10 @@ class PurePath(object):
         parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
         return drv, root, parsed
 
-    @classmethod
-    def _parse_args(cls, args):
-        # This is useful when you don't want to create an instance, just
-        # canonicalize some constructor arguments.
-        parts = []
-        for a in args:
-            if isinstance(a, PurePath):
-                parts += a._parts
-            else:
-                a = os.fspath(a)
-                if isinstance(a, str):
-                    # Force-cast str subclasses to str (issue #21127)
-                    parts.append(str(a))
-                else:
-                    raise TypeError(
-                        "argument should be a str object or an os.PathLike "
-                        "object returning str, not %r"
-                        % type(a))
-        return cls._parse_parts(parts)
-
     @classmethod
     def _from_parts(cls, args):
-        # We need to call _parse_args on the instance, so as to get the
-        # right flavour.
         self = object.__new__(cls)
-        drv, root, parts = self._parse_args(args)
+        drv, root, parts = self._parse_parts(args)
         self._drv = drv
         self._root = root
         self._parts = parts
@@ -575,7 +561,7 @@ class PurePath(object):
         anchored).
         """
         drv1, root1, parts1 = self._drv, self._root, self._parts
-        drv2, root2, parts2 = self._parse_args(args)
+        drv2, root2, parts2 = self._parse_parts(args)
         if root2:
             if not drv2 and drv1:
                 return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
@@ -662,7 +648,7 @@ class PurePath(object):
         return True
 
 # Can't subclass os.PathLike from PurePath and keep the constructor
-# optimizations in PurePath._parse_args().
+# optimizations in PurePath.__slots__.
 os.PathLike.register(PurePath)
 
 
index 4de91d52c6d10c0c6bc0a916451d35771540dd03..df9c1f6ba65deb5ea78f0f3ab8b3fbe9950826ba 100644 (file)
@@ -166,6 +166,33 @@ class _BasePurePathTest(object):
         self.assertEqual(P(P('a'), P('b')), P('a/b'))
         self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c")))
 
+    def test_bytes(self):
+        P = self.cls
+        message = (r"argument should be a str or an os\.PathLike object "
+                   r"where __fspath__ returns a str, not 'bytes'")
+        with self.assertRaisesRegex(TypeError, message):
+            P(b'a')
+        with self.assertRaises(TypeError):
+            P(b'a', 'b')
+        with self.assertRaises(TypeError):
+            P('a', b'b')
+        with self.assertRaises(TypeError):
+            P('a').joinpath(b'b')
+        with self.assertRaises(TypeError):
+            P('a') / b'b'
+        with self.assertRaises(TypeError):
+            b'a' / P('b')
+        with self.assertRaises(TypeError):
+            P('a').match(b'b')
+        with self.assertRaises(TypeError):
+            P('a').relative_to(b'b')
+        with self.assertRaises(TypeError):
+            P('a').with_name(b'b')
+        with self.assertRaises(TypeError):
+            P('a').with_stem(b'b')
+        with self.assertRaises(TypeError):
+            P('a').with_suffix(b'b')
+
     def _check_str_subclass(self, *args):
         # Issue #21127: it should be possible to construct a PurePath object
         # from a str subclass instance, and it then gets converted to
diff --git a/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst b/Misc/NEWS.d/next/Library/2023-02-07-22-20-32.gh-issue-101362.Jlk6mt.rst
new file mode 100644 (file)
index 0000000..c05f92a
--- /dev/null
@@ -0,0 +1,4 @@
+Speed up :class:`pathlib.PurePath` construction by handling arguments more
+uniformly. When a :class:`pathlib.Path` argument is supplied,
+we use its string representation rather than joining its parts
+with :func:`os.path.join`.