GH-113528: Deoptimise `pathlib._abc.PurePathBase` (#113559)

author Barney Gale <barney.gale@gmail.com>

Tue, 9 Jan 2024 23:52:15 +0000 (23:52 +0000)

committer GitHub <noreply@github.com>

Tue, 9 Jan 2024 23:52:15 +0000 (23:52 +0000)
author Barney Gale <barney.gale@gmail.com>
Tue, 9 Jan 2024 23:52:15 +0000 (23:52 +0000)
committer GitHub <noreply@github.com>
Tue, 9 Jan 2024 23:52:15 +0000 (23:52 +0000)
diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py

index ccdd9c3d547c8e016d1dbabd12458d710e76dd0b..9d3fcd894164e525facda9888a44666fc1f49e87 100644 (file)
--- a/Lib/pathlib/__init__.py
+++ b/Lib/pathlib/__init__.py
@@ -76,6 +76,20 @@ class PurePath(_abc.PurePathBase):
      """
  
      __slots__ = (
+        # The `_drv`, `_root` and `_tail_cached` slots store parsed and
+        # normalized parts of the path. They are set when any of the `drive`,
+        # `root` or `_tail` properties are accessed for the first time. The
+        # three-part division corresponds to the result of
+        # `os.path.splitroot()`, except that the tail is further split on path
+        # separators (i.e. it is a list of strings), and that the root and
+        # tail are normalized.
+        '_drv', '_root', '_tail_cached',
+
+        # The `_str` slot stores the string representation of the path,
+        # computed from the drive, root and tail when `__str__()` is called
+        # for the first time. It's used to implement `_str_normcase`
+        '_str',
+
          # The `_str_normcase_cached` slot stores the string path with
          # normalized case. It is set when the `_str_normcase` property is
          # accessed for the first time. It's used to implement `__eq__()`
@@ -196,6 +210,94 @@ class PurePath(_abc.PurePathBase):
              return NotImplemented
          return self._parts_normcase >= other._parts_normcase
  
+    def __str__(self):
+        """Return the string representation of the path, suitable for
+        passing to system calls."""
+        try:
+            return self._str
+        except AttributeError:
+            self._str = self._format_parsed_parts(self.drive, self.root,
+                                                  self._tail) or '.'
+            return self._str
+
+    @classmethod
+    def _format_parsed_parts(cls, drv, root, tail):
+        if drv or root:
+            return drv + root + cls.pathmod.sep.join(tail)
+        elif tail and cls.pathmod.splitdrive(tail[0])[0]:
+            tail = ['.'] + tail
+        return cls.pathmod.sep.join(tail)
+
+    def _from_parsed_parts(self, drv, root, tail):
+        path_str = self._format_parsed_parts(drv, root, tail)
+        path = self.with_segments(path_str)
+        path._str = path_str or '.'
+        path._drv = drv
+        path._root = root
+        path._tail_cached = tail
+        return path
+
+    @classmethod
+    def _parse_path(cls, path):
+        if not path:
+            return '', '', []
+        sep = cls.pathmod.sep
+        altsep = cls.pathmod.altsep
+        if altsep:
+            path = path.replace(altsep, sep)
+        drv, root, rel = cls.pathmod.splitroot(path)
+        if not root and drv.startswith(sep) and not drv.endswith(sep):
+            drv_parts = drv.split(sep)
+            if len(drv_parts) == 4 and drv_parts[2] not in '?.':
+                # e.g. //server/share
+                root = sep
+            elif len(drv_parts) == 6:
+                # e.g. //?/unc/server/share
+                root = sep
+        parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
+        return drv, root, parsed
+
+    def _load_parts(self):
+        paths = self._raw_paths
+        if len(paths) == 0:
+            path = ''
+        elif len(paths) == 1:
+            path = paths[0]
+        else:
+            path = self.pathmod.join(*paths)
+        self._drv, self._root, self._tail_cached = self._parse_path(path)
+
+    @property
+    def drive(self):
+        """The drive prefix (letter or UNC path), if any."""
+        try:
+            return self._drv
+        except AttributeError:
+            self._load_parts()
+            return self._drv
+
+    @property
+    def root(self):
+        """The root of the path, if any."""
+        try:
+            return self._root
+        except AttributeError:
+            self._load_parts()
+            return self._root
+
+    @property
+    def _tail(self):
+        try:
+            return self._tail_cached
+        except AttributeError:
+            self._load_parts()
+            return self._tail_cached
+
+    @property
+    def anchor(self):
+        """The concatenation of the drive and root, or ''."""
+        return self.drive + self.root
+
      @property
      def parts(self):
          """An object providing sequence-like access to the
@@ -416,7 +518,7 @@ class Path(_abc.PathBase, PurePath):
      def _scandir(self):
          return os.scandir(self)
  
-    def _make_child_entry(self, entry):
+    def _make_child_entry(self, entry, is_dir=False):
          # Transform an entry yielded from _scandir() into a path object.
          path_str = entry.name if str(self) == '.' else entry.path
          path = self.with_segments(path_str)
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py

index 5caad3c0502399cd185db77b7bccc592c45ac308..6a1928495c9775943d16f8d830f80052607aaabb 100644 (file)
--- a/Lib/pathlib/_abc.py
+++ b/Lib/pathlib/_abc.py
@@ -1,7 +1,6 @@
  import functools
  import ntpath
  import posixpath
-import sys
  from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
  from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
  
@@ -82,7 +81,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match):
                      except OSError:
                          continue
                  if match(entry.name):
-                    yield parent_path._make_child_entry(entry)
+                    yield parent_path._make_child_entry(entry, dir_only)
  
  
  def _select_recursive(parent_paths, dir_only, follow_symlinks):
@@ -105,7 +104,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks):
                  for entry in entries:
                      try:
                          if entry.is_dir(follow_symlinks=follow_symlinks):
-                            paths.append(path._make_child_entry(entry))
+                            paths.append(path._make_child_entry(entry, dir_only))
                              continue
                      except OSError:
                          pass
@@ -147,20 +146,6 @@ class PurePathBase:
          # in the `__init__()` method.
          '_raw_paths',
  
-        # The `_drv`, `_root` and `_tail_cached` slots store parsed and
-        # normalized parts of the path. They are set when any of the `drive`,
-        # `root` or `_tail` properties are accessed for the first time. The
-        # three-part division corresponds to the result of
-        # `os.path.splitroot()`, except that the tail is further split on path
-        # separators (i.e. it is a list of strings), and that the root and
-        # tail are normalized.
-        '_drv', '_root', '_tail_cached',
-
-        # The `_str` slot stores the string representation of the path,
-        # computed from the drive, root and tail when `__str__()` is called
-        # for the first time. It's used to implement `_str_normcase`
-        '_str',
-
          # The '_resolving' slot stores a boolean indicating whether the path
          # is being processed by `PathBase.resolve()`. This prevents duplicate
          # work from occurring when `resolve()` calls `stat()` or `readlink()`.
@@ -179,65 +164,16 @@ class PurePathBase:
          """
          return type(self)(*pathsegments)
  
-    @classmethod
-    def _parse_path(cls, path):
-        if not path:
-            return '', '', []
-        sep = cls.pathmod.sep
-        altsep = cls.pathmod.altsep
-        if altsep:
-            path = path.replace(altsep, sep)
-        drv, root, rel = cls.pathmod.splitroot(path)
-        if not root and drv.startswith(sep) and not drv.endswith(sep):
-            drv_parts = drv.split(sep)
-            if len(drv_parts) == 4 and drv_parts[2] not in '?.':
-                # e.g. //server/share
-                root = sep
-            elif len(drv_parts) == 6:
-                # e.g. //?/unc/server/share
-                root = sep
-        parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
-        return drv, root, parsed
-
-    def _load_parts(self):
-        paths = self._raw_paths
-        if len(paths) == 0:
-            path = ''
-        elif len(paths) == 1:
-            path = paths[0]
-        else:
-            path = self.pathmod.join(*paths)
-        drv, root, tail = self._parse_path(path)
-        self._drv = drv
-        self._root = root
-        self._tail_cached = tail
-
-    def _from_parsed_parts(self, drv, root, tail):
-        path_str = self._format_parsed_parts(drv, root, tail)
-        path = self.with_segments(path_str)
-        path._str = path_str or '.'
-        path._drv = drv
-        path._root = root
-        path._tail_cached = tail
-        return path
-
-    @classmethod
-    def _format_parsed_parts(cls, drv, root, tail):
-        if drv or root:
-            return drv + root + cls.pathmod.sep.join(tail)
-        elif tail and cls.pathmod.splitdrive(tail[0])[0]:
-            tail = ['.'] + tail
-        return cls.pathmod.sep.join(tail)
-
      def __str__(self):
          """Return the string representation of the path, suitable for
          passing to system calls."""
-        try:
-            return self._str
-        except AttributeError:
-            self._str = self._format_parsed_parts(self.drive, self.root,
-                                                  self._tail) or '.'
-            return self._str
+        paths = self._raw_paths
+        if len(paths) == 1:
+            return paths[0]
+        elif paths:
+            return self.pathmod.join(*paths)
+        else:
+            return ''
  
      def as_posix(self):
          """Return the string representation of the path with forward (/)
@@ -247,42 +183,23 @@ class PurePathBase:
      @property
      def drive(self):
          """The drive prefix (letter or UNC path), if any."""
-        try:
-            return self._drv
-        except AttributeError:
-            self._load_parts()
-            return self._drv
+        return self.pathmod.splitdrive(str(self))[0]
  
      @property
      def root(self):
          """The root of the path, if any."""
-        try:
-            return self._root
-        except AttributeError:
-            self._load_parts()
-            return self._root
-
-    @property
-    def _tail(self):
-        try:
-            return self._tail_cached
-        except AttributeError:
-            self._load_parts()
-            return self._tail_cached
+        return self.pathmod.splitroot(str(self))[1]
  
      @property
      def anchor(self):
          """The concatenation of the drive and root, or ''."""
-        anchor = self.drive + self.root
-        return anchor
+        drive, root, _ =  self.pathmod.splitroot(str(self))
+        return drive + root
  
      @property
      def name(self):
          """The final path component, if any."""
-        path_str = str(self)
-        if not path_str or path_str == '.':
-            return ''
-        return self.pathmod.basename(path_str)
+        return self.pathmod.basename(str(self))
  
      @property
      def suffix(self):
@@ -323,13 +240,10 @@ class PurePathBase:
  
      def with_name(self, name):
          """Return a new path with the file name changed."""
-        m = self.pathmod
-        if not name or m.sep in name or (m.altsep and m.altsep in name) or name == '.':
+        dirname = self.pathmod.dirname
+        if dirname(name):
              raise ValueError(f"Invalid name {name!r}")
-        parent, old_name = m.split(str(self))
-        if not old_name or old_name == '.':
-            raise ValueError(f"{self!r} has an empty name")
-        return self.with_segments(parent, name)
+        return self.with_segments(dirname(str(self)), name)
  
      def with_stem(self, stem):
          """Return a new path with the stem changed."""
@@ -480,7 +394,7 @@ class PurePathBase:
      def is_reserved(self):
          """Return True if the path contains one of the special names reserved
          by the system, if any."""
-        if self.pathmod is posixpath or not self._tail:
+        if self.pathmod is posixpath or not self.name:
              return False
  
          # NOTE: the rules for reserved names seem somewhat complicated
@@ -490,7 +404,7 @@ class PurePathBase:
          if self.drive.startswith('\\\\'):
              # UNC paths are never reserved.
              return False
-        name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ')
+        name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
          return name.upper() in _WIN_RESERVED_NAMES
  
      def match(self, path_pattern, *, case_sensitive=None):
@@ -503,9 +417,9 @@ class PurePathBase:
              case_sensitive = _is_case_sensitive(self.pathmod)
          sep = path_pattern.pathmod.sep
          pattern_str = str(path_pattern)
-        if path_pattern.drive or path_pattern.root:
+        if path_pattern.anchor:
              pass
-        elif path_pattern._tail:
+        elif path_pattern.parts:
              pattern_str = f'**{sep}{pattern_str}'
          else:
              raise ValueError("empty pattern")
@@ -780,8 +694,10 @@ class PathBase(PurePathBase):
          from contextlib import nullcontext
          return nullcontext(self.iterdir())
  
-    def _make_child_entry(self, entry):
+    def _make_child_entry(self, entry, is_dir=False):
          # Transform an entry yielded from _scandir() into a path object.
+        if is_dir:
+            return entry.joinpath('')
          return entry
  
      def _make_child_relpath(self, name):
@@ -792,13 +708,13 @@ class PathBase(PurePathBase):
          kind, including directories) matching the given relative pattern.
          """
          path_pattern = self.with_segments(pattern)
-        if path_pattern.drive or path_pattern.root:
+        if path_pattern.anchor:
              raise NotImplementedError("Non-relative patterns are unsupported")
-        elif not path_pattern._tail:
+        elif not path_pattern.parts:
              raise ValueError("Unacceptable pattern: {!r}".format(pattern))
  
-        pattern_parts = path_pattern._tail.copy()
-        if pattern[-1] in (self.pathmod.sep, self.pathmod.altsep):
+        pattern_parts = list(path_pattern.parts)
+        if not self.pathmod.basename(pattern):
              # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
              pattern_parts.append('')
  
@@ -816,7 +732,7 @@ class PathBase(PurePathBase):
          filter_paths = follow_symlinks is not None and '..' not in pattern_parts
          deduplicate_paths = False
          sep = self.pathmod.sep
-        paths = iter([self] if self.is_dir() else [])
+        paths = iter([self.joinpath('')] if self.is_dir() else [])
          part_idx = 0
          while part_idx < len(pattern_parts):
              part = pattern_parts[part_idx]
diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py

index 6e42122212632dc023b289a7e7ab51c52ef596f4..04e6280509ecfc94e988765b93e377adbb4a53a3 100644 (file)
--- a/Lib/test/test_pathlib/test_pathlib.py
+++ b/Lib/test/test_pathlib/test_pathlib.py
@@ -140,6 +140,8 @@ class PurePathTest(test_pathlib_abc.DummyPurePathTest):
          # The empty path points to '.'
          p = self.cls('')
          self.assertEqual(str(p), '.')
+        # Special case for the empty path.
+        self._check_str('.', ('',))
  
      def test_parts_interning(self):
          P = self.cls
@@ -300,6 +302,40 @@ class PurePathTest(test_pathlib_abc.DummyPurePathTest):
                  self.assertEqual(q, p)
                  self.assertEqual(repr(q), r)
  
+    def test_name_empty(self):
+        P = self.cls
+        self.assertEqual(P('').name, '')
+        self.assertEqual(P('.').name, '')
+        self.assertEqual(P('/a/b/.').name, 'b')
+
+    def test_stem_empty(self):
+        P = self.cls
+        self.assertEqual(P('').stem, '')
+        self.assertEqual(P('.').stem, '')
+
+    def test_with_name_empty(self):
+        P = self.cls
+        self.assertRaises(ValueError, P('').with_name, 'd.xml')
+        self.assertRaises(ValueError, P('.').with_name, 'd.xml')
+        self.assertRaises(ValueError, P('/').with_name, 'd.xml')
+        self.assertRaises(ValueError, P('a/b').with_name, '')
+        self.assertRaises(ValueError, P('a/b').with_name, '.')
+
+    def test_with_stem_empty(self):
+        P = self.cls
+        self.assertRaises(ValueError, P('').with_stem, 'd')
+        self.assertRaises(ValueError, P('.').with_stem, 'd')
+        self.assertRaises(ValueError, P('/').with_stem, 'd')
+        self.assertRaises(ValueError, P('a/b').with_stem, '')
+        self.assertRaises(ValueError, P('a/b').with_stem, '.')
+
+    def test_with_suffix_empty(self):
+        # Path doesn't have a "filename" component.
+        P = self.cls
+        self.assertRaises(ValueError, P('').with_suffix, '.gz')
+        self.assertRaises(ValueError, P('.').with_suffix, '.gz')
+        self.assertRaises(ValueError, P('/').with_suffix, '.gz')
+
      def test_relative_to_several_args(self):
          P = self.cls
          p = P('a/b')
@@ -313,6 +349,11 @@ class PurePathTest(test_pathlib_abc.DummyPurePathTest):
          with self.assertWarns(DeprecationWarning):
              p.is_relative_to('a', 'b')
  
+    def test_match_empty(self):
+        P = self.cls
+        self.assertRaises(ValueError, P('a').match, '')
+        self.assertRaises(ValueError, P('a').match, '.')
+
  
  class PurePosixPathTest(PurePathTest):
      cls = pathlib.PurePosixPath
diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py

index b088be8714172950822de697def4013f34e13c9b..7c8a0f4687f00cc3fcbd32e6d76c3e156ce3b285 100644 (file)
--- a/Lib/test/test_pathlib/test_pathlib_abc.py
+++ b/Lib/test/test_pathlib/test_pathlib_abc.py
@@ -153,8 +153,6 @@ class DummyPurePathTest(unittest.TestCase):
          # Canonicalized paths roundtrip.
          for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'):
              self._check_str(pathstr, (pathstr,))
-        # Special case for the empty path.
-        self._check_str('.', ('',))
          # Other tests for str() are in test_equivalences().
  
      def test_as_posix_common(self):
@@ -166,7 +164,6 @@ class DummyPurePathTest(unittest.TestCase):
      def test_match_empty(self):
          P = self.cls
          self.assertRaises(ValueError, P('a').match, '')
-        self.assertRaises(ValueError, P('a').match, '.')
  
      def test_match_common(self):
          P = self.cls
@@ -206,7 +203,6 @@ class DummyPurePathTest(unittest.TestCase):
          self.assertTrue(P('a/b/c.py').match('**'))
          self.assertTrue(P('/a/b/c.py').match('**'))
          self.assertTrue(P('/a/b/c.py').match('/**'))
-        self.assertTrue(P('/a/b/c.py').match('**/'))
          self.assertTrue(P('/a/b/c.py').match('/a/**'))
          self.assertTrue(P('/a/b/c.py').match('**/*.py'))
          self.assertTrue(P('/a/b/c.py').match('/**/*.py'))
@@ -270,17 +266,17 @@ class DummyPurePathTest(unittest.TestCase):
          self.assertEqual(len(par), 3)
          self.assertEqual(par[0], P('a/b'))
          self.assertEqual(par[1], P('a'))
-        self.assertEqual(par[2], P('.'))
-        self.assertEqual(par[-1], P('.'))
+        self.assertEqual(par[2], P(''))
+        self.assertEqual(par[-1], P(''))
          self.assertEqual(par[-2], P('a'))
          self.assertEqual(par[-3], P('a/b'))
          self.assertEqual(par[0:1], (P('a/b'),))
          self.assertEqual(par[:2], (P('a/b'), P('a')))
          self.assertEqual(par[:-1], (P('a/b'), P('a')))
-        self.assertEqual(par[1:], (P('a'), P('.')))
-        self.assertEqual(par[::2], (P('a/b'), P('.')))
-        self.assertEqual(par[::-1], (P('.'), P('a'), P('a/b')))
-        self.assertEqual(list(par), [P('a/b'), P('a'), P('.')])
+        self.assertEqual(par[1:], (P('a'), P('')))
+        self.assertEqual(par[::2], (P('a/b'), P('')))
+        self.assertEqual(par[::-1], (P(''), P('a'), P('a/b')))
+        self.assertEqual(list(par), [P('a/b'), P('a'), P('')])
          with self.assertRaises(IndexError):
              par[-4]
          with self.assertRaises(IndexError):
@@ -334,8 +330,8 @@ class DummyPurePathTest(unittest.TestCase):
      def test_name_empty(self):
          P = self.cls
          self.assertEqual(P('').name, '')
-        self.assertEqual(P('.').name, '')
-        self.assertEqual(P('/a/b/.').name, 'b')
+        self.assertEqual(P('.').name, '.')
+        self.assertEqual(P('/a/b/.').name, '.')
  
      def test_name_common(self):
          P = self.cls
@@ -387,7 +383,7 @@ class DummyPurePathTest(unittest.TestCase):
      def test_stem_empty(self):
          P = self.cls
          self.assertEqual(P('').stem, '')
-        self.assertEqual(P('.').stem, '')
+        self.assertEqual(P('.').stem, '.')
  
      def test_stem_common(self):
          P = self.cls
@@ -412,11 +408,11 @@ class DummyPurePathTest(unittest.TestCase):
  
      def test_with_name_empty(self):
          P = self.cls
-        self.assertRaises(ValueError, P('').with_name, 'd.xml')
-        self.assertRaises(ValueError, P('.').with_name, 'd.xml')
-        self.assertRaises(ValueError, P('/').with_name, 'd.xml')
-        self.assertRaises(ValueError, P('a/b').with_name, '')
-        self.assertRaises(ValueError, P('a/b').with_name, '.')
+        self.assertEqual(P('').with_name('d.xml'), P('d.xml'))
+        self.assertEqual(P('.').with_name('d.xml'), P('d.xml'))
+        self.assertEqual(P('/').with_name('d.xml'), P('/d.xml'))
+        self.assertEqual(P('a/b').with_name(''), P('a/'))
+        self.assertEqual(P('a/b').with_name('.'), P('a/.'))
  
      def test_with_name_seps(self):
          P = self.cls
@@ -436,11 +432,11 @@ class DummyPurePathTest(unittest.TestCase):
  
      def test_with_stem_empty(self):
          P = self.cls
-        self.assertRaises(ValueError, P('').with_stem, 'd')
-        self.assertRaises(ValueError, P('.').with_stem, 'd')
-        self.assertRaises(ValueError, P('/').with_stem, 'd')
-        self.assertRaises(ValueError, P('a/b').with_stem, '')
-        self.assertRaises(ValueError, P('a/b').with_stem, '.')
+        self.assertEqual(P('').with_stem('d'), P('d'))
+        self.assertEqual(P('.').with_stem('d'), P('d'))
+        self.assertEqual(P('/').with_stem('d'), P('/d'))
+        self.assertEqual(P('a/b').with_stem(''), P('a/'))
+        self.assertEqual(P('a/b').with_stem('.'), P('a/.'))
  
      def test_with_stem_seps(self):
          P = self.cls
@@ -461,9 +457,9 @@ class DummyPurePathTest(unittest.TestCase):
      def test_with_suffix_empty(self):
          P = self.cls
          # Path doesn't have a "filename" component.
-        self.assertRaises(ValueError, P('').with_suffix, '.gz')
-        self.assertRaises(ValueError, P('.').with_suffix, '.gz')
-        self.assertRaises(ValueError, P('/').with_suffix, '.gz')
+        self.assertEqual(P('').with_suffix('.gz'), P('.gz'))
+        self.assertEqual(P('.').with_suffix('.gz'), P('..gz'))
+        self.assertEqual(P('/').with_suffix('.gz'), P('/.gz'))
  
      def test_with_suffix_seps(self):
          P = self.cls
author	Barney Gale <barney.gale@gmail.com>
	Tue, 9 Jan 2024 23:52:15 +0000 (23:52 +0000)
committer	GitHub <noreply@github.com>
	Tue, 9 Jan 2024 23:52:15 +0000 (23:52 +0000)
Lib/pathlib/__init__.py		patch \| blob \| blame \| history
Lib/pathlib/_abc.py		patch \| blob \| blame \| history
Lib/test/test_pathlib/test_pathlib.py		patch \| blob \| blame \| history
Lib/test/test_pathlib/test_pathlib_abc.py		patch \| blob \| blame \| history