git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
GH-114847: Speed up `posixpath.realpath()` (#114848)
author: Barney Gale <barney.gale@gmail.com>
Fri, 5 Apr 2024 12:35:01 +0000 (13:35 +0100)
committer: GitHub <noreply@github.com>
Fri, 5 Apr 2024 12:35:01 +0000 (12:35 +0000)
Apply the following optimizations to `posixpath.realpath()`:

- Remove use of recursion
- Construct child paths directly rather than using `join()`
- Use `os.getcwd[b]()` rather than `abspath()`
- Use `startswith(sep)` rather than `isabs()`
- Use slicing rather than `split()`

Co-authored-by: Petr Viktorin <encukou@gmail.com>
Lib/posixpath.py
Lib/test/test_posixpath.py
Misc/NEWS.d/next/Library/2024-02-01-08-09-20.gh-issue-114847.-JrWrR.rst [new file with mode: 0644]

index 76ee721bfb5e33916071e2c4a1c3af983ed9adc8..0e8bb5ab10d916c8728194815cac0863db15a21a 100644 (file)
@@ -403,55 +403,66 @@ def realpath(filename, *, strict=False):
     """Return the canonical path of the specified filename, eliminating any
 symbolic links encountered in the path."""
     filename = os.fspath(filename)
-    path, ok = _joinrealpath(filename[:0], filename, strict, {})
-    return abspath(path)
-
-# Join two paths, normalizing and eliminating any symbolic links
-# encountered in the second path.
-# Two leading slashes are replaced by a single slash.
-def _joinrealpath(path, rest, strict, seen):
-    if isinstance(path, bytes):
+    if isinstance(filename, bytes):
         sep = b'/'
         curdir = b'.'
         pardir = b'..'
+        getcwd = os.getcwdb
     else:
         sep = '/'
         curdir = '.'
         pardir = '..'
+        getcwd = os.getcwd
+
+    # The stack of unresolved path parts. When popped, a special value of None
+    # indicates that a symlink target has been resolved, and that the original
+    # symlink path can be retrieved by popping again. The [::-1] slice is a
+    # very fast way of spelling list(reversed(...)).
+    rest = filename.split(sep)[::-1]
+
+    # The resolved path, which is absolute throughout this function.
+    # Note: getcwd() returns a normalized and symlink-free path.
+    path = sep if filename.startswith(sep) else getcwd()
 
-    if rest.startswith(sep):
-        rest = rest[1:]
-        path = sep
+    # Mapping from symlink paths to *fully resolved* symlink targets. If a
+    # symlink is encountered but not yet resolved, the value is None. This is
+    # used both to detect symlink loops and to speed up repeated traversals of
+    # the same links.
+    seen = {}
+
+    # Whether we're calling lstat() and readlink() to resolve symlinks. If we
+    # encounter an OSError for a symlink loop in non-strict mode, this is
+    # switched off.
+    querying = True
 
     while rest:
-        name, _, rest = rest.partition(sep)
+        name = rest.pop()
+        if name is None:
+            # resolved symlink target
+            seen[rest.pop()] = path
+            continue
         if not name or name == curdir:
             # current dir
             continue
         if name == pardir:
             # parent dir
-            if path:
-                parent, name = split(path)
-                if name == pardir:
-                    # ../..
-                    path = join(path, pardir)
-                else:
-                    # foo/bar/.. -> foo
-                    path = parent
-            else:
-                # ..
-                path = pardir
+            path = path[:path.rindex(sep)] or sep
+            continue
+        if path == sep:
+            newpath = path + name
+        else:
+            newpath = path + sep + name
+        if not querying:
+            path = newpath
             continue
-        newpath = join(path, name)
         try:
             st = os.lstat(newpath)
+            if not stat.S_ISLNK(st.st_mode):
+                path = newpath
+                continue
         except OSError:
             if strict:
                 raise
-            is_link = False
-        else:
-            is_link = stat.S_ISLNK(st.st_mode)
-        if not is_link:
             path = newpath
             continue
         # Resolve the symbolic link
@@ -467,14 +478,23 @@ def _joinrealpath(path, rest, strict, seen):
                 os.stat(newpath)
             else:
                 # Return already resolved part + rest of the path unchanged.
-                return join(newpath, rest), False
+                path = newpath
+                querying = False
+                continue
         seen[newpath] = None # not resolved symlink
-        path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
-        if not ok:
-            return join(path, rest), False
-        seen[newpath] = path # resolved symlink
+        target = os.readlink(newpath)
+        if target.startswith(sep):
+            # Symlink target is absolute; reset resolved path.
+            path = sep
+        # Push the symlink path onto the stack, and signal its specialness by
+        # also pushing None. When these entries are popped, we'll record the
+        # fully-resolved symlink target in the 'seen' mapping.
+        rest.append(newpath)
+        rest.append(None)
+        # Push the unresolved symlink target parts onto the stack.
+        rest.extend(target.split(sep)[::-1])
 
-    return path, True
+    return path
 
 
 supports_unicode_filenames = (sys.platform == 'darwin')
index cbb7c4c52d969793ea46ebcca10798d1244f8b36..807f985f7f4df786e9a1a2afda496f69f05cf400 100644 (file)
@@ -456,6 +456,15 @@ class PosixPathTest(unittest.TestCase):
         finally:
             os_helper.unlink(ABSTFN)
 
+    @os_helper.skip_unless_symlink
+    @skip_if_ABSTFN_contains_backslash
+    def test_realpath_missing_pardir(self):
+        try:
+            os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN)
+            self.assertEqual(realpath("nonexistent/../" + os_helper.TESTFN), ABSTFN + "1")
+        finally:
+            os_helper.unlink(os_helper.TESTFN)
+
     @os_helper.skip_unless_symlink
     @skip_if_ABSTFN_contains_backslash
     def test_realpath_symlink_loops(self):
diff --git a/Misc/NEWS.d/next/Library/2024-02-01-08-09-20.gh-issue-114847.-JrWrR.rst b/Misc/NEWS.d/next/Library/2024-02-01-08-09-20.gh-issue-114847.-JrWrR.rst
new file mode 100644 (file)
index 0000000..bf011fe
--- /dev/null
@@ -0,0 +1 @@
+Speed up :func:`os.path.realpath` on non-Windows platforms.