]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-45506: Normalize _PyPathConfig.stdlib_dir when calculated. (#29040)
author: Eric Snow <ericsnowcurrently@gmail.com>
Fri, 22 Oct 2021 23:20:03 +0000 (17:20 -0600)
committer: GitHub <noreply@github.com>
Fri, 22 Oct 2021 23:20:03 +0000 (17:20 -0600)
The recently added PyConfig.stdlib_dir was being set with ".." entries. When __file__ was added for frozen modules, this caused a problem on out-of-tree builds. This PR fixes that by normalizing "stdlib_dir" when it is calculated in getpath.c.

https://bugs.python.org/issue45506

Include/internal/pycore_fileutils.h
Lib/test/test_fileutils.py [new file with mode: 0644]
Lib/test/test_posixpath.py
Modules/_testinternalcapi.c
Modules/getpath.c
Python/fileutils.c

index ab436ae9b007acc39e665096effe88128b5d4975..d1caf9c237234932e17ec0a5a427ea1f7d865b0e 100644 (file)
@@ -80,6 +80,9 @@ extern int _Py_add_relfile(wchar_t *dirname,
                            const wchar_t *relfile,
                            size_t bufsize);
 extern size_t _Py_find_basename(const wchar_t *filename);
+PyAPI_FUNC(int) _Py_normalize_path(const wchar_t *path,
+                                   wchar_t *buf, const size_t buf_len);
+
 
 // Macros to protect CRT calls against instant termination when passed an
 // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
diff --git a/Lib/test/test_fileutils.py b/Lib/test/test_fileutils.py
new file mode 100644 (file)
index 0000000..45b3f32
--- /dev/null
@@ -0,0 +1,30 @@
+# Run tests for functions in Python/fileutils.c.
+
+import os
+import os.path
+import unittest
+from test.support import import_helper
+
+# Skip this test if the _testinternalcapi module isn't available.
+_testcapi = import_helper.import_module('_testinternalcapi')
+
+
+class PathTests(unittest.TestCase):
+
+    def test_capi_normalize_path(self):
+        if os.name == 'nt':
+            raise unittest.SkipTest('Windows has its own helper for this')
+        # Absolute import: a relative one fails when run directly as __main__.
+        from test.test_posixpath import PosixPathTest as posixdata
+        tests = posixdata.NORMPATH_CASES
+        for filename, expected in tests:
+            if not os.path.isabs(filename):
+                continue
+            with self.subTest(filename):
+                result = _testcapi.normalize_path(filename)
+                self.assertEqual(result, expected,
+                    msg=f'input: {filename!r} expected output: {expected!r}')
+
+
+if __name__ == "__main__":
+    unittest.main()
index 8d398ec0103544555d9502c83243cc870ba485e1..e4d8384ef0b4bc857fe6539101e33ead862c08af 100644 (file)
@@ -304,25 +304,51 @@ class PosixPathTest(unittest.TestCase):
                 for path in ('~', '~/.local', '~vstinner/'):
                     self.assertEqual(posixpath.expanduser(path), path)
 
+    NORMPATH_CASES = [
+        ("", "."),
+        ("/", "/"),
+        ("/.", "/"),
+        ("/./", "/"),
+        ("/.//.", "/"),
+        ("/foo", "/foo"),
+        ("/foo/bar", "/foo/bar"),
+        ("//", "//"),
+        ("///", "/"),
+        ("///foo/.//bar//", "/foo/bar"),
+        ("///foo/.//bar//.//..//.//baz///", "/foo/baz"),
+        ("///..//./foo/.//bar", "/foo/bar"),
+        (".", "."),
+        (".//.", "."),
+        ("..", ".."),
+        ("../", ".."),
+        ("../foo", "../foo"),
+        ("../../foo", "../../foo"),
+        ("../foo/../bar", "../bar"),
+        ("../../foo/../bar/./baz/boom/..", "../../bar/baz"),
+        ("/..", "/"),
+        ("/..", "/"),
+        ("/../", "/"),
+        ("/..//", "/"),
+        ("//..", "//"),
+        ("/../foo", "/foo"),
+        ("/../../foo", "/foo"),
+        ("/../foo/../", "/"),
+        ("/../foo/../bar", "/bar"),
+        ("/../../foo/../bar/./baz/boom/..", "/bar/baz"),
+        ("/../../foo/../bar/./baz/boom/.", "/bar/baz/boom"),
+    ]
+
     def test_normpath(self):
-        self.assertEqual(posixpath.normpath(""), ".")
-        self.assertEqual(posixpath.normpath("/"), "/")
-        self.assertEqual(posixpath.normpath("//"), "//")
-        self.assertEqual(posixpath.normpath("///"), "/")
-        self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar")
-        self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"),
-                         "/foo/baz")
-        self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar")
-
-        self.assertEqual(posixpath.normpath(b""), b".")
-        self.assertEqual(posixpath.normpath(b"/"), b"/")
-        self.assertEqual(posixpath.normpath(b"//"), b"//")
-        self.assertEqual(posixpath.normpath(b"///"), b"/")
-        self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar")
-        self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"),
-                         b"/foo/baz")
-        self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"),
-                         b"/foo/bar")
+        for path, expected in self.NORMPATH_CASES:
+            with self.subTest(path):
+                result = posixpath.normpath(path)
+                self.assertEqual(result, expected)
+
+            path = path.encode('utf-8')
+            expected = expected.encode('utf-8')
+            with self.subTest(path, type=bytes):
+                result = posixpath.normpath(path)
+                self.assertEqual(result, expected)
 
     @skip_if_ABSTFN_contains_backslash
     def test_realpath_curdir(self):
index 1ca06069e11199f22b75c718bacfb6acca5ef969..1f205b873beaf6755297ae8a9bdb1c4458d1b2ca 100644 (file)
 #include "Python.h"
 #include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
 #include "pycore_bitutils.h"     // _Py_bswap32()
+#include "pycore_fileutils.h"    // _Py_normalize_path
 #include "pycore_gc.h"           // PyGC_Head
 #include "pycore_hashtable.h"    // _Py_hashtable_new()
 #include "pycore_initconfig.h"   // _Py_GetConfigsAsDict()
 #include "pycore_interp.h"       // _PyInterpreterState_GetConfigCopy()
 #include "pycore_pyerrors.h"     // _Py_UTF8_Edit_Cost()
 #include "pycore_pystate.h"      // _PyThreadState_GET()
+#include "osdefs.h"               // MAXPATHLEN
 
 
 static PyObject *
@@ -366,6 +368,27 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args))
 }
 
 
+static PyObject *
+normalize_path(PyObject *self, PyObject *filename)
+{
+    Py_ssize_t size = -1;
+    wchar_t *encoded = PyUnicode_AsWideCharString(filename, &size);
+    if (encoded == NULL) {
+        return NULL;
+    }
+
+    wchar_t buf[MAXPATHLEN + 1];
+    int res = _Py_normalize_path(encoded, buf, Py_ARRAY_LENGTH(buf));
+    PyMem_Free(encoded);
+    if (res != 0) {
+        PyErr_SetString(PyExc_ValueError, "string too long");
+        return NULL;
+    }
+
+    return PyUnicode_FromWideChar(buf, -1);
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -377,6 +400,7 @@ static PyMethodDef TestMethods[] = {
     {"set_config", test_set_config, METH_O},
     {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
     {"test_edit_cost", test_edit_cost, METH_NOARGS},
+    {"normalize_path", normalize_path, METH_O, NULL},
     {NULL, NULL} /* sentinel */
 };
 
index 1405023b39b580d7ca1b2bf8bb4fe72ed54bce00..4dbd502ddcf045e24174f9e95a234df2af5c6dd5 100644 (file)
@@ -519,6 +519,42 @@ search_for_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig,
 }
 
 
+static PyStatus
+calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
+{
+    // Note that, unlike calculate_set_prefix(), here we allow a negative
+    // prefix_found.  That means the source tree Lib dir gets used.
+    if (!calculate->prefix_found) {
+        return _PyStatus_OK();
+    }
+    PyStatus status;
+    wchar_t *prefix = calculate->prefix;
+    if (!_Py_isabs(prefix)) {
+        prefix = _PyMem_RawWcsdup(prefix);
+        if (prefix == NULL) {
+            return _PyStatus_NO_MEMORY();
+        }
+        status = absolutize(&prefix);
+        if (_PyStatus_EXCEPTION(status)) {
+            return status;
+        }
+    }
+    wchar_t buf[MAXPATHLEN + 1];
+    int res = _Py_normalize_path(prefix, buf, Py_ARRAY_LENGTH(buf));
+    if (prefix != calculate->prefix) {
+        PyMem_RawFree(prefix);
+    }
+    if (res < 0) {
+        return PATHLEN_ERR();
+    }
+    pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf);
+    if (pathconfig->stdlib_dir == NULL) {
+        return _PyStatus_NO_MEMORY();
+    }
+    return _PyStatus_OK();
+}
+
+
 static PyStatus
 calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
 {
@@ -1494,12 +1530,10 @@ calculate_path(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
     }
 
     if (pathconfig->stdlib_dir == NULL) {
-        if (calculate->prefix_found) {
-            /* This must be done *before* calculate_set_prefix() is called. */
-            pathconfig->stdlib_dir = _PyMem_RawWcsdup(calculate->prefix);
-            if (pathconfig->stdlib_dir == NULL) {
-                return _PyStatus_NO_MEMORY();
-            }
+        /* This must be done *before* calculate_set_prefix() is called. */
+        status = calculate_set_stdlib_dir(calculate, pathconfig);
+        if (_PyStatus_EXCEPTION(status)) {
+            return status;
         }
     }
 
index 3d8f3a4f16326c02787e20444655b325afa3ef16..ac0046cdac37c37f6723f14cc8a8e19ba67b51d7 100644 (file)
@@ -2181,6 +2181,101 @@ _Py_find_basename(const wchar_t *filename)
 }
 
 
+/* Remove navigation elements such as "." and "..".
+
+   This is mostly a C implementation of posixpath.normpath().
+   Return 0 on success.  Return -1 if "path" is too big for the buffer. */
+int
+_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len)
+{
+    assert(path && *path != L'\0');
+    assert(*path == SEP);  // an absolute path
+    if (wcslen(path) + 1 >= buf_len) {
+        return -1;
+    }
+
+    int dots = -1;
+    int check_leading = 1;
+    const wchar_t *buf_start = buf;
+    wchar_t *buf_next = buf;
+    // The resulting filename will never be longer than path.
+    for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) {
+        wchar_t c = *remainder;
+        buf_next[0] = c;
+        buf_next++;
+        if (c == SEP) {
+            assert(dots <= 2);
+            if (dots == 2) {
+                // Turn "/x/y/../z" into "/x/z".
+                buf_next -= 4;  // "/../"
+                assert(*buf_next == SEP);
+                // We cap it off at the root, so "/../spam" becomes "/spam".
+                if (buf_next == buf_start) {
+                    buf_next++;
+                }
+                else {
+                    // Move to the previous SEP in the buffer.
+                    while (*(buf_next - 1) != SEP) {
+                        assert(buf_next != buf_start);
+                        buf_next--;
+                    }
+                }
+            }
+            else if (dots == 1) {
+                // Turn "/./" into "/".
+                buf_next -= 2;  // "./"
+                assert(*(buf_next - 1) == SEP);
+            }
+            else if (dots == 0) {
+                // Turn "//" into "/".
+                buf_next--;
+                assert(*(buf_next - 1) == SEP);
+                if (check_leading) {
+                    if (buf_next - 1 == buf && *(remainder + 1) != SEP) {
+                        // Leave a leading "//" alone, unless "///...".
+                        buf_next++;
+                        buf_start++;
+                    }
+                    check_leading = 0;
+                }
+            }
+            dots = 0;
+        }
+        else {
+            check_leading = 0;
+            if (dots >= 0) {
+                if (c == L'.' && dots < 2) {
+                    dots++;
+                }
+                else {
+                    dots = -1;
+                }
+            }
+        }
+    }
+    if (dots >= 0) {
+        // Strip any trailing dots and trailing slash.
+        buf_next -= dots + 1;  // "/" or "/." or "/.."
+        assert(*buf_next == SEP);
+        if (buf_next == buf_start) {
+            // Leave the leading slash for root.
+            buf_next++;
+        }
+        else {
+            if (dots == 2) {
+                // Move to the previous SEP in the buffer.
+                do {
+                    assert(buf_next != buf_start);
+                    buf_next--;
+                } while (*(buf_next) != SEP);
+            }
+        }
+    }
+    *buf_next = L'\0';
+    return 0;
+}
+
+
 /* Get the current directory. buflen is the buffer size in wide characters
    including the null character. Decode the path from the locale encoding.