The recently added PyConfig.stdlib_dir was being set with ".." entries. When __file__ was added for from modules this caused a problem on out-of-tree builds. This PR fixes that by normalizing "stdlib_dir" when it is calculated in getpath.c.
https://bugs.python.org/issue45506
const wchar_t *relfile,
size_t bufsize);
extern size_t _Py_find_basename(const wchar_t *filename);
+PyAPI_FUNC(int) _Py_normalize_path(const wchar_t *path,
+ wchar_t *buf, const size_t buf_len);
+
// Macros to protect CRT calls against instant termination when passed an
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
--- /dev/null
+# Run tests for functions in Python/fileutils.c.
+
+import os
+import os.path
+import unittest
+from test.support import import_helper
+
+# Skip this test if the _testcapi module isn't available.
+_testcapi = import_helper.import_module('_testinternalcapi')
+
+
+class PathTests(unittest.TestCase):
+
+ def test_capi_normalize_path(self):
+ if os.name == 'nt':
+ raise unittest.SkipTest('Windows has its own helper for this')
+ else:
+ from .test_posixpath import PosixPathTest as posixdata
+ tests = posixdata.NORMPATH_CASES
+ for filename, expected in tests:
+ if not os.path.isabs(filename):
+ continue
+ with self.subTest(filename):
+ result = _testcapi.normalize_path(filename)
+ self.assertEqual(result, expected,
+ msg=f'input: {filename!r} expected output: {expected!r}')
+
+
+if __name__ == "__main__":
+ unittest.main()
for path in ('~', '~/.local', '~vstinner/'):
self.assertEqual(posixpath.expanduser(path), path)
+ NORMPATH_CASES = [
+ ("", "."),
+ ("/", "/"),
+ ("/.", "/"),
+ ("/./", "/"),
+ ("/.//.", "/"),
+ ("/foo", "/foo"),
+ ("/foo/bar", "/foo/bar"),
+ ("//", "//"),
+ ("///", "/"),
+ ("///foo/.//bar//", "/foo/bar"),
+ ("///foo/.//bar//.//..//.//baz///", "/foo/baz"),
+ ("///..//./foo/.//bar", "/foo/bar"),
+ (".", "."),
+ (".//.", "."),
+ ("..", ".."),
+ ("../", ".."),
+ ("../foo", "../foo"),
+ ("../../foo", "../../foo"),
+ ("../foo/../bar", "../bar"),
+ ("../../foo/../bar/./baz/boom/..", "../../bar/baz"),
+ ("/..", "/"),
+ ("/..", "/"),
+ ("/../", "/"),
+ ("/..//", "/"),
+ ("//..", "//"),
+ ("/../foo", "/foo"),
+ ("/../../foo", "/foo"),
+ ("/../foo/../", "/"),
+ ("/../foo/../bar", "/bar"),
+ ("/../../foo/../bar/./baz/boom/..", "/bar/baz"),
+ ("/../../foo/../bar/./baz/boom/.", "/bar/baz/boom"),
+ ]
+
def test_normpath(self):
- self.assertEqual(posixpath.normpath(""), ".")
- self.assertEqual(posixpath.normpath("/"), "/")
- self.assertEqual(posixpath.normpath("//"), "//")
- self.assertEqual(posixpath.normpath("///"), "/")
- self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar")
- self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"),
- "/foo/baz")
- self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar")
-
- self.assertEqual(posixpath.normpath(b""), b".")
- self.assertEqual(posixpath.normpath(b"/"), b"/")
- self.assertEqual(posixpath.normpath(b"//"), b"//")
- self.assertEqual(posixpath.normpath(b"///"), b"/")
- self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar")
- self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"),
- b"/foo/baz")
- self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"),
- b"/foo/bar")
+ for path, expected in self.NORMPATH_CASES:
+ with self.subTest(path):
+ result = posixpath.normpath(path)
+ self.assertEqual(result, expected)
+
+ path = path.encode('utf-8')
+ expected = expected.encode('utf-8')
+ with self.subTest(path, type=bytes):
+ result = posixpath.normpath(path)
+ self.assertEqual(result, expected)
@skip_if_ABSTFN_contains_backslash
def test_realpath_curdir(self):
#include "Python.h"
#include "pycore_atomic_funcs.h" // _Py_atomic_int_get()
#include "pycore_bitutils.h" // _Py_bswap32()
+#include "pycore_fileutils.h" // _Py_normalize_path
#include "pycore_gc.h" // PyGC_Head
#include "pycore_hashtable.h" // _Py_hashtable_new()
#include "pycore_initconfig.h" // _Py_GetConfigsAsDict()
#include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy()
#include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost()
#include "pycore_pystate.h" // _PyThreadState_GET()
+#include "osdefs.h" // MAXPATHLEN
static PyObject *
}
+static PyObject *
+normalize_path(PyObject *self, PyObject *filename)
+{
+ Py_ssize_t size = -1;
+ wchar_t *encoded = PyUnicode_AsWideCharString(filename, &size);
+ if (encoded == NULL) {
+ return NULL;
+ }
+
+ wchar_t buf[MAXPATHLEN + 1];
+ int res = _Py_normalize_path(encoded, buf, Py_ARRAY_LENGTH(buf));
+ PyMem_Free(encoded);
+ if (res != 0) {
+ PyErr_SetString(PyExc_ValueError, "string too long");
+ return NULL;
+ }
+
+ return PyUnicode_FromWideChar(buf, -1);
+}
+
+
static PyMethodDef TestMethods[] = {
{"get_configs", get_configs, METH_NOARGS},
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
{"set_config", test_set_config, METH_O},
{"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
{"test_edit_cost", test_edit_cost, METH_NOARGS},
+ {"normalize_path", normalize_path, METH_O, NULL},
{NULL, NULL} /* sentinel */
};
}
+static PyStatus
+calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
+{
+ // Note that, unlike calculate_set_prefix(), here we allow a negative
+ // prefix_found. That means the source tree Lib dir gets used.
+ if (!calculate->prefix_found) {
+ return _PyStatus_OK();
+ }
+ PyStatus status;
+ wchar_t *prefix = calculate->prefix;
+ if (!_Py_isabs(prefix)) {
+ prefix = _PyMem_RawWcsdup(prefix);
+ if (prefix == NULL) {
+ return _PyStatus_NO_MEMORY();
+ }
+ status = absolutize(&prefix);
+ if (_PyStatus_EXCEPTION(status)) {
+ return status;
+ }
+ }
+ wchar_t buf[MAXPATHLEN + 1];
+ int res = _Py_normalize_path(prefix, buf, Py_ARRAY_LENGTH(buf));
+ if (prefix != calculate->prefix) {
+ PyMem_RawFree(prefix);
+ }
+ if (res < 0) {
+ return PATHLEN_ERR();
+ }
+ pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf);
+ if (pathconfig->stdlib_dir == NULL) {
+ return _PyStatus_NO_MEMORY();
+ }
+ return _PyStatus_OK();
+}
+
+
static PyStatus
calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig)
{
}
if (pathconfig->stdlib_dir == NULL) {
- if (calculate->prefix_found) {
- /* This must be done *before* calculate_set_prefix() is called. */
- pathconfig->stdlib_dir = _PyMem_RawWcsdup(calculate->prefix);
- if (pathconfig->stdlib_dir == NULL) {
- return _PyStatus_NO_MEMORY();
- }
+ /* This must be done *before* calculate_set_prefix() is called. */
+ status = calculate_set_stdlib_dir(calculate, pathconfig);
+ if (_PyStatus_EXCEPTION(status)) {
+ return status;
}
}
}
+/* Remove navigation elements such as "." and "..".
+
+ This is mostly a C implementation of posixpath.normpath().
+ Return 0 on success. Return -1 if "orig" is too big for the buffer. */
+int
+_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len)
+{
+ assert(path && *path != L'\0');
+ assert(*path == SEP); // an absolute path
+ if (wcslen(path) + 1 >= buf_len) {
+ return -1;
+ }
+
+ int dots = -1;
+ int check_leading = 1;
+ const wchar_t *buf_start = buf;
+ wchar_t *buf_next = buf;
+ // The resulting filename will never be longer than path.
+ for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) {
+ wchar_t c = *remainder;
+ buf_next[0] = c;
+ buf_next++;
+ if (c == SEP) {
+ assert(dots <= 2);
+ if (dots == 2) {
+ // Turn "/x/y/../z" into "/x/z".
+ buf_next -= 4; // "/../"
+ assert(*buf_next == SEP);
+ // We cap it off at the root, so "/../spam" becomes "/spam".
+ if (buf_next == buf_start) {
+ buf_next++;
+ }
+ else {
+ // Move to the previous SEP in the buffer.
+ while (*(buf_next - 1) != SEP) {
+ assert(buf_next != buf_start);
+ buf_next--;
+ }
+ }
+ }
+ else if (dots == 1) {
+ // Turn "/./" into "/".
+ buf_next -= 2; // "./"
+ assert(*(buf_next - 1) == SEP);
+ }
+ else if (dots == 0) {
+ // Turn "//" into "/".
+ buf_next--;
+ assert(*(buf_next - 1) == SEP);
+ if (check_leading) {
+ if (buf_next - 1 == buf && *(remainder + 1) != SEP) {
+ // Leave a leading "//" alone, unless "///...".
+ buf_next++;
+ buf_start++;
+ }
+ check_leading = 0;
+ }
+ }
+ dots = 0;
+ }
+ else {
+ check_leading = 0;
+ if (dots >= 0) {
+ if (c == L'.' && dots < 2) {
+ dots++;
+ }
+ else {
+ dots = -1;
+ }
+ }
+ }
+ }
+ if (dots >= 0) {
+ // Strip any trailing dots and trailing slash.
+ buf_next -= dots + 1; // "/" or "/." or "/.."
+ assert(*buf_next == SEP);
+ if (buf_next == buf_start) {
+ // Leave the leading slash for root.
+ buf_next++;
+ }
+ else {
+ if (dots == 2) {
+ // Move to the previous SEP in the buffer.
+ do {
+ assert(buf_next != buf_start);
+ buf_next--;
+ } while (*(buf_next) != SEP);
+ }
+ }
+ }
+ *buf_next = L'\0';
+ return 0;
+}
+
+
/* Get the current directory. buflen is the buffer size in wide characters
including the null character. Decode the path from the locale encoding.