extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd);
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
+extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize);
+
// Macros to protect CRT calls against instant termination when passed an
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
// Usage:
return drive, root + tail
-def splitroot(p):
- """Split a pathname into drive, root and tail. The drive is defined
- exactly as in splitdrive(). On Windows, the root may be a single path
- separator or an empty string. The tail contains anything after the root.
- For example:
-
- splitroot('//server/share/') == ('//server/share', '/', '')
- splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
- splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
- splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
- """
- p = os.fspath(p)
- if isinstance(p, bytes):
- sep = b'\\'
- altsep = b'/'
- colon = b':'
- unc_prefix = b'\\\\?\\UNC\\'
- empty = b''
- else:
- sep = '\\'
- altsep = '/'
- colon = ':'
- unc_prefix = '\\\\?\\UNC\\'
- empty = ''
- normp = p.replace(altsep, sep)
- if normp[:1] == sep:
- if normp[1:2] == sep:
- # UNC drives, e.g. \\server\share or \\?\UNC\server\share
- # Device drives, e.g. \\.\device or \\?\device
- start = 8 if normp[:8].upper() == unc_prefix else 2
- index = normp.find(sep, start)
- if index == -1:
- return p, empty, empty
- index2 = normp.find(sep, index + 1)
- if index2 == -1:
- return p, empty, empty
- return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
+try:
+ from nt import _path_splitroot_ex
+except ImportError:
+ def splitroot(p):
+ """Split a pathname into drive, root and tail. The drive is defined
+ exactly as in splitdrive(). On Windows, the root may be a single path
+ separator or an empty string. The tail contains anything after the root.
+ For example:
+
+ splitroot('//server/share/') == ('//server/share', '/', '')
+ splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
+ splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
+ splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
+ """
+ p = os.fspath(p)
+ if isinstance(p, bytes):
+ sep = b'\\'
+ altsep = b'/'
+ colon = b':'
+ unc_prefix = b'\\\\?\\UNC\\'
+ empty = b''
else:
- # Relative path with root, e.g. \Windows
- return empty, p[:1], p[1:]
- elif normp[1:2] == colon:
- if normp[2:3] == sep:
- # Absolute drive-letter path, e.g. X:\Windows
- return p[:2], p[2:3], p[3:]
+ sep = '\\'
+ altsep = '/'
+ colon = ':'
+ unc_prefix = '\\\\?\\UNC\\'
+ empty = ''
+ normp = p.replace(altsep, sep)
+ if normp[:1] == sep:
+ if normp[1:2] == sep:
+ # UNC drives, e.g. \\server\share or \\?\UNC\server\share
+ # Device drives, e.g. \\.\device or \\?\device
+ start = 8 if normp[:8].upper() == unc_prefix else 2
+ index = normp.find(sep, start)
+ if index == -1:
+ return p, empty, empty
+ index2 = normp.find(sep, index + 1)
+ if index2 == -1:
+ return p, empty, empty
+ return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
+ else:
+ # Relative path with root, e.g. \Windows
+ return empty, p[:1], p[1:]
+ elif normp[1:2] == colon:
+ if normp[2:3] == sep:
+ # Absolute drive-letter path, e.g. X:\Windows
+ return p[:2], p[2:3], p[3:]
+ else:
+ # Relative path with drive, e.g. X:Windows
+ return p[:2], empty, p[2:]
else:
- # Relative path with drive, e.g. X:Windows
- return p[:2], empty, p[2:]
- else:
- # Relative path, e.g. Windows
- return empty, empty, p
+ # Relative path, e.g. Windows
+ return empty, empty, p
+else:
+ def splitroot(p):
+ """Split a pathname into drive, root and tail. The drive is defined
+ exactly as in splitdrive(). On Windows, the root may be a single path
+ separator or an empty string. The tail contains anything after the root.
+ For example:
+
+ splitroot('//server/share/') == ('//server/share', '/', '')
+ splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
+ splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
+ splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
+ """
+ p = os.fspath(p)
+ if isinstance(p, bytes):
+ drive, root, tail = _path_splitroot_ex(os.fsdecode(p))
+ return os.fsencode(drive), os.fsencode(root), os.fsencode(tail)
+ return _path_splitroot_ex(p)
# Split a path in head (everything up to the last '/') and tail (the
return p[:0], p
-def splitroot(p):
- """Split a pathname into drive, root and tail. On Posix, drive is always
- empty; the root may be empty, a single slash, or two slashes. The tail
- contains anything after the root. For example:
-
- splitroot('foo/bar') == ('', '', 'foo/bar')
- splitroot('/foo/bar') == ('', '/', 'foo/bar')
- splitroot('//foo/bar') == ('', '//', 'foo/bar')
- splitroot('///foo/bar') == ('', '/', '//foo/bar')
- """
- p = os.fspath(p)
- if isinstance(p, bytes):
- sep = b'/'
- empty = b''
- else:
- sep = '/'
- empty = ''
- if p[:1] != sep:
- # Relative path, e.g.: 'foo'
- return empty, empty, p
- elif p[1:2] != sep or p[2:3] == sep:
- # Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
- return empty, sep, p[1:]
- else:
- # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
- # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
- return empty, p[:2], p[2:]
+try:
+ from posix import _path_splitroot_ex
+except ImportError:
+ def splitroot(p):
+ """Split a pathname into drive, root and tail. On Posix, drive is always
+ empty; the root may be empty, a single slash, or two slashes. The tail
+ contains anything after the root. For example:
+
+ splitroot('foo/bar') == ('', '', 'foo/bar')
+ splitroot('/foo/bar') == ('', '/', 'foo/bar')
+ splitroot('//foo/bar') == ('', '//', 'foo/bar')
+ splitroot('///foo/bar') == ('', '/', '//foo/bar')
+ """
+ p = os.fspath(p)
+ if isinstance(p, bytes):
+ sep = b'/'
+ empty = b''
+ else:
+ sep = '/'
+ empty = ''
+ if p[:1] != sep:
+ # Relative path, e.g.: 'foo'
+ return empty, empty, p
+ elif p[1:2] != sep or p[2:3] == sep:
+ # Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
+ return empty, sep, p[1:]
+ else:
+ # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
+ # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
+ return empty, p[:2], p[2:]
+else:
+ def splitroot(p):
+ """Split a pathname into drive, root and tail. On Posix, drive is always
+ empty; the root may be empty, a single slash, or two slashes. The tail
+ contains anything after the root. For example:
+
+ splitroot('foo/bar') == ('', '', 'foo/bar')
+ splitroot('/foo/bar') == ('', '/', 'foo/bar')
+ splitroot('//foo/bar') == ('', '//', 'foo/bar')
+ splitroot('///foo/bar') == ('', '/', '//foo/bar')
+ """
+ p = os.fspath(p)
+ if isinstance(p, bytes):
+ # Optimisation: the drive is always empty
+ _, root, tail = _path_splitroot_ex(os.fsdecode(p))
+ return b'', os.fsencode(root), os.fsencode(tail)
+ return _path_splitroot_ex(p)
# Return the tail (basename) part of a path, same as split(path)[1].
tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\')
tester("ntpath.normpath('\\\\foo')", '\\\\foo')
tester("ntpath.normpath('\\\\')", '\\\\')
+ tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\')
def test_realpath_curdir(self):
expected = ntpath.normpath(os.getcwd())
--- /dev/null
+Speed up :func:`os.path.splitroot` with a native implementation.
#endif /* defined(MS_WINDOWS) */
+PyDoc_STRVAR(os__path_splitroot_ex__doc__,
+"_path_splitroot_ex($module, /, path)\n"
+"--\n"
+"\n");
+
+#define OS__PATH_SPLITROOT_EX_METHODDEF \
+ {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__},
+
+static PyObject *
+os__path_splitroot_ex_impl(PyObject *module, PyObject *path);
+
+static PyObject *
+os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(path), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"path", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "_path_splitroot_ex",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ PyObject *path;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (!PyUnicode_Check(args[0])) {
+ _PyArg_BadArgument("_path_splitroot_ex", "argument 'path'", "str", args[0]);
+ goto exit;
+ }
+ path = args[0];
+ return_value = os__path_splitroot_ex_impl(module, path);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(os__path_normpath__doc__,
"_path_normpath($module, /, path)\n"
"--\n"
#ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF
#define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF
#endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */
-/*[clinic end generated code: output=511f0788a6b90db0 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=c4698b47007cd6eb input=a9049054013a1b77]*/
#endif /* MS_WINDOWS */
+/*[clinic input]
+os._path_splitroot_ex
+
+ path: unicode
+
+[clinic start generated code]*/
+
+static PyObject *
+os__path_splitroot_ex_impl(PyObject *module, PyObject *path)
+/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/
+{
+ Py_ssize_t len, drvsize, rootsize;
+ PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL;
+
+ wchar_t *buffer = PyUnicode_AsWideCharString(path, &len);
+ if (!buffer) {
+ goto exit;
+ }
+
+ _Py_skiproot(buffer, len, &drvsize, &rootsize);
+ drv = PyUnicode_FromWideChar(buffer, drvsize);
+ if (drv == NULL) {
+ goto exit;
+ }
+ root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize);
+ if (root == NULL) {
+ goto exit;
+ }
+ tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize],
+ len - drvsize - rootsize);
+ if (tail == NULL) {
+ goto exit;
+ }
+ result = Py_BuildValue("(OOO)", drv, root, tail);
+exit:
+ PyMem_Free(buffer);
+ Py_XDECREF(drv);
+ Py_XDECREF(root);
+ Py_XDECREF(tail);
+ return result;
+}
+
+
/*[clinic input]
os._path_normpath
OS__FINDFIRSTFILE_METHODDEF
OS__GETVOLUMEPATHNAME_METHODDEF
OS__PATH_SPLITROOT_METHODDEF
+ OS__PATH_SPLITROOT_EX_METHODDEF
OS__PATH_NORMPATH_METHODDEF
OS_GETLOADAVG_METHODDEF
OS_URANDOM_METHODDEF
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
+void
+_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize,
+ Py_ssize_t *rootsize)
+{
+ assert(drvsize);
+ assert(rootsize);
+#ifndef MS_WINDOWS
+#define IS_SEP(x) (*(x) == SEP)
+ *drvsize = 0;
+ if (!IS_SEP(&path[0])) {
+ // Relative path, e.g.: 'foo'
+ *rootsize = 0;
+ }
+ else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) {
+ // Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
+ *rootsize = 1;
+ }
+ else {
+ // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
+ // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
+ *rootsize = 2;
+ }
+#undef IS_SEP
+#else
+ const wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
+#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
+#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
+#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
+ if (IS_SEP(&path[0])) {
+ if (IS_SEP(&path[1])) {
+ // Device drives, e.g. \\.\device or \\?\device
+ // UNC drives, e.g. \\server\share or \\?\UNC\server\share
+ Py_ssize_t idx;
+ if (path[2] == L'?' && IS_SEP(&path[3]) &&
+ (path[4] == L'U' || path[4] == L'u') &&
+ (path[5] == L'N' || path[5] == L'n') &&
+ (path[6] == L'C' || path[6] == L'c') &&
+ IS_SEP(&path[7]))
+ {
+ idx = 8;
+ }
+ else {
+ idx = 2;
+ }
+ while (!SEP_OR_END(&path[idx])) {
+ idx++;
+ }
+ if (IS_END(&path[idx])) {
+ *drvsize = idx;
+ *rootsize = 0;
+ }
+ else {
+ idx++;
+ while (!SEP_OR_END(&path[idx])) {
+ idx++;
+ }
+ *drvsize = idx;
+ if (IS_END(&path[idx])) {
+ *rootsize = 0;
+ }
+ else {
+ *rootsize = 1;
+ }
+ }
+ }
+ else {
+ // Relative path with root, e.g. \Windows
+ *drvsize = 0;
+ *rootsize = 1;
+ }
+ }
+ else if (!IS_END(&path[0]) && path[1] == L':') {
+ *drvsize = 2;
+ if (IS_SEP(&path[2])) {
+ // Absolute drive-letter path, e.g. X:\Windows
+ *rootsize = 1;
+ }
+ else {
+ // Relative path with drive, e.g. X:Windows
+ *rootsize = 0;
+ }
+ }
+ else {
+ // Relative path, e.g. Windows
+ *drvsize = 0;
+ *rootsize = 0;
+ }
+#undef SEP_OR_END
+#undef IS_SEP
+#undef IS_END
+#endif
+}
+
// The caller must ensure "buffer" is big enough.
static int
join_relfile(wchar_t *buffer, size_t bufsize,
#endif
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
- // Skip leading '.\'
if (p1[0] == L'.' && IS_SEP(&p1[1])) {
+ // Skip leading '.\'
path = &path[2];
- while (IS_SEP(path) && !IS_END(path)) {
+ while (IS_SEP(path)) {
path++;
}
p1 = p2 = minP2 = path;
lastC = SEP;
}
+ else {
+ Py_ssize_t drvsize, rootsize;
+ _Py_skiproot(path, size, &drvsize, &rootsize);
+ if (drvsize || rootsize) {
+ // Skip past root and update minP2
+ p1 = &path[drvsize + rootsize];
+#ifndef ALTSEP
+ p2 = p1;
+#else
+ for (; p2 < p1; ++p2) {
+ if (*p2 == ALTSEP) {
+ *p2 = SEP;
+ }
+ }
+#endif
+ minP2 = p2 - 1;
+ lastC = *minP2;
#ifdef MS_WINDOWS
- // Skip past drive segment and update minP2
- else if (p1[0] && p1[1] == L':') {
- *p2++ = *p1++;
- *p2++ = *p1++;
- minP2 = p2;
- lastC = L':';
- }
- // Skip past all \\-prefixed paths, including \\?\, \\.\,
- // and network paths, including the first segment.
- else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) {
- int sepCount = 2;
- *p2++ = SEP;
- *p2++ = SEP;
- p1 += 2;
- for (; !IS_END(p1) && sepCount; ++p1) {
- if (IS_SEP(p1)) {
- --sepCount;
- *p2++ = lastC = SEP;
- } else {
- *p2++ = lastC = *p1;
+ if (lastC != SEP) {
+ minP2++;
}
+#endif
}
- minP2 = p2 - 1;
- }
-#else
- // Skip past two leading SEPs
- else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) {
- *p2++ = *p1++;
- *p2++ = *p1++;
- minP2 = p2 - 1; // Absolute path has SEP at minP2
- lastC = SEP;
}
-#endif /* MS_WINDOWS */
/* if pEnd is specified, check that. Else, check for null terminator */
for (; !IS_END(p1); ++p1) {