From: Maurycy Pawłowski-Wieroński <5383+maurycy@users.noreply.github.com> Date: Thu, 16 Oct 2025 17:24:34 +0000 (+0200) Subject: gh-140149: Use PyBytesWriter in _build_concatenated_bytes() (#140150) X-Git-Tag: v3.15.0a2~414 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=459d493ce3288cda7dcebb868970b199764502f5;p=thirdparty%2FPython%2Fcpython.git gh-140149: Use PyBytesWriter in _build_concatenated_bytes() (#140150) Use PyBytesWriter in action_helpers.c _build_concatenated_bytes(). 3x faster bytes concat in the parser. Co-authored-by: Victor Stinner --- diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst new file mode 100644 index 000000000000..e98e28802cfe --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-10-15-17-12-32.gh-issue-140149.cy1m3d.rst @@ -0,0 +1,2 @@ +Speed up parsing bytes literals concatenation by using PyBytesWriter API and +a single memory allocation (about 3x faster). diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 57e46b4399c6..b7a5b9d5e307 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1612,19 +1612,46 @@ _build_concatenated_bytes(Parser *p, asdl_expr_seq *strings, int lineno, Py_ssize_t len = asdl_seq_LEN(strings); assert(len > 0); - PyObject* res = Py_GetConstant(Py_CONSTANT_EMPTY_BYTES); - /* Bytes literals never get a kind, but just for consistency since they are represented as Constant nodes, we'll mirror the same behavior as unicode strings for determining the kind. */ - PyObject* kind = asdl_seq_GET(strings, 0)->v.Constant.kind; + PyObject *kind = asdl_seq_GET(strings, 0)->v.Constant.kind; + + Py_ssize_t total = 0; + for (Py_ssize_t i = 0; i < len; i++) { + expr_ty elem = asdl_seq_GET(strings, i); + PyObject *bytes = elem->v.Constant.value; + Py_ssize_t part = PyBytes_GET_SIZE(bytes); + if (part > PY_SSIZE_T_MAX - total) { + PyErr_NoMemory(); + return NULL; + } + total += part; + } + + PyBytesWriter *writer = PyBytesWriter_Create(total); + if (writer == NULL) { + return NULL; + } + char *out = PyBytesWriter_GetData(writer); + for (Py_ssize_t i = 0; i < len; i++) { expr_ty elem = asdl_seq_GET(strings, i); - PyBytes_Concat(&res, elem->v.Constant.value); + PyObject *bytes = elem->v.Constant.value; + Py_ssize_t part = PyBytes_GET_SIZE(bytes); + if (part > 0) { + memcpy(out, PyBytes_AS_STRING(bytes), part); + out += part; + } } - if (!res || _PyArena_AddPyObject(arena, res) < 0) { - Py_XDECREF(res); + + PyObject *res = PyBytesWriter_Finish(writer); + if (res == NULL) { + return NULL; + } + if (_PyArena_AddPyObject(arena, res) < 0) { + Py_DECREF(res); return NULL; } return _PyAST_Constant(res, kind, lineno, col_offset, end_lineno, end_col_offset, p->arena);