]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-139877: Use PyBytesWriter in pycore_blocks_output_buffer.h (#139976)
authorEmma Smith <emma@emmatyping.dev>
Tue, 14 Oct 2025 17:03:55 +0000 (10:03 -0700)
committerGitHub <noreply@github.com>
Tue, 14 Oct 2025 17:03:55 +0000 (10:03 -0700)
Previously, the _BlocksOutputBuffer code created a list of bytes objects to hold the output data from compression libraries. This was slow because the output buffer code had to copy each bytes element of the list into the final bytes object buffer at the end of compression.

The new PyBytesWriter API introduced in PEP 782 is an ergonomic and fast method of writing data into a buffer that will later turn into a bytes object. Benchmarks show that using the PyBytesWriter API is 10-30% faster for decompression across a variety of settings. The performance gains are greatest when the decompressor is very performant, such as for Zstandard (and likely zlib-ng). Otherwise the decompressor can bottleneck decompression and the gains are more modest, but still sizable (e.g. 10% faster for zlib)!

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Include/internal/pycore_blocks_output_buffer.h
Modules/_bz2module.c
Modules/_lzmamodule.c
Modules/_zstd/buffer.h
Modules/_zstd/compressor.c
Modules/_zstd/decompressor.c
Modules/zlibmodule.c

index 573e10359b7bd271c2da4c9674a643b8e738ae41..016e7a18665859f69c68e5a16241a2ed3cc99b63 100644 (file)
@@ -45,12 +45,14 @@ extern "C" {
 #endif
 
 typedef struct {
-    // List of bytes objects
-    PyObject *list;
+    // PyBytesWriter owning the contiguous output buffer; NULL when not in use
+    PyBytesWriter *writer;
     // Number of whole allocated size
     Py_ssize_t allocated;
-    // Max length of the buffer, negative number means unlimited length.
+    // Max length of the buffer, negative number means unlimited length
     Py_ssize_t max_length;
+    // Blocks allocated so far; indexes BUFFER_BLOCK_SIZE to size the next grow
+    size_t num_blocks;
 } _BlocksOutputBuffer;
 
 static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
@@ -107,11 +109,10 @@ _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
                                 const Py_ssize_t max_length,
                                 void **next_out)
 {
-    PyObject *b;
     Py_ssize_t block_size;
 
-    // ensure .list was set to NULL
-    assert(buffer->list == NULL);
+    // ensure .writer was set to NULL
+    assert(buffer->writer == NULL);
 
     // get block size
     if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) {
@@ -120,25 +121,17 @@ _BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer,
         block_size = BUFFER_BLOCK_SIZE[0];
     }
 
-    // the first block
-    b = PyBytes_FromStringAndSize(NULL, block_size);
-    if (b == NULL) {
+    buffer->writer = PyBytesWriter_Create(block_size);
+    if (buffer->writer == NULL) {
         return -1;
     }
 
-    // create the list
-    buffer->list = PyList_New(1);
-    if (buffer->list == NULL) {
-        Py_DECREF(b);
-        return -1;
-    }
-    PyList_SET_ITEM(buffer->list, 0, b);
-
     // set variables
     buffer->allocated = block_size;
     buffer->max_length = max_length;
+    buffer->num_blocks = 1;
 
-    *next_out = PyBytes_AS_STRING(b);
+    *next_out = PyBytesWriter_GetData(buffer->writer);
     return block_size;
 }
 
@@ -155,31 +148,21 @@ _BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer,
                                  const Py_ssize_t init_size,
                                  void **next_out)
 {
-    PyObject *b;
 
-    // ensure .list was set to NULL
-    assert(buffer->list == NULL);
+    // ensure .writer was set to NULL
+    assert(buffer->writer == NULL);
 
-    // the first block
-    b = PyBytes_FromStringAndSize(NULL, init_size);
-    if (b == NULL) {
-        PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
+    buffer->writer = PyBytesWriter_Create(init_size);
+    if (buffer->writer == NULL) {
         return -1;
     }
 
-    // create the list
-    buffer->list = PyList_New(1);
-    if (buffer->list == NULL) {
-        Py_DECREF(b);
-        return -1;
-    }
-    PyList_SET_ITEM(buffer->list, 0, b);
-
     // set variables
     buffer->allocated = init_size;
     buffer->max_length = -1;
+    buffer->num_blocks = 1;
 
-    *next_out = PyBytes_AS_STRING(b);
+    *next_out = PyBytesWriter_GetData(buffer->writer);
     return init_size;
 }
 
@@ -193,8 +176,6 @@ _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
                          void **next_out,
                          const Py_ssize_t avail_out)
 {
-    PyObject *b;
-    const Py_ssize_t list_len = Py_SIZE(buffer->list);
     Py_ssize_t block_size;
 
     // ensure no gaps in the data
@@ -205,11 +186,10 @@ _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
     }
 
     // get block size
-    if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) {
-        block_size = BUFFER_BLOCK_SIZE[list_len];
-    } else {
-        block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
-    }
+    size_t maxblock = Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE);
+    assert(maxblock >= 1);
+    size_t block_index = Py_MIN(buffer->num_blocks, maxblock - 1);
+    block_size = BUFFER_BLOCK_SIZE[block_index];
 
     // check max_length
     if (buffer->max_length >= 0) {
@@ -229,22 +209,19 @@ _BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer,
         return -1;
     }
 
-    // create the block
-    b = PyBytes_FromStringAndSize(NULL, block_size);
-    if (b == NULL) {
+    if (PyBytesWriter_Grow(buffer->writer, block_size)) {
         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
         return -1;
     }
-    if (PyList_Append(buffer->list, b) < 0) {
-        Py_DECREF(b);
-        return -1;
-    }
-    Py_DECREF(b);
+
+    Py_ssize_t current_size = buffer->allocated;
 
     // set variables
     buffer->allocated += block_size;
+    buffer->num_blocks += 1;
 
-    *next_out = PyBytes_AS_STRING(b);
+    char *data = PyBytesWriter_GetData(buffer->writer);
+    *next_out = data + current_size;
     return block_size;
 }
 
@@ -265,54 +242,24 @@ static inline PyObject *
 _BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer,
                            const Py_ssize_t avail_out)
 {
-    PyObject *result, *block;
-    const Py_ssize_t list_len = Py_SIZE(buffer->list);
-
-    // fast path for single block
-    if ((list_len == 1 && avail_out == 0) ||
-        (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out))
-    {
-        block = PyList_GET_ITEM(buffer->list, 0);
-        Py_INCREF(block);
-
-        Py_CLEAR(buffer->list);
-        return block;
-    }
-
-    // final bytes object
-    result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out);
-    if (result == NULL) {
-        PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
-        return NULL;
-    }
-
-    // memory copy
-    if (list_len > 0) {
-        char *posi = PyBytes_AS_STRING(result);
-
-        // blocks except the last one
-        Py_ssize_t i = 0;
-        for (; i < list_len-1; i++) {
-            block = PyList_GET_ITEM(buffer->list, i);
-            memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block));
-            posi += Py_SIZE(block);
-        }
-        // the last block
-        block = PyList_GET_ITEM(buffer->list, i);
-        memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out);
-    } else {
-        assert(Py_SIZE(result) == 0);
-    }
-
-    Py_CLEAR(buffer->list);
-    return result;
+    assert(buffer->writer != NULL);
+    // PyBytesWriter_FinishWithSize() invalidates the writer whether it
+    // succeeds or fails, so detach it first: if the caller then runs
+    // _BlocksOutputBuffer_OnError() on a NULL result, Discard sees NULL
+    // (a no-op) instead of an already-destroyed writer.
+    PyBytesWriter *writer = buffer->writer;
+    buffer->writer = NULL;
+    return PyBytesWriter_FinishWithSize(writer,
+                                        buffer->allocated - avail_out);
 }
 
 /* Clean up the buffer when an error occurred. */
 static inline void
 _BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer)
 {
-    Py_CLEAR(buffer->list);
+    // PyBytesWriter_Discard() does nothing for NULL, so this is safe when
+    // the writer was never created or was already consumed by Finish.
+    PyBytesWriter_Discard(buffer->writer);
+    buffer->writer = NULL;
 }
 
 #ifdef __cplusplus
index 2e4cc43a2c3f11dbae0c7c8c3751db061f57c9a0..9721b493a19956bd6a64f083fbd48559485c0beb 100644 (file)
@@ -190,7 +190,7 @@ static PyObject *
 compress(BZ2Compressor *c, char *data, size_t len, int action)
 {
     PyObject *result;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
 
     if (OutputBuffer_InitAndGrow(&buffer, -1, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
         goto error;
@@ -429,7 +429,7 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
        compare against max_length and PyBytes_GET_SIZE we declare it as
        signed */
     PyObject *result;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     bz_stream *bzs = &d->bzs;
 
     if (OutputBuffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) {
index 3e8e37096ba6b4ed38850df832f2d4465563f9fc..6fc072f6d0a38236ef4587ae63be3ae679562ae3 100644 (file)
@@ -554,7 +554,7 @@ static PyObject *
 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
 {
     PyObject *result;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
     assert(state != NULL);
 
@@ -940,7 +940,7 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
 {
     PyObject *result;
     lzma_stream *lzs = &d->lzs;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
     assert(state != NULL);
 
index 0ac7bcb4ddc41657f438e7e54df625d5d15ecc46..807c72c80dde8be0c1c996ffb9d8b6879b36e206 100644 (file)
@@ -16,8 +16,8 @@ static inline int
 _OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob,
                         Py_ssize_t max_length)
 {
-    /* Ensure .list was set to NULL */
-    assert(buffer->list == NULL);
+    /* Ensure .writer was set to NULL */
+    assert(buffer->writer == NULL);
 
     Py_ssize_t res = _BlocksOutputBuffer_InitAndGrow(buffer, max_length,
                                                      &ob->dst);
@@ -39,8 +39,8 @@ _OutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, ZSTD_outBuffer *ob,
 {
     Py_ssize_t block_size;
 
-    /* Ensure .list was set to NULL */
-    assert(buffer->list == NULL);
+    /* Ensure .writer was set to NULL */
+    assert(buffer->writer == NULL);
 
     /* Get block size */
     if (0 <= max_length && max_length < init_size) {
index 029c07113d4f45603f0b7a63a4beacf8e2739ebe..f90bc9c5ab58b121cbd47904f157b37f7a6fe793 100644 (file)
@@ -446,7 +446,7 @@ compress_lock_held(ZstdCompressor *self, Py_buffer *data,
     assert(PyMutex_IsLocked(&self->lock));
     ZSTD_inBuffer in;
     ZSTD_outBuffer out;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     size_t zstd_ret;
     PyObject *ret;
 
@@ -527,7 +527,7 @@ compress_mt_continue_lock_held(ZstdCompressor *self, Py_buffer *data)
     assert(PyMutex_IsLocked(&self->lock));
     ZSTD_inBuffer in;
     ZSTD_outBuffer out;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     size_t zstd_ret;
     PyObject *ret;
 
index 6592cad6690d49e1c403f5a04f4997cc95d8c887..13071b7a2bacf046607ceef4e90cbe9cd79136f8 100644 (file)
@@ -216,7 +216,7 @@ decompress_lock_held(ZstdDecompressor *self, ZSTD_inBuffer *in,
 {
     size_t zstd_ret;
     ZSTD_outBuffer out;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     PyObject *ret;
 
     /* Initialize the output buffer */
index f1312e687da71c71ffcd18e2d99b06307aa83ce8..36c933bf618af0b0dc26e0883d99c27356fe72b8 100644 (file)
@@ -344,7 +344,7 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits)
     PyObject *return_value;
     int flush;
     z_stream zst;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
 
     zlibstate *state = get_zlib_state(module);
 
@@ -445,7 +445,7 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
     Py_ssize_t ibuflen;
     int err, flush;
     z_stream zst;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     _Uint32Window window;  // output buffer's UINT32_MAX sliding window
 
     zlibstate *state = get_zlib_state(module);
@@ -774,7 +774,7 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls,
 {
     PyObject *return_value;
     int err;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     zlibstate *state = PyType_GetModuleState(cls);
 
     ENTER_ZLIB(self);
@@ -898,7 +898,7 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls,
     int err = Z_OK;
     Py_ssize_t ibuflen;
     PyObject *return_value;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
 
     PyObject *module = PyType_GetModule(cls);
     if (module == NULL)
@@ -1005,7 +1005,7 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
 {
     int err;
     PyObject *return_value;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
 
     zlibstate *state = PyType_GetModuleState(cls);
     /* Flushing with Z_NO_FLUSH is a no-op, so there's no point in
@@ -1267,7 +1267,7 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
     Py_buffer data;
     PyObject *return_value;
     Py_ssize_t ibuflen;
-    _BlocksOutputBuffer buffer = {.list = NULL};
+    _BlocksOutputBuffer buffer = {.writer = NULL};
     _Uint32Window window;  // output buffer's UINT32_MAX sliding window
 
     PyObject *module = PyType_GetModule(cls);