]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.11] gh-119506: fix _io.TextIOWrapper.write() write during flush (#119507) (#120314)
authorInada Naoki <songofacandy@gmail.com>
Fri, 9 Aug 2024 17:04:36 +0000 (02:04 +0900)
committerGitHub <noreply@github.com>
Fri, 9 Aug 2024 17:04:36 +0000 (17:04 +0000)
Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
fix _io.TextIOWrapper.write() write during flush (#119507)

Lib/test/test_io.py
Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst [new file with mode: 0644]
Modules/_io/textio.c

index 537c9fa7b9835b3d1e0929d674e6ec5f90d82b7e..1e444d2566e8554148121af91bad3e87cd628206 100644 (file)
@@ -3960,6 +3960,28 @@ class CTextIOWrapperTest(TextIOWrapperTest):
         t.write("x"*chunk_size)
         self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack)
 
+    def test_issue119506(self):
+        chunk_size = 8192
+
+        class MockIO(self.MockRawIO):
+            written = False
+            def write(self, data):
+                if not self.written:
+                    self.written = True
+                    t.write("middle")
+                return super().write(data)
+
+        buf = MockIO()
+        t = self.TextIOWrapper(buf)
+        t.write("abc")
+        t.write("def")
+        # writing data which size >= chunk_size cause flushing buffer before write.
+        t.write("g" * chunk_size)
+        t.flush()
+
+        self.assertEqual([b"abcdef", b"middle", b"g"*chunk_size],
+                         buf._write_stack)
+
 
 class PyTextIOWrapperTest(TextIOWrapperTest):
     io = pyio
diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst
new file mode 100644 (file)
index 0000000..f9b764a
--- /dev/null
@@ -0,0 +1 @@
+Fix :meth:`!io.TextIOWrapper.write` method breaks internal buffer when the method is called again during flushing internal buffer.
index 3de4c06704b8b9da8407d744f212260c53e99694..ba69e2afd275704a1c37ad5dbd8fff7d77dc660c 100644 (file)
@@ -1701,34 +1701,56 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
         bytes_len = PyBytes_GET_SIZE(b);
     }
 
-    if (self->pending_bytes == NULL) {
-        self->pending_bytes_count = 0;
-        self->pending_bytes = b;
-    }
-    else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
-        // Prevent to concatenate more than chunk_size data.
-        if (_textiowrapper_writeflush(self) < 0) {
-            Py_DECREF(b);
-            return NULL;
+    // We should avoid concatinating huge data.
+    // Flush the buffer before adding b to the buffer if b is not small.
+    // https://github.com/python/cpython/issues/87426
+    if (bytes_len >= self->chunk_size) {
+        // _textiowrapper_writeflush() calls buffer.write().
+        // self->pending_bytes can be appended during buffer->write()
+        // or other thread.
+        // We need to loop until buffer becomes empty.
+        // https://github.com/python/cpython/issues/118138
+        // https://github.com/python/cpython/issues/119506
+        while (self->pending_bytes != NULL) {
+            if (_textiowrapper_writeflush(self) < 0) {
+                Py_DECREF(b);
+                return NULL;
+            }
         }
-        self->pending_bytes = b;
     }
-    else if (!PyList_CheckExact(self->pending_bytes)) {
-        PyObject *list = PyList_New(2);
-        if (list == NULL) {
-            Py_DECREF(b);
-            return NULL;
-        }
-        PyList_SET_ITEM(list, 0, self->pending_bytes);
-        PyList_SET_ITEM(list, 1, b);
-        self->pending_bytes = list;
+
+    if (self->pending_bytes == NULL) {
+        assert(self->pending_bytes_count == 0);
+        self->pending_bytes = b;
     }
     else {
-        if (PyList_Append(self->pending_bytes, b) < 0) {
-            Py_DECREF(b);
-            return NULL;
+        if (!PyList_CheckExact(self->pending_bytes)) {
+            PyObject *list = PyList_New(0);
+            if (list == NULL) {
+                Py_DECREF(b);
+                return NULL;
+            }
+            // PyList_New() may trigger GC and other thread may call write().
+            // So, we need to check the self->pending_bytes is a list again.
+            if (PyList_CheckExact(self->pending_bytes)) {
+                // Releasing empty list won't trigger GC and/or __del__.
+                Py_DECREF(list);
+            }
+            else {
+                if (PyList_Append(list, self->pending_bytes) < 0) {
+                    Py_DECREF(list);
+                    Py_DECREF(b);
+                    return NULL;
+                }
+                Py_SETREF(self->pending_bytes, list);
+            }
         }
+
+        int ret = PyList_Append(self->pending_bytes, b);
         Py_DECREF(b);
+        if (ret < 0) {
+            return NULL;
+        }
     }
 
     self->pending_bytes_count += bytes_len;