]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-43260: io: Prevent large data remains in textio buffer. (GH-24592)
authorInada Naoki <songofacandy@gmail.com>
Mon, 22 Feb 2021 01:32:55 +0000 (10:32 +0900)
committerGitHub <noreply@github.com>
Mon, 22 Feb 2021 01:32:55 +0000 (10:32 +0900)
When very large data remains in TextIOWrapper, flush() may fail forever.

So prevent that data larger than chunk_size is remained in TextIOWrapper internal
buffer.

Co-Authored-By: Eryk Sun
(cherry picked from commit 01806d5)

Lib/test/test_io.py
Misc/NEWS.d/next/Library/2021-02-20-12-15-29.bpo-43260.6znAas.rst [new file with mode: 0644]
Modules/_io/textio.c

index d2e387728bd5f53192f3926fc5e0d2efec646bfa..1f0210f857a676a8fd236ee1114a04e8bb2db5bf 100644 (file)
@@ -3747,6 +3747,33 @@ class CTextIOWrapperTest(TextIOWrapperTest):
         with self.assertRaises(AttributeError):
             del t._CHUNK_SIZE
 
+    def test_internal_buffer_size(self):
+        # bpo-43260: TextIOWrapper's internal buffer should not store
+        # data larger than chunk size.
+        chunk_size = 8192  # default chunk size, updated later
+
+        class MockIO(self.MockRawIO):
+            def write(self, data):
+                if len(data) > chunk_size:
+                    raise RuntimeError
+                return super().write(data)
+
+        buf = MockIO()
+        t = self.TextIOWrapper(buf, encoding="ascii")
+        chunk_size = t._CHUNK_SIZE
+        t.write("abc")
+        t.write("def")
+        # default chunk size is 8192 bytes so t don't write data to buf.
+        self.assertEqual([], buf._write_stack)
+
+        with self.assertRaises(RuntimeError):
+            t.write("x"*(chunk_size+1))
+
+        self.assertEqual([b"abcdef"], buf._write_stack)
+        t.write("ghi")
+        t.write("x"*chunk_size)
+        self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack)
+
 
 class PyTextIOWrapperTest(TextIOWrapperTest):
     io = pyio
diff --git a/Misc/NEWS.d/next/Library/2021-02-20-12-15-29.bpo-43260.6znAas.rst b/Misc/NEWS.d/next/Library/2021-02-20-12-15-29.bpo-43260.6znAas.rst
new file mode 100644 (file)
index 0000000..f3c21d1
--- /dev/null
@@ -0,0 +1,2 @@
+Fix TextIOWrapper can not flush internal buffer forever after very large
+text is written.
index f2c72ebd516589a0b838d925a156d11c8c0e9c67..966c532a0e3c3063236241f55d2aed1e57ac84b9 100644 (file)
@@ -1602,6 +1602,8 @@ _textiowrapper_writeflush(textio *self)
         ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
     } while (ret == NULL && _PyIO_trap_eintr());
     Py_DECREF(b);
+    // NOTE: We cleared buffer but we don't know how many bytes are actually written
+    // when an error occurred.
     if (ret == NULL)
         return -1;
     Py_DECREF(ret);
@@ -1659,7 +1661,10 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
 
     /* XXX What if we were just reading? */
     if (self->encodefunc != NULL) {
-        if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
+        if (PyUnicode_IS_ASCII(text) &&
+                // See bpo-43260
+                PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
+                is_asciicompat_encoding(self->encodefunc)) {
             b = text;
             Py_INCREF(b);
         }
@@ -1668,8 +1673,9 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
         }
         self->encoding_start_of_stream = 0;
     }
-    else
+    else {
         b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
+    }
 
     Py_DECREF(text);
     if (b == NULL)
@@ -1694,6 +1700,14 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
         self->pending_bytes_count = 0;
         self->pending_bytes = b;
     }
+    else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
+        // Prevent to concatenate more than chunk_size data.
+        if (_textiowrapper_writeflush(self) < 0) {
+            Py_DECREF(b);
+            return NULL;
+        }
+        self->pending_bytes = b;
+    }
     else if (!PyList_CheckExact(self->pending_bytes)) {
         PyObject *list = PyList_New(2);
         if (list == NULL) {
@@ -1713,7 +1727,7 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
     }
 
     self->pending_bytes_count += bytes_len;
-    if (self->pending_bytes_count > self->chunk_size || needflush ||
+    if (self->pending_bytes_count >= self->chunk_size || needflush ||
         text_needflush) {
         if (_textiowrapper_writeflush(self) < 0)
             return NULL;