git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-90533: Implement BytesIO.peek() (#150917)
author: Marcel Martin <mail@marcelm.net>
Fri, 26 Jun 2026 11:46:39 +0000 (13:46 +0200)
committer: GitHub <noreply@github.com>
Fri, 26 Jun 2026 11:46:39 +0000 (13:46 +0200)
Add io.BytesIO.peek() method to read without advancing position.

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Erlend E. Aasland <erlend.aasland@protonmail.com>
Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: Emma Smith <emma@emmatyping.dev>
Co-authored-by: Stan Ulbrych <stan@python.org>
Co-authored-by: Cody Maloney <cmaloney@users.noreply.github.com>
Doc/library/io.rst
Doc/whatsnew/3.16.rst
Lib/_pyio.py
Lib/test/test_free_threading/test_io.py
Lib/test/test_io/test_memoryio.py
Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst [new file with mode: 0644]
Modules/_io/bytesio.c
Modules/_io/clinic/bytesio.c.h

index d47b74efe22de9dce1c9cfb7086563d3d63c032a..c0d7ee877536ad5c47c6be030c568bc724fcba83 100644 (file)
@@ -771,6 +771,17 @@ than raw I/O does.
 
       Return :class:`bytes` containing the entire contents of the buffer.
 
+   .. method:: peek(size=0, /)
+
+      Return a copy of the buffer from the current position onwards without
+      advancing the position.
+
+      If *size* is less than one or omitted, at most
+      :data:`DEFAULT_BUFFER_SIZE` bytes are returned.
+      Otherwise, at most *size* bytes are returned.
+      Return an empty :class:`bytes` object at EOF.
+
+      .. versionadded:: next
 
    .. method:: read1(size=-1, /)
 
index 80f13e4d759dd3088548b97d326790bf9aa98b14..0f01c534d9896f02a872d84bee2f8010c3f52cab 100644 (file)
@@ -163,6 +163,12 @@ gzip
   which is passed on to the constructor of the :class:`~gzip.GzipFile` class.
   (Contributed by Marin Misur in :gh:`91372`.)
 
+io
+--
+
+* Add :meth:`io.BytesIO.peek` method to read without advancing position.
+  (Contributed by Marcel Martin in :gh:`90533`.)
+
 
 logging
 -------
index 4ba9b4070dff93ebaf9d0d92c58d606048442a65..1118b54633b7cc10f85b5b56e54505289611bace 100644 (file)
@@ -1000,6 +1000,13 @@ class BytesIO(BufferedIOBase):
             raise ValueError("tell on closed file")
         return self._pos
 
+    def peek(self, size=0):
+        """Return a bytes copy from the current position, not advancing it."""
+        if self.closed:
+            raise ValueError("peek on closed file")
+        limit = io.DEFAULT_BUFFER_SIZE if size < 1 else size
+        return bytes(self._buffer[self._pos:self._pos + limit])
+
     def truncate(self, pos=None):
         if self.closed:
             raise ValueError("truncate on closed file")
index 8a0ad30c4bc770b790edebfb9b01b76d4597e15e..742da6f8c788b423b8456fb04a4456f3ac1163f8 100644 (file)
@@ -67,6 +67,10 @@ class ThreadSafetyMixin:
             barrier.wait()
             b.readinto(into)
 
+        def peek(barrier, b, *ignore):
+            barrier.wait()
+            b.peek()
+
         def close(barrier, b, *ignore):
             barrier.wait()
             b.close()
@@ -103,6 +107,7 @@ class ThreadSafetyMixin:
         self.check([truncate] + [readline] * 10, self.ioclass(b'0\n'*20480))
         self.check([truncate] + [readlines] * 10, self.ioclass(b'0\n'*20480))
         self.check([truncate] + [readinto] * 10, self.ioclass(b'0\n'*204800), bytearray(b'0\n'*204800))
+        self.check([truncate] + [peek] * 10, self.ioclass(b'0\n'*204800))
         self.check([close] + [write] * 10, self.ioclass())
         self.check([truncate] + [getvalue] * 10, self.ioclass(b'0\n'*204800))
         self.check([truncate] + [getbuffer] * 10, self.ioclass(b'0\n'*204800))
index 3669ac0b038b71b5bbe541320857c5732fe55ec6..5a93d2634580461fca570366c572b1502f7e7cb1 100644 (file)
@@ -566,6 +566,77 @@ class PyBytesIOTest(MemoryTestMixin, MemorySeekTestMixin, unittest.TestCase):
         buf = bytearray(2)
         self.assertEqual(0, memio.readinto(buf))
 
+    def test_peek(self):
+        buf = self.buftype("1234567890")
+        with self.ioclass(buf) as memio:
+            self.assertEqual(memio.tell(), 0)
+            self.assertEqual(memio.peek(1), buf[:1])
+            self.assertEqual(memio.peek(1), buf[:1])
+            self.assertEqual(memio.peek(), buf)
+            self.assertEqual(memio.peek(3), buf[:3])
+            self.assertEqual(memio.peek(5), buf[:5])
+            self.assertEqual(memio.peek(0), buf)
+            self.assertEqual(memio.peek(len(buf) + 100), buf)
+            self.assertEqual(memio.peek(-1), buf)
+            self.assertEqual(memio.tell(), 0)
+
+            memio.read(1)
+            self.assertEqual(memio.tell(), 1)
+            self.assertEqual(memio.peek(1), buf[1:2])
+            self.assertEqual(memio.peek(), buf[1:])
+            self.assertEqual(memio.peek(3), buf[1:4])
+            self.assertEqual(memio.peek(5), buf[1:6])
+            self.assertEqual(memio.peek(0), buf[1:])
+            self.assertEqual(memio.peek(len(buf) + 100), buf[1:])
+            self.assertEqual(memio.peek(-1), buf[1:])
+            self.assertEqual(memio.tell(), 1)
+
+            memio.read()
+            self.assertEqual(memio.tell(), len(buf))
+            self.assertEqual(memio.peek(1), self.EOF)
+            self.assertEqual(memio.peek(3), self.EOF)
+            self.assertEqual(memio.peek(5), self.EOF)
+            self.assertEqual(memio.peek(0), self.EOF)
+            self.assertEqual(memio.tell(), len(buf))
+
+            # After a write the position is at EOF; seek back to peek the data
+            abc = self.buftype("abc")
+            memio.write(abc)
+            self.assertEqual(memio.peek(), self.EOF)
+            memio.seek(len(buf))
+            self.assertEqual(memio.peek(), abc)
+            self.assertEqual(memio.peek(-1), abc)
+            self.assertEqual(memio.peek(len(abc) + 100), abc)
+            self.assertEqual(memio.tell(), len(buf))
+
+        with self.ioclass(buf) as memio:
+            memio.seek(len(buf))
+            self.assertEqual(memio.peek(), self.EOF)
+
+        # Length greater than DEFAULT_BUFFER_SIZE
+        buf = self.buftype("1234567890" * io.DEFAULT_BUFFER_SIZE)
+        with self.ioclass(buf) as memio:
+            self.assertEqual(memio.peek(), buf[:io.DEFAULT_BUFFER_SIZE])
+            self.assertEqual(memio.peek(0), buf[:io.DEFAULT_BUFFER_SIZE])
+            self.assertEqual(memio.peek(-1), buf[:io.DEFAULT_BUFFER_SIZE])
+            self.assertEqual(memio.peek(io.DEFAULT_BUFFER_SIZE + 100),
+                             buf[:io.DEFAULT_BUFFER_SIZE + 100])
+            self.assertEqual(memio.peek(io.DEFAULT_BUFFER_SIZE * 100), buf)
+
+        # Current position beyond buffer end
+        with self.ioclass(buf) as memio:
+            memio.seek(len(buf) + 100)
+            self.assertEqual(memio.peek(), self.EOF)
+        with self.ioclass(buf) as memio:
+            memio.read()
+            memio.truncate(0)
+            self.assertEqual(memio.tell(), len(buf))
+            self.assertEqual(memio.peek(), self.EOF)
+
+        # Peek after close raises: leaving the with block above closed
+        # the last memio, so peek() must report a closed file.
+        self.assertRaises(ValueError, memio.peek)
+
     def test_unicode(self):
         memio = self.ioclass()
 
diff --git a/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst b/Misc/NEWS.d/next/Library/2022-04-10-20-10-59.bpo-46375.8j1ogZ.rst
new file mode 100644 (file)
index 0000000..0d1ebb6
--- /dev/null
@@ -0,0 +1 @@
+Add :meth:`io.BytesIO.peek` method to read without advancing position.
index 8cdcbd0d89c718e38d8ce793fe533cc31848cdde..ffe82150be1b5149968419046516faffecb057cf 100644 (file)
@@ -420,8 +420,9 @@ _io_BytesIO_tell_impl(bytesio *self)
     return PyLong_FromSsize_t(self->pos);
 }
 
+/* Read without advancing position. */
 static PyObject *
-read_bytes_lock_held(bytesio *self, Py_ssize_t size)
+peek_bytes_lock_held(bytesio *self, Py_ssize_t size)
 {
     _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(self);
 
@@ -432,7 +433,6 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size)
     if (size > 1 &&
         self->pos == 0 && size == PyBytes_GET_SIZE(self->buf) &&
         FT_ATOMIC_LOAD_SSIZE_RELAXED(self->exports) == 0) {
-        self->pos += size;
         return Py_NewRef(self->buf);
     }
 
@@ -444,10 +444,20 @@ read_bytes_lock_held(bytesio *self, Py_ssize_t size)
     }
 
     output = PyBytes_AS_STRING(self->buf) + self->pos;
-    self->pos += size;
     return PyBytes_FromStringAndSize(output, size);
 }
 
+static PyObject *
+read_bytes_lock_held(bytesio *self, Py_ssize_t size)
+{
+    PyObject *bytes = peek_bytes_lock_held(self, size);
+    if (bytes != NULL) {  /* leave pos untouched when the peek failed */
+        assert(PyBytes_GET_SIZE(bytes) == size);
+        self->pos += size;  /* advance past the peeked bytes */
+    }
+    return bytes;
+}
+
 /*[clinic input]
 @critical_section
 _io.BytesIO.read
@@ -499,6 +509,41 @@ _io_BytesIO_read1_impl(bytesio *self, Py_ssize_t size)
     return _io_BytesIO_read_impl(self, size);
 }
 
+
+/*[clinic input]
+@critical_section
+_io.BytesIO.peek
+    size: Py_ssize_t = 0
+    /
+
+Return bytes from the stream without advancing the position.
+
+Return an empty bytes object at EOF.
+[clinic start generated code]*/
+
+static PyObject *
+_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size)
+/*[clinic end generated code: output=fa4d8ce28b35db9b input=2ce74234b10aec3e]*/
+{
+    CHECK_CLOSED(self);
+
+    if (size < 1) {
+        size = DEFAULT_BUFFER_SIZE;
+    }
+
+    /* Clamp size to the bytes left after pos.  That count can be negative
+       after truncate() or seek(), so floor it at zero first. */
+    Py_ssize_t remaining = self->string_size - self->pos;
+    if (remaining < 0) {
+        remaining = 0;
+    }
+    if (size > remaining) {
+        size = remaining;
+    }
+    return peek_bytes_lock_held(self, size);
+}
+
+
 /*[clinic input]
 @critical_section
 _io.BytesIO.readline
@@ -1135,6 +1180,7 @@ static struct PyMethodDef bytesio_methods[] = {
     _IO_BYTESIO_READLINE_METHODDEF
     _IO_BYTESIO_READLINES_METHODDEF
     _IO_BYTESIO_READ_METHODDEF
+    _IO_BYTESIO_PEEK_METHODDEF
     _IO_BYTESIO_GETBUFFER_METHODDEF
     _IO_BYTESIO_GETVALUE_METHODDEF
     _IO_BYTESIO_SEEK_METHODDEF
index fad11ea6c9f6cf6e5c39eedcc42e01f60ff0d260..a0159c063b585913bcf688eaf76d75755098f138 100644 (file)
@@ -287,6 +287,53 @@ exit:
     return return_value;
 }
 
+PyDoc_STRVAR(_io_BytesIO_peek__doc__,
+"peek($self, size=0, /)\n"
+"--\n"
+"\n"
+"Return bytes from the stream without advancing the position.\n"
+"\n"
+"Return an empty bytes object at EOF.");
+
+#define _IO_BYTESIO_PEEK_METHODDEF    \
+    {"peek", _PyCFunction_CAST(_io_BytesIO_peek), METH_FASTCALL, _io_BytesIO_peek__doc__},
+
+static PyObject *
+_io_BytesIO_peek_impl(bytesio *self, Py_ssize_t size);
+
+static PyObject *
+_io_BytesIO_peek(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t size = 0;
+
+    if (!_PyArg_CheckPositional("peek", nargs, 0, 1)) {
+        goto exit;
+    }
+    if (nargs < 1) {
+        goto skip_optional;
+    }
+    {
+        Py_ssize_t ival = -1;
+        PyObject *iobj = _PyNumber_Index(args[0]);
+        if (iobj != NULL) {
+            ival = PyLong_AsSsize_t(iobj);
+            Py_DECREF(iobj);
+        }
+        if (ival == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        size = ival;
+    }
+skip_optional:
+    Py_BEGIN_CRITICAL_SECTION(self);
+    return_value = _io_BytesIO_peek_impl((bytesio *)self, size);
+    Py_END_CRITICAL_SECTION();
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_io_BytesIO_readline__doc__,
 "readline($self, size=-1, /)\n"
 "--\n"
@@ -637,4 +684,4 @@ skip_optional_pos:
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=eac3911e207aaf45 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=b5e625e31b2a82f0 input=a9049054013a1b77]*/