]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.11] gh-95782: Fix io.BufferedReader.tell() etc. being able to return offsets ...
authorMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>
Sat, 17 Feb 2024 12:55:43 +0000 (13:55 +0100)
committerGitHub <noreply@github.com>
Sat, 17 Feb 2024 12:55:43 +0000 (14:55 +0200)
lseek() always returns 0 for character pseudo-devices like
`/dev/urandom` (for other non-regular files, e.g. `/dev/stdin`, it
always returns -1, to which CPython reacts by raising appropriate
exceptions). They are thus technically seekable despite not having seek
semantics.

When calling read() on e.g. an instance of `io.BufferedReader` that
wraps such a file, `BufferedReader` reads ahead, filling its buffer,
creating a discrepancy between the number of bytes read and the internal
`tell()` always returning 0, which previously resulted in e.g.
`BufferedReader.tell()` or `BufferedReader.seek()` being able to return
positions < 0 even though these are supposed to be always >= 0.

Invariably keep the return value non-negative by returning
max(former_return_value, 0) instead, and add some corresponding tests.
(cherry picked from commit 26800cf25a0970d46934fa9a881c0ef6881d642b)

Co-authored-by: 6t8k <58048945+6t8k@users.noreply.github.com>
Lib/_pyio.py
Lib/test/test_io.py
Misc/NEWS.d/next/Library/2022-11-22-23-17-43.gh-issue-95782.an_and.rst [new file with mode: 0644]
Modules/_io/bufferedio.c

index 16d025b1703e4ffd44a97a9ab531d4db84be0661..f17ae1024c1a4cb4af892acc10baee28f3796b97 100644 (file)
@@ -1224,7 +1224,8 @@ class BufferedReader(_BufferedIOMixin):
         return written
 
     def tell(self):
-        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos
+        # GH-95782: Keep return value non-negative
+        return max(_BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos, 0)
 
     def seek(self, pos, whence=0):
         if whence not in valid_seek_flags:
index e77da124a30779b5e72db8333ab7f974ba7ef5c4..537c9fa7b9835b3d1e0929d674e6ec5f90d82b7e 100644 (file)
@@ -263,6 +263,27 @@ class PyMockUnseekableIO(MockUnseekableIO, pyio.BytesIO):
     UnsupportedOperation = pyio.UnsupportedOperation
 
 
+class MockCharPseudoDevFileIO(MockFileIO):
+    # GH-95782
+    # ftruncate() does not work on these special files (and CPython then raises
+    # appropriate exceptions), so truncate() does not have to be accounted for
+    # here.
+    def __init__(self, data):
+        super().__init__(data)
+
+    def seek(self, *args):
+        return 0
+
+    def tell(self, *args):
+        return 0
+
+class CMockCharPseudoDevFileIO(MockCharPseudoDevFileIO, io.BytesIO):
+    pass
+
+class PyMockCharPseudoDevFileIO(MockCharPseudoDevFileIO, pyio.BytesIO):
+    pass
+
+
 class MockNonBlockWriterIO:
 
     def __init__(self):
@@ -1556,6 +1577,30 @@ class BufferedReaderTest(unittest.TestCase, CommonBufferedTests):
         self.assertRaises(self.UnsupportedOperation, bufio.truncate)
         self.assertRaises(self.UnsupportedOperation, bufio.truncate, 0)
 
+    def test_tell_character_device_file(self):
+        # GH-95782
+        # For the (former) bug in BufferedIO to manifest, the wrapped IO obj
+        # must be able to produce at least 2 bytes.
+        raw = self.MockCharPseudoDevFileIO(b"12")
+        buf = self.tp(raw)
+        self.assertEqual(buf.tell(), 0)
+        self.assertEqual(buf.read(1), b"1")
+        self.assertEqual(buf.tell(), 0)
+
+    def test_seek_character_device_file(self):
+        raw = self.MockCharPseudoDevFileIO(b"12")
+        buf = self.tp(raw)
+        self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)
+        self.assertEqual(buf.seek(1, io.SEEK_SET), 0)
+        self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)
+        self.assertEqual(buf.read(1), b"1")
+
+        # In the C implementation, tell() sets the BufferedIO's abs_pos to 0,
+        # which means that the next seek() could return a negative offset if it
+        # does not sanity-check:
+        self.assertEqual(buf.tell(), 0)
+        self.assertEqual(buf.seek(0, io.SEEK_CUR), 0)
+
 
 class CBufferedReaderTest(BufferedReaderTest, SizeofTest):
     tp = io.BufferedReader
@@ -4790,7 +4835,7 @@ def load_tests(loader, tests, pattern):
     # classes in the __dict__ of each test.
     mocks = (MockRawIO, MisbehavedRawIO, MockFileIO, CloseFailureIO,
              MockNonBlockWriterIO, MockUnseekableIO, MockRawIOWithoutRead,
-             SlowFlushRawIO)
+             SlowFlushRawIO, MockCharPseudoDevFileIO)
     all_members = io.__all__
     c_io_ns = {name : getattr(io, name) for name in all_members}
     py_io_ns = {name : getattr(pyio, name) for name in all_members}
diff --git a/Misc/NEWS.d/next/Library/2022-11-22-23-17-43.gh-issue-95782.an_and.rst b/Misc/NEWS.d/next/Library/2022-11-22-23-17-43.gh-issue-95782.an_and.rst
new file mode 100644 (file)
index 0000000..123c394
--- /dev/null
@@ -0,0 +1,4 @@
+Fix :func:`io.BufferedReader.tell`, :func:`io.BufferedReader.seek`,
+:func:`_pyio.BufferedReader.tell`, :func:`io.BufferedRandom.tell`,
+:func:`io.BufferedRandom.seek` and :func:`_pyio.BufferedRandom.tell`
+being able to return negative offsets.
index 4013d52219cfc0c4202e0fc8954033e190b9a936..aad081dbc34cd0b2ab977b818f7a4146dca2d92e 100644 (file)
@@ -1196,7 +1196,11 @@ buffered_tell(buffered *self, PyObject *Py_UNUSED(ignored))
     if (pos == -1)
         return NULL;
     pos -= RAW_OFFSET(self);
-    /* TODO: sanity check (pos >= 0) */
+
+    // GH-95782
+    if (pos < 0)
+        pos = 0;
+
     return PyLong_FromOff_t(pos);
 }
 
@@ -1263,6 +1267,11 @@ _io__Buffered_seek_impl(buffered *self, PyObject *targetobj, int whence)
                 offset = target;
             if (offset >= -self->pos && offset <= avail) {
                 self->pos += offset;
+
+                // GH-95782
+                if (current - avail + offset < 0)
+                    return PyLong_FromOff_t(0);
+
                 return PyLong_FromOff_t(current - avail + offset);
             }
         }