git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
gh-144759: Fix undefined behavior from NULL pointer arithmetic in lexer (#144788)
author: Ramin Farajpour Cami <ramin.blackhat@gmail.com>
Sun, 15 Feb 2026 14:39:57 +0000 (18:09 +0330)
committer: GitHub <noreply@github.com>
Sun, 15 Feb 2026 14:39:57 +0000 (14:39 +0000)
Guard against NULL pointer arithmetic in `_PyLexer_remember_fstring_buffers`
and `_PyLexer_restore_fstring_buffers`. When `start` or `multi_line_start`
is NULL (uninitialized in tok_mode_stack[0]), performing `NULL - tok->buf`
is undefined behavior. Add explicit NULL checks: when remembering, store -1
as a sentinel offset for a NULL pointer; when restoring, map a negative
offset back to NULL.

Add test_lexer_buffer_realloc_with_null_start to test_repl.py that
exercises the code path where the lexer buffer is reallocated while
tok_mode_stack[0] has NULL start/multi_line_start pointers. This
triggers _PyLexer_remember_fstring_buffers and verifies the NULL
checks prevent undefined behavior.

Lib/test/test_repl.py
Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst [new file with mode: 0644]
Parser/lexer/buffer.c

diff --git a/Lib/test/test_repl.py b/Lib/test/test_repl.py
index 6cdb1ca65c6aedc7fd1a30334c65d1fbbc536fa5..40965835bcec00cb7a0ee20b62d037baeaf294fa 100644 (file)
@@ -143,6 +143,22 @@ class TestInteractiveInterpreter(unittest.TestCase):
         output = kill_python(p)
         self.assertEqual(p.returncode, 0)
 
+    @cpython_only
+    def test_lexer_buffer_realloc_with_null_start(self):
+        # gh-144759: NULL pointer arithmetic in the lexer when start and
+        # multi_line_start are NULL (uninitialized in tok_mode_stack[0])
+        # and the lexer buffer is reallocated while parsing long input.
+        long_value = "a" * 2000
+        user_input = dedent(f"""\
+        x = f'{{{long_value!r}}}'
+        print(x)
+        """)
+        p = spawn_repl()
+        p.stdin.write(user_input)
+        output = kill_python(p)
+        self.assertEqual(p.returncode, 0)
+        self.assertIn(long_value, output)
+
     def test_close_stdin(self):
         user_input = dedent('''
             import os
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-13-12-00-00.gh-issue-144759.d3qYpe.rst
new file mode 100644 (file)
index 0000000..46786d0
--- /dev/null
@@ -0,0 +1,4 @@
+Fix undefined behavior in the lexer when ``start`` and ``multi_line_start``
+pointers are ``NULL`` in ``_PyLexer_remember_fstring_buffers()`` and
+``_PyLexer_restore_fstring_buffers()``. The ``NULL`` pointer arithmetic
+(``NULL - valid_pointer``) is now guarded with explicit ``NULL`` checks.
diff --git a/Parser/lexer/buffer.c b/Parser/lexer/buffer.c
index 63aa1ea2ad4f60492429608ea82888a2b5e58bd8..e122fd0d9878ea2c327085521ccdea7b2afc3cb1 100644 (file)
@@ -13,8 +13,8 @@ _PyLexer_remember_fstring_buffers(struct tok_state *tok)
 
     for (index = tok->tok_mode_stack_index; index >= 0; --index) {
         mode = &(tok->tok_mode_stack[index]);
-        mode->start_offset = mode->start - tok->buf;
-        mode->multi_line_start_offset = mode->multi_line_start - tok->buf;
+        mode->start_offset = mode->start == NULL ? -1 : mode->start - tok->buf;
+        mode->multi_line_start_offset = mode->multi_line_start == NULL ? -1 : mode->multi_line_start - tok->buf;
     }
 }
 
@@ -27,8 +27,8 @@ _PyLexer_restore_fstring_buffers(struct tok_state *tok)
 
     for (index = tok->tok_mode_stack_index; index >= 0; --index) {
         mode = &(tok->tok_mode_stack[index]);
-        mode->start = tok->buf + mode->start_offset;
-        mode->multi_line_start = tok->buf + mode->multi_line_start_offset;
+        mode->start = mode->start_offset < 0 ? NULL : tok->buf + mode->start_offset;
+        mode->multi_line_start = mode->multi_line_start_offset < 0 ? NULL : tok->buf + mode->multi_line_start_offset;
     }
 }