output = kill_python(p)
self.assertEqual(p.returncode, 0)
+ @cpython_only
+ def test_lexer_buffer_realloc_with_null_start(self):
+ # gh-144759: regression test for NULL pointer arithmetic in the lexer:
+ # start and multi_line_start are NULL (uninitialized in tok_mode_stack[0])
+ # when the lexer buffer is reallocated while parsing long input.
+ long_value = "a" * 2000  # presumably long enough to force a lexer buffer realloc
+ user_input = dedent(f"""\
+ x = f'{{{long_value!r}}}'
+ print(x)
+ """)
+ p = spawn_repl()
+ p.stdin.write(user_input)
+ output = kill_python(p)
+ self.assertEqual(p.returncode, 0)  # the REPL must exit cleanly after the long f-string
+ self.assertIn(long_value, output)  # print(x) must have emitted the full value
+
def test_close_stdin(self):
user_input = dedent('''
import os
--- /dev/null
+Fix undefined behavior in the lexer when ``start`` and ``multi_line_start``
+pointers are ``NULL`` in ``_PyLexer_remember_fstring_buffers()`` and
+``_PyLexer_restore_fstring_buffers()``. The ``NULL`` pointer arithmetic
+(``NULL - valid_pointer``) is now guarded with explicit ``NULL`` checks.
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
mode = &(tok->tok_mode_stack[index]);
- mode->start_offset = mode->start - tok->buf;
- mode->multi_line_start_offset = mode->multi_line_start - tok->buf;
+ mode->start_offset = mode->start == NULL ? -1 : mode->start - tok->buf;
+ mode->multi_line_start_offset = mode->multi_line_start == NULL ? -1 : mode->multi_line_start - tok->buf;
}
}
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
mode = &(tok->tok_mode_stack[index]);
- mode->start = tok->buf + mode->start_offset;
- mode->multi_line_start = tok->buf + mode->multi_line_start_offset;
+ mode->start = mode->start_offset < 0 ? NULL : tok->buf + mode->start_offset;
+ mode->multi_line_start = mode->multi_line_start_offset < 0 ? NULL : tok->buf + mode->multi_line_start_offset;
}
}