From: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Date: Tue, 4 Nov 2025 11:18:23 +0000 (+0100) Subject: [3.14] gh-140815: Fix faulthandler for invalid/freed frame (GH-140921) (#140981) X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=331b4b868c1056370f150a6426bea9e1e5c52aff;p=thirdparty%2FPython%2Fcpython.git [3.14] gh-140815: Fix faulthandler for invalid/freed frame (GH-140921) (#140981) gh-140815: Fix faulthandler for invalid/freed frame (GH-140921) faulthandler now detects if a frame or a code object is invalid or freed. Add helper functions: * _PyCode_SafeAddr2Line() * _PyFrame_SafeGetCode() * _PyFrame_SafeGetLasti() _PyMem_IsPtrFreed() now detects pointers in [-0xff, 0xff] range as freed. (cherry picked from commit a84181c31bfc45a1d6bcb1296bd298ad612c54d0) Co-authored-by: Victor Stinner --- diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 43286774eb2c..dc0ea9c669b7 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -274,8 +274,13 @@ extern void _PyLineTable_InitAddressRange( /** API for traversing the line number table. */ extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range); extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range); -// This is used in dump_frame() in traceback.c without an attached tstate. -extern int _PyCode_Addr2LineNoTstate(PyCodeObject *co, int addr); + +// Similar to PyCode_Addr2Line(), but return -1 if the code object is invalid +// and can be called without an attached tstate. Used by dump_frame() in +// Python/traceback.c. The function uses heuristics to detect freed memory, +// it's not 100% reliable. +extern int _PyCode_SafeAddr2Line(PyCodeObject *co, int addr); + /** API for executors */ extern void _PyCode_Clear_Executors(PyCodeObject *code); diff --git a/Include/internal/pycore_interpframe.h b/Include/internal/pycore_interpframe.h index d3fd218b27ee..19914e8cef7e 100644 --- a/Include/internal/pycore_interpframe.h +++ b/Include/internal/pycore_interpframe.h @@ -24,6 +24,36 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) { return (PyCodeObject *)executable; } +// Similar to _PyFrame_GetCode(), but return NULL if the frame is invalid or +// freed. Used by dump_frame() in Python/traceback.c. The function uses +// heuristics to detect freed memory, it's not 100% reliable. +static inline PyCodeObject* +_PyFrame_SafeGetCode(_PyInterpreterFrame *f) +{ + // globals and builtins may be NULL on a legit frame, but it's unlikely. + // It's more likely that it's a sign of an invalid frame. + if (f->f_globals == NULL || f->f_builtins == NULL) { + return NULL; + } + + if (PyStackRef_IsNull(f->f_executable)) { + return NULL; + } + void *ptr; + memcpy(&ptr, &f->f_executable, sizeof(f->f_executable)); + if (_PyMem_IsPtrFreed(ptr)) { + return NULL; + } + PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); + if (_PyObject_IsFreed(executable)) { + return NULL; + } + if (!PyCode_Check(executable)) { + return NULL; + } + return (PyCodeObject *)executable; +} + static inline _Py_CODEUNIT * _PyFrame_GetBytecode(_PyInterpreterFrame *f) { @@ -37,6 +67,31 @@ _PyFrame_GetBytecode(_PyInterpreterFrame *f) #endif } +// Similar to PyUnstable_InterpreterFrame_GetLasti(), but return NULL if the +// frame is invalid or freed. Used by dump_frame() in Python/traceback.c. The +// function uses heuristics to detect freed memory, it's not 100% reliable. +static inline int +_PyFrame_SafeGetLasti(struct _PyInterpreterFrame *f) +{ + // Code based on _PyFrame_GetBytecode() but replace _PyFrame_GetCode() + // with _PyFrame_SafeGetCode(). + PyCodeObject *co = _PyFrame_SafeGetCode(f); + if (co == NULL) { + return -1; + } + + _Py_CODEUNIT *bytecode; +#ifdef Py_GIL_DISABLED + _PyCodeArray *tlbc = _PyCode_GetTLBCArray(co); + assert(f->tlbc_index >= 0 && f->tlbc_index < tlbc->size); + bytecode = (_Py_CODEUNIT *)tlbc->entries[f->tlbc_index]; +#else + bytecode = _PyCode_CODE(co); +#endif + + return (int)(f->instr_ptr - bytecode) * sizeof(_Py_CODEUNIT); +} + static inline PyFunctionObject *_PyFrame_GetFunction(_PyInterpreterFrame *f) { PyObject *func = PyStackRef_AsPyObjectBorrow(f->f_funcobj); assert(PyFunction_Check(func)); diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 3e12084b82ab..e27cff03daa8 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -54,15 +54,17 @@ static inline int _PyMem_IsPtrFreed(const void *ptr) { uintptr_t value = (uintptr_t)ptr; #if SIZEOF_VOID_P == 8 - return (value == 0 + return (value <= 0xff // NULL, 0x1, 0x2, ..., 0xff || value == (uintptr_t)0xCDCDCDCDCDCDCDCD || value == (uintptr_t)0xDDDDDDDDDDDDDDDD - || value == (uintptr_t)0xFDFDFDFDFDFDFDFD); + || value == (uintptr_t)0xFDFDFDFDFDFDFDFD + || value >= (uintptr_t)0xFFFFFFFFFFFFFF00); // -0xff, ..., -2, -1 #elif SIZEOF_VOID_P == 4 - return (value == 0 + return (value <= 0xff || value == (uintptr_t)0xCDCDCDCD || value == (uintptr_t)0xDDDDDDDD - || value == (uintptr_t)0xFDFDFDFD); + || value == (uintptr_t)0xFDFDFDFD + || value >= (uintptr_t)0xFFFFFF00); #else # error "unknown pointer size" #endif diff --git a/Misc/NEWS.d/next/Library/2025-11-02-19-23-32.gh-issue-140815.McEG-T.rst b/Misc/NEWS.d/next/Library/2025-11-02-19-23-32.gh-issue-140815.McEG-T.rst new file mode 100644 index 000000000000..18c4d3836efe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-02-19-23-32.gh-issue-140815.McEG-T.rst @@ -0,0 +1,2 @@ +:mod:`faulthandler` now detects if a frame or a code object is invalid or +freed. Patch by Victor Stinner. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 1dbbb053e3fc..287558302aa6 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1013,8 +1013,8 @@ failed: * source location tracking (co_lines/co_positions) ******************/ -int -_PyCode_Addr2LineNoTstate(PyCodeObject *co, int addrq) +static int +_PyCode_Addr2Line(PyCodeObject *co, int addrq) { if (addrq < 0) { return co->co_firstlineno; @@ -1028,12 +1028,29 @@ _PyCode_Addr2LineNoTstate(PyCodeObject *co, int addrq) return _PyCode_CheckLineNumber(addrq, &bounds); } +int +_PyCode_SafeAddr2Line(PyCodeObject *co, int addrq) +{ + if (addrq < 0) { + return co->co_firstlineno; + } + if (co->_co_monitoring && co->_co_monitoring->lines) { + return _Py_Instrumentation_GetLine(co, addrq/sizeof(_Py_CODEUNIT)); + } + if (!(addrq >= 0 && addrq < _PyCode_NBYTES(co))) { + return -1; + } + PyCodeAddressRange bounds; + _PyCode_InitAddressRange(co, &bounds); + return _PyCode_CheckLineNumber(addrq, &bounds); +} + int PyCode_Addr2Line(PyCodeObject *co, int addrq) { int lineno; Py_BEGIN_CRITICAL_SECTION(co); - lineno = _PyCode_Addr2LineNoTstate(co, addrq); + lineno = _PyCode_Addr2Line(co, addrq); Py_END_CRITICAL_SECTION(); return lineno; } diff --git a/Python/traceback.c b/Python/traceback.c index 11e52936f309..5efa70fb6768 100644 --- a/Python/traceback.c +++ b/Python/traceback.c @@ -976,14 +976,24 @@ done: /* Write a frame into the file fd: "File "xxx", line xxx in xxx". - This function is signal safe. */ + This function is signal safe. -static void + Return 0 on success. Return -1 if the frame is invalid. */ + +static int dump_frame(int fd, _PyInterpreterFrame *frame) { - assert(frame->owner < FRAME_OWNED_BY_INTERPRETER); + if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { + /* Ignore trampoline frame */ + return 0; + } - PyCodeObject *code =_PyFrame_GetCode(frame); + PyCodeObject *code = _PyFrame_SafeGetCode(frame); + if (code == NULL) { + return -1; + } + + int res = 0; PUTS(fd, " File "); if (code->co_filename != NULL && PyUnicode_Check(code->co_filename)) @@ -991,29 +1001,36 @@ dump_frame(int fd, _PyInterpreterFrame *frame) PUTS(fd, "\""); _Py_DumpASCII(fd, code->co_filename); PUTS(fd, "\""); - } else { + } + else { PUTS(fd, "???"); + res = -1; } - int lasti = PyUnstable_InterpreterFrame_GetLasti(frame); - int lineno = _PyCode_Addr2LineNoTstate(code, lasti); + PUTS(fd, ", line "); + int lasti = _PyFrame_SafeGetLasti(frame); + int lineno = -1; + if (lasti >= 0) { + lineno = _PyCode_SafeAddr2Line(code, lasti); + } if (lineno >= 0) { _Py_DumpDecimal(fd, (size_t)lineno); } else { PUTS(fd, "???"); + res = -1; } - PUTS(fd, " in "); - if (code->co_name != NULL - && PyUnicode_Check(code->co_name)) { + PUTS(fd, " in "); + if (code->co_name != NULL && PyUnicode_Check(code->co_name)) { _Py_DumpASCII(fd, code->co_name); } else { PUTS(fd, "???"); + res = -1; } - PUTS(fd, "\n"); + return res; } static int @@ -1056,17 +1073,6 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) unsigned int depth = 0; while (1) { - if (frame->owner == FRAME_OWNED_BY_INTERPRETER) { - /* Trampoline frame */ - frame = frame->previous; - if (frame == NULL) { - break; - } - - /* Can't have more than one shim frame in a row */ - assert(frame->owner != FRAME_OWNED_BY_INTERPRETER); - } - if (MAX_FRAME_DEPTH <= depth) { if (MAX_FRAME_DEPTH < depth) { PUTS(fd, "plus "); @@ -1076,7 +1082,15 @@ dump_traceback(int fd, PyThreadState *tstate, int write_header) break; } - dump_frame(fd, frame); + if (_PyMem_IsPtrFreed(frame)) { + PUTS(fd, " \n"); + break; + } + if (dump_frame(fd, frame) < 0) { + PUTS(fd, " \n"); + break; + } + frame = frame->previous; if (frame == NULL) { break;