From: Victor Stinner Date: Thu, 11 Apr 2019 20:30:31 +0000 (+0200) Subject: [3.7] bpo-36389: _PyObject_IsFreed() now also detects uninitialized memory (GH-12770... X-Git-Tag: v3.7.4rc1~248 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=9e23f0a27cb8bf6e4ea1d2aef36a91502282bbc9;p=thirdparty%2FPython%2Fcpython.git [3.7] bpo-36389: _PyObject_IsFreed() now also detects uninitialized memory (GH-12770) (GH-12788) * bpo-36389: _PyObject_IsFreed() now also detects uninitialized memory (GH-12770) Replace _PyMem_IsFreed() function with _PyMem_IsPtrFreed() inline function. The function is now way more efficient, it became a simple comparison on integers, rather than a short loop. It detects also uninitialized bytes and "forbidden bytes" filled by debug hooks on memory allocators. Add unit tests on _PyObject_IsFreed(). (cherry picked from commit 2b00db68554422ec37faba2a80179a0172df6349) * bpo-36389: Change PyMem_SetupDebugHooks() constants (GH-12782) Modify CLEANBYTE, DEADDYTE and FORBIDDENBYTE constants: use 0xCD, 0xDD and 0xFD, rather than 0xCB, 0xBB and 0xFB, to use the same byte patterns than Windows CRT debug malloc() and free(). (cherry picked from commit 4c409beb4c360a73d054f37807d3daad58d1b567) --- diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index b79b7e49b67e..9b42900b4350 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -440,8 +440,9 @@ Customize Memory Allocators Setup hooks to detect bugs in the Python memory allocator functions. - Newly allocated memory is filled with the byte ``0xCB``, freed memory is - filled with the byte ``0xDB``. + Newly allocated memory is filled with the byte ``0xCD`` (``CLEANBYTE``), + freed memory is filled with the byte ``0xDD`` (``DEADBYTE``). Memory blocks + are surrounded by "forbidden bytes" (``FORBIDDENBYTE``: byte ``0xFD``). Runtime checks: @@ -471,6 +472,12 @@ Customize Memory Allocators if the GIL is held when functions of :c:data:`PYMEM_DOMAIN_OBJ` and :c:data:`PYMEM_DOMAIN_MEM` domains are called. + .. versionchanged:: 3.7.3 + Byte patterns ``0xCB`` (``CLEANBYTE``), ``0xDB`` (``DEADBYTE``) and + ``0xFB`` (``FORBIDDENBYTE``) have been replaced with ``0xCD``, ``0xDD`` + and ``0xFD`` to use the same values than Windows CRT debug ``malloc()`` + and ``free()``. + .. _pymalloc: diff --git a/Include/internal/mem.h b/Include/internal/mem.h index a731e30e6af7..5896e4a05055 100644 --- a/Include/internal/mem.h +++ b/Include/internal/mem.h @@ -145,6 +145,30 @@ PyAPI_FUNC(void) _PyGC_Initialize(struct _gc_runtime_state *); #define _PyGC_generation0 _PyRuntime.gc.generation0 +/* Heuristic checking if a pointer value is newly allocated + (uninitialized) or newly freed. The pointer is not dereferenced, only the + pointer value is checked. + + The heuristic relies on the debug hooks on Python memory allocators which + fills newly allocated memory with CLEANBYTE (0xCD) and newly freed memory + with DEADBYTE (0xDD). Detect also "untouchable bytes" marked + with FORBIDDENBYTE (0xFD). */ +static inline int _PyMem_IsPtrFreed(void *ptr) +{ + uintptr_t value = (uintptr_t)ptr; +#if SIZEOF_VOID_P == 8 + return (value == (uintptr_t)0xCDCDCDCDCDCDCDCD + || value == (uintptr_t)0xDDDDDDDDDDDDDDDD + || value == (uintptr_t)0xFDFDFDFDFDFDFDFD); +#elif SIZEOF_VOID_P == 4 + return (value == (uintptr_t)0xCDCDCDCD + || value == (uintptr_t)0xDDDDDDDD + || value == (uintptr_t)0xFDFDFDFD); +#else +# error "unknown pointer size" +#endif +} + #ifdef __cplusplus } #endif diff --git a/Include/pymem.h b/Include/pymem.h index ef6e0bb5e25f..458a6489c75d 100644 --- a/Include/pymem.h +++ b/Include/pymem.h @@ -55,8 +55,6 @@ PyAPI_FUNC(int) PyTraceMalloc_Untrack( PyAPI_FUNC(PyObject*) _PyTraceMalloc_GetTraceback( unsigned int domain, uintptr_t ptr); - -PyAPI_FUNC(int) _PyMem_IsFreed(void *ptr, size_t size); #endif /* !defined(Py_LIMITED_API) */ diff --git a/Lib/test/test_capi.py b/Lib/test/test_capi.py index 65e0795aba84..d94ee0227c87 100644 --- a/Lib/test/test_capi.py +++ b/Lib/test/test_capi.py @@ -462,11 +462,11 @@ class PyMemDebugTests(unittest.TestCase): r" The [0-9] pad bytes at p-[0-9] are FORBIDDENBYTE, as expected.\n" r" The [0-9] pad bytes at tail={ptr} are not all FORBIDDENBYTE \(0x[0-9a-f]{{2}}\):\n" r" at tail\+0: 0x78 \*\*\* OUCH\n" - r" at tail\+1: 0xfb\n" - r" at tail\+2: 0xfb\n" + r" at tail\+1: 0xfd\n" + r" at tail\+2: 0xfd\n" r" .*\n" r" The block was made by call #[0-9]+ to debug malloc/realloc.\n" - r" Data at p: cb cb cb .*\n" + r" Data at p: cd cd cd .*\n" r"\n" r"Enable tracemalloc to get the memory block allocation traceback\n" r"\n" @@ -482,7 +482,7 @@ class PyMemDebugTests(unittest.TestCase): r" The [0-9] pad bytes at p-[0-9] are FORBIDDENBYTE, as expected.\n" r" The [0-9] pad bytes at tail={ptr} are FORBIDDENBYTE, as expected.\n" r" The block was made by call #[0-9]+ to debug malloc/realloc.\n" - r" Data at p: cb cb cb .*\n" + r" Data at p: cd cd cd .*\n" r"\n" r"Enable tracemalloc to get the memory block allocation traceback\n" r"\n" @@ -508,6 +508,29 @@ class PyMemDebugTests(unittest.TestCase): code = 'import _testcapi; _testcapi.pyobject_malloc_without_gil()' self.check_malloc_without_gil(code) + def check_pyobject_is_freed(self, func): + code = textwrap.dedent(''' + import gc, os, sys, _testcapi + # Disable the GC to avoid crash on GC collection + gc.disable() + obj = _testcapi.{func}() + error = (_testcapi.pyobject_is_freed(obj) == False) + # Exit immediately to avoid a crash while deallocating + # the invalid object + os._exit(int(error)) + ''') + code = code.format(func=func) + assert_python_ok('-c', code, PYTHONMALLOC=self.PYTHONMALLOC) + + def test_pyobject_is_freed_uninitialized(self): + self.check_pyobject_is_freed('pyobject_uninitialized') + + def test_pyobject_is_freed_forbidden_bytes(self): + self.check_pyobject_is_freed('pyobject_forbidden_bytes') + + def test_pyobject_is_freed_free(self): + self.check_pyobject_is_freed('pyobject_freed') + class PyMemMallocDebugTests(PyMemDebugTests): PYTHONMALLOC = 'malloc_debug' diff --git a/Misc/NEWS.d/next/C API/2019-04-11-12-20-35.bpo-36389.P9QFoP.rst b/Misc/NEWS.d/next/C API/2019-04-11-12-20-35.bpo-36389.P9QFoP.rst new file mode 100644 index 000000000000..f2b507a9c230 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2019-04-11-12-20-35.bpo-36389.P9QFoP.rst @@ -0,0 +1,5 @@ +Change the value of ``CLEANBYTE``, ``DEADDYTE`` and ``FORBIDDENBYTE`` internal +constants used by debug hooks on Python memory allocators +(:c:func:`PyMem_SetupDebugHooks` function). Byte patterns ``0xCB``, ``0xDB`` +and ``0xFB`` have been replaced with ``0xCD``, ``0xDD`` and ``0xFD`` to use the +same values than Windows CRT debug ``malloc()`` and ``free()``. diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 1e33ca872d45..b864f9270e9d 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -4227,6 +4227,59 @@ test_pymem_getallocatorsname(PyObject *self, PyObject *args) } +static PyObject* +pyobject_is_freed(PyObject *self, PyObject *op) +{ + int res = _PyObject_IsFreed(op); + return PyBool_FromLong(res); +} + + +static PyObject* +pyobject_uninitialized(PyObject *self, PyObject *args) +{ + PyObject *op = (PyObject *)PyObject_Malloc(sizeof(PyObject)); + if (op == NULL) { + return NULL; + } + /* Initialize reference count to avoid early crash in ceval or GC */ + Py_REFCNT(op) = 1; + /* object fields like ob_type are uninitialized! */ + return op; +} + + +static PyObject* +pyobject_forbidden_bytes(PyObject *self, PyObject *args) +{ + /* Allocate an incomplete PyObject structure: truncate 'ob_type' field */ + PyObject *op = (PyObject *)PyObject_Malloc(offsetof(PyObject, ob_type)); + if (op == NULL) { + return NULL; + } + /* Initialize reference count to avoid early crash in ceval or GC */ + Py_REFCNT(op) = 1; + /* ob_type field is after the memory block: part of "forbidden bytes" + when using debug hooks on memory allocatrs! */ + return op; +} + + +static PyObject* +pyobject_freed(PyObject *self, PyObject *args) +{ + PyObject *op = _PyObject_CallNoArg((PyObject *)&PyBaseObject_Type); + if (op == NULL) { + return NULL; + } + Py_TYPE(op)->tp_dealloc(op); + /* Reset reference count to avoid early crash in ceval or GC */ + Py_REFCNT(op) = 1; + /* object memory is freed! */ + return op; +} + + static PyObject* pyobject_malloc_without_gil(PyObject *self, PyObject *args) { @@ -4788,6 +4841,10 @@ static PyMethodDef TestMethods[] = { {"pymem_api_misuse", pymem_api_misuse, METH_NOARGS}, {"pymem_malloc_without_gil", pymem_malloc_without_gil, METH_NOARGS}, {"pymem_getallocatorsname", test_pymem_getallocatorsname, METH_NOARGS}, + {"pyobject_is_freed", (PyCFunction)(void(*)(void))pyobject_is_freed, METH_O}, + {"pyobject_uninitialized", pyobject_uninitialized, METH_NOARGS}, + {"pyobject_forbidden_bytes", pyobject_forbidden_bytes, METH_NOARGS}, + {"pyobject_freed", pyobject_freed, METH_NOARGS}, {"pyobject_malloc_without_gil", pyobject_malloc_without_gil, METH_NOARGS}, {"tracemalloc_track", tracemalloc_track, METH_VARARGS}, {"tracemalloc_untrack", tracemalloc_untrack, METH_VARARGS}, diff --git a/Objects/object.c b/Objects/object.c index 138df4488027..420af9465b5c 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -411,28 +411,26 @@ _Py_BreakPoint(void) } -/* Heuristic checking if the object memory has been deallocated. - Rely on the debug hooks on Python memory allocators which fills the memory - with DEADBYTE (0xDB) when memory is deallocated. +/* Heuristic checking if the object memory is uninitialized or deallocated. + Rely on the debug hooks on Python memory allocators: + see _PyMem_IsPtrFreed(). The function can be used to prevent segmentation fault on dereferencing - pointers like 0xdbdbdbdbdbdbdbdb. Such pointer is very unlikely to be mapped - in memory. */ + pointers like 0xDDDDDDDDDDDDDDDD. */ int _PyObject_IsFreed(PyObject *op) { - uintptr_t ptr = (uintptr_t)op; - if (_PyMem_IsFreed(&ptr, sizeof(ptr))) { + if (_PyMem_IsPtrFreed(op) || _PyMem_IsPtrFreed(op->ob_type)) { return 1; } - int freed = _PyMem_IsFreed(&op->ob_type, sizeof(op->ob_type)); - /* ignore op->ob_ref: the value can have be modified + /* ignore op->ob_ref: its value can have be modified by Py_INCREF() and Py_DECREF(). */ #ifdef Py_TRACE_REFS - freed &= _PyMem_IsFreed(&op->_ob_next, sizeof(op->_ob_next)); - freed &= _PyMem_IsFreed(&op->_ob_prev, sizeof(op->_ob_prev)); + if (_PyMem_IsPtrFreed(op->_ob_next) || _PyMem_IsPtrFreed(op->_ob_prev)) { + return 1; + } #endif - return freed; + return 0; } @@ -449,7 +447,7 @@ _PyObject_Dump(PyObject* op) if (_PyObject_IsFreed(op)) { /* It seems like the object memory has been freed: don't access it to prevent a segmentation fault. */ - fprintf(stderr, "\n"); + fprintf(stderr, "\n"); return; } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 3b0c35bcc941..46e84270b26e 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1946,14 +1946,17 @@ _Py_GetAllocatedBlocks(void) /* Special bytes broadcast into debug memory blocks at appropriate times. * Strings of these are unlikely to be valid addresses, floats, ints or - * 7-bit ASCII. + * 7-bit ASCII. If modified, _PyMem_IsPtrFreed() should be updated as well. + * + * Byte patterns 0xCB, 0xBB and 0xFB have been replaced with 0xCD, 0xDD and + * 0xFD to use the same values than Windows CRT debug malloc() and free(). */ #undef CLEANBYTE #undef DEADBYTE #undef FORBIDDENBYTE -#define CLEANBYTE 0xCB /* clean (newly allocated) memory */ -#define DEADBYTE 0xDB /* dead (newly freed) memory */ -#define FORBIDDENBYTE 0xFB /* untouchable bytes at each end of a block */ +#define CLEANBYTE 0xCD /* clean (newly allocated) memory */ +#define DEADBYTE 0xDD /* dead (newly freed) memory */ +#define FORBIDDENBYTE 0xFD /* untouchable bytes at each end of a block */ static size_t serialno = 0; /* incremented on each debug {m,re}alloc */ @@ -2091,22 +2094,6 @@ _PyMem_DebugRawCalloc(void *ctx, size_t nelem, size_t elsize) } -/* Heuristic checking if the memory has been freed. Rely on the debug hooks on - Python memory allocators which fills the memory with DEADBYTE (0xDB) when - memory is deallocated. */ -int -_PyMem_IsFreed(void *ptr, size_t size) -{ - unsigned char *bytes = ptr; - for (size_t i=0; i < size; i++) { - if (bytes[i] != DEADBYTE) { - return 0; - } - } - return 1; -} - - /* The debug free first checks the 2*SST bytes on each end for sanity (in particular, that the FORBIDDENBYTEs with the api ID are still intact). Then fills the original bytes with DEADBYTE.