From: Tim Peters Date: Thu, 20 Nov 2003 22:13:51 +0000 (+0000) Subject: SF bug 839548: Bug in type's GC handling causes segfaults. X-Git-Tag: v2.3.3c1~46 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ca6919c1805d20533d2be8e68e2849e15aff7bf7;p=thirdparty%2FPython%2Fcpython.git SF bug 839548: Bug in type's GC handling causes segfaults. Also SF patch 843455. This is a critical bugfix, backported from 2.4 development. I don't intend to backport beyond 2.3 maint. The bugs this fixes have been there since weakrefs were introduced. --- diff --git a/Include/weakrefobject.h b/Include/weakrefobject.h index b6fc389f6717..effa0ed451b6 100644 --- a/Include/weakrefobject.h +++ b/Include/weakrefobject.h @@ -39,6 +39,8 @@ PyAPI_FUNC(PyObject *) PyWeakref_GetObject(PyObject *ref); PyAPI_FUNC(long) _PyWeakref_GetWeakrefCount(PyWeakReference *head); +PyAPI_FUNC(void) _PyWeakref_ClearRef(PyWeakReference *self); + #define PyWeakref_GET_OBJECT(ref) (((PyWeakReference *)(ref))->wr_object) diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index 093133eb67b8..87764f90d897 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -338,6 +338,211 @@ class ReferencesTestCase(TestBase): # deallocation of c2. del c2 + def test_callback_in_cycle_1(self): + import gc + + class J(object): + pass + + class II(object): + def acallback(self, ignore): + self.J + + I = II() + I.J = J + I.wr = weakref.ref(J, I.acallback) + + # Now J and II are each in a self-cycle (as all new-style class + # objects are, since their __mro__ points back to them). I holds + # both a weak reference (I.wr) and a strong reference (I.J) to class + # J. I is also in a cycle (I.wr points to a weakref that references + # I.acallback). When we del these three, they all become trash, but + # the cycles prevent any of them from getting cleaned up immediately. + # Instead they have to wait for cyclic gc to deduce that they're + # trash. + # + # gc used to call tp_clear on all of them, and the order in which + # it does that is pretty accidental. The exact order in which we + # built up these things manages to provoke gc into running tp_clear + # in just the right order (I last). Calling tp_clear on II leaves + # behind an insane class object (its __mro__ becomes NULL). Calling + # tp_clear on J breaks its self-cycle, but J doesn't get deleted + # just then because of the strong reference from I.J. Calling + # tp_clear on I starts to clear I's __dict__, and just happens to + # clear I.J first -- I.wr is still intact. That removes the last + # reference to J, which triggers the weakref callback. The callback + # tries to do "self.J", and instances of new-style classes look up + # attributes ("J") in the class dict first. The class (II) wants to + # search II.__mro__, but that's NULL. The result was a segfault in + # a release build, and an assert failure in a debug build. + del I, J, II + gc.collect() + + def test_callback_in_cycle_2(self): + import gc + + # This is just like test_callback_in_cycle_1, except that II is an + # old-style class. The symptom is different then: an instance of an + # old-style class looks in its own __dict__ first. 'J' happens to + # get cleared from I.__dict__ before 'wr', and 'J' was never in II's + # __dict__, so the attribute isn't found. The difference is that + # the old-style II doesn't have a NULL __mro__ (it doesn't have any + # __mro__), so no segfault occurs. Instead it got: + # test_callback_in_cycle_2 (__main__.ReferencesTestCase) ... + # Exception exceptions.AttributeError: + # "II instance has no attribute 'J'" in > ignored + + class J(object): + pass + + class II: + def acallback(self, ignore): + self.J + + I = II() + I.J = J + I.wr = weakref.ref(J, I.acallback) + + del I, J, II + gc.collect() + + def test_callback_in_cycle_3(self): + import gc + + # This one broke the first patch that fixed the last two. In this + # case, the objects reachable from the callback aren't also reachable + # from the object (c1) *triggering* the callback: you can get to + # c1 from c2, but not vice-versa. The result was that c2's __dict__ + # got tp_clear'ed by the time the c2.cb callback got invoked. + + class C: + def cb(self, ignore): + self.me + self.c1 + self.wr + + c1, c2 = C(), C() + + c2.me = c2 + c2.c1 = c1 + c2.wr = weakref.ref(c1, c2.cb) + + del c1, c2 + gc.collect() + + def test_callback_in_cycle_4(self): + import gc + + # Like test_callback_in_cycle_3, except c2 and c1 have different + # classes. c2's class (C) isn't reachable from c1 then, so protecting + # objects reachable from the dying object (c1) isn't enough to stop + # c2's class (C) from getting tp_clear'ed before c2.cb is invoked. + # The result was a segfault (C.__mro__ was NULL when the callback + # tried to look up self.me). + + class C(object): + def cb(self, ignore): + self.me + self.c1 + self.wr + + class D: + pass + + c1, c2 = D(), C() + + c2.me = c2 + c2.c1 = c1 + c2.wr = weakref.ref(c1, c2.cb) + + del c1, c2, C, D + gc.collect() + + def test_callback_in_cycle_resurrection(self): + import gc + + # Do something nasty in a weakref callback: resurrect objects + # from dead cycles. For this to be attempted, the weakref and + # its callback must also be part of the cyclic trash (else the + # objects reachable via the callback couldn't be in cyclic trash + # to begin with -- the callback would act like an external root). + # But gc clears trash weakrefs with callbacks early now, which + # disables the callbacks, so the callbacks shouldn't get called + # at all (and so nothing actually gets resurrected). + + alist = [] + class C(object): + def __init__(self, value): + self.attribute = value + + def acallback(self, ignore): + alist.append(self.c) + + c1, c2 = C(1), C(2) + c1.c = c2 + c2.c = c1 + c1.wr = weakref.ref(c2, c1.acallback) + c2.wr = weakref.ref(c1, c2.acallback) + + def C_went_away(ignore): + alist.append("C went away") + wr = weakref.ref(C, C_went_away) + + del c1, c2, C # make them all trash + self.assertEqual(alist, []) # del isn't enough to reclaim anything + + gc.collect() + # c1.wr and c2.wr were part of the cyclic trash, so should have + # been cleared without their callbacks executing. OTOH, the weakref + # to C is bound to a function local (wr), and wasn't trash, so that + # callback should have been invoked when C went away. + self.assertEqual(alist, ["C went away"]) + # The remaining weakref should be dead now (its callback ran). + self.assertEqual(wr(), None) + + del alist[:] + gc.collect() + self.assertEqual(alist, []) + + def test_callbacks_on_callback(self): + import gc + + # Set up weakref callbacks *on* weakref callbacks. + alist = [] + def safe_callback(ignore): + alist.append("safe_callback called") + + class C(object): + def cb(self, ignore): + alist.append("cb called") + + c, d = C(), C() + c.other = d + d.other = c + callback = c.cb + c.wr = weakref.ref(d, callback) # this won't trigger + d.wr = weakref.ref(callback, d.cb) # ditto + external_wr = weakref.ref(callback, safe_callback) # but this will + self.assert_(external_wr() is callback) + + # The weakrefs attached to c and d should get cleared, so that + # C.cb is never called. But external_wr isn't part of the cyclic + # trash, and no cyclic trash is reachable from it, so safe_callback + # should get invoked when the bound method object callback (c.cb) + # -- which is itself a callback, and also part of the cyclic trash -- + # gets reclaimed at the end of gc. + + del callback, c, d, C + self.assertEqual(alist, []) # del isn't enough to clean up cycles + gc.collect() + self.assertEqual(alist, ["safe_callback called"]) + self.assertEqual(external_wr(), None) + + del alist[:] + gc.collect() + self.assertEqual(alist, []) + class Object: def __init__(self, arg): self.arg = arg diff --git a/Misc/NEWS b/Misc/NEWS index bcb3b5f1d762..53ef6f034d46 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,21 @@ What's New in Python 2.3.3c1? Core and builtins ----------------- +- Critical bugfix, for SF bug 839548: if a weakref with a callback, + its callback, and its weakly referenced object, all became part of + cyclic garbage during a single run of garbage collection, the order + in which they were torn down was unpredictable. It was possible for + the callback to see partially-torn-down objects, leading to immediate + segfaults, or, if the callback resurrected garbage objects, to + resurrect insane objects that caused segfaults (or other surprises) + later. In one sense this wasn't surprising, because Python's cyclic gc + had no knowledge of Python's weakref objects. It does now. When + weakrefs with callbacks become part of cyclic garbage now, those + weakrefs are cleared first. The callbacks don't trigger then, + preventing the problems. If you need callbacks to trigger, then just + as when cyclic gc is not involved, you need to write your code so + that weakref objects outlive the objects they weakly reference. + - Critical bugfix, for SF bug 840829: if cyclic garbage collection happened to occur during a weakref callback for a new-style class instance, subtle memory corruption could result (in a release build; diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index bb40b8f9c7c1..ada1f8ec3608 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -377,13 +377,17 @@ has_finalizer(PyObject *op) return 0; } -/* Move the objects in unreachable with __del__ methods into finalizers. - * The objects remaining in unreachable do not have __del__ methods, and - * gc_refs remains GC_TENTATIVELY_UNREACHABLE for them. The objects - * moved into finalizers have gc_refs changed to GC_REACHABLE. +/* Move the objects in unreachable with __del__ methods into finalizers, + * and weakrefs with callbacks into wr_callbacks. + * The objects remaining in unreachable do not have __del__ methods, and are + * not weakrefs with callbacks. + * The objects moved have gc_refs changed to GC_REACHABLE; the objects + * remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE. */ static void -move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) +move_troublemakers(PyGC_Head *unreachable, + PyGC_Head *finalizers, + PyGC_Head *wr_callbacks) { PyGC_Head *gc = unreachable->gc.gc_next; @@ -398,6 +402,12 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) gc_list_append(gc, finalizers); gc->gc.gc_refs = GC_REACHABLE; } + else if (PyWeakref_Check(op) && + ((PyWeakReference *)op)->wr_callback) { + gc_list_remove(gc); + gc_list_append(gc, wr_callbacks); + gc->gc.gc_refs = GC_REACHABLE; + } gc = next; } } @@ -434,6 +444,93 @@ move_finalizer_reachable(PyGC_Head *finalizers) } } +/* Clear all trash weakrefs with callbacks. This clears weakrefs first, + * which has the happy result of disabling the callbacks without executing + * them. A nasty technical complication: a weakref callback can itself be + * the target of a weakref, in which case decrefing the callback can cause + * another callback to trigger. But we can't allow arbitrary Python code to + * get executed at this point (the callback on the callback may try to muck + * with other cyclic trash we're trying to collect, even resurrecting it + * while we're in the middle of doing tp_clear() on the trash). + * + * The private _PyWeakref_ClearRef() function exists so that we can clear + * the reference in a weakref without triggering a callback on the callback. + * + * We have to save the callback objects and decref them later. But we can't + * allocate new memory to save them (if we can't get new memory, we're dead). + * So we grab a new reference on the clear'ed weakref, which prevents the + * rest of gc from reclaiming it. _PyWeakref_ClearRef() leaves the + * weakref's wr_callback member intact. + * + * In the end, then, wr_callbacks consists of cleared weakrefs that are + * immune from collection. Near the end of gc, after collecting all the + * cyclic trash, we call release_weakrefs(). That releases our references + * to the cleared weakrefs, which in turn may trigger callbacks on their + * callbacks. + */ +static void +clear_weakrefs(PyGC_Head *wr_callbacks) +{ + PyGC_Head *gc = wr_callbacks->gc.gc_next; + + for (; gc != wr_callbacks; gc = gc->gc.gc_next) { + PyObject *op = FROM_GC(gc); + PyWeakReference *wr; + + assert(IS_REACHABLE(op)); + assert(PyWeakref_Check(op)); + wr = (PyWeakReference *)op; + assert(wr->wr_callback != NULL); + Py_INCREF(op); + _PyWeakref_ClearRef(wr); + } +} + +/* Called near the end of gc. This gives up the references we own to + * cleared weakrefs, allowing them to get collected, and in turn decref'ing + * their callbacks. + * + * If a callback object is itself the target of a weakref callback, + * decref'ing the callback object may trigger that other callback. If + * that other callback was part of the cyclic trash in this generation, + * that won't happen, since we cleared *all* trash-weakref callbacks near + * the start of gc. If that other callback was not part of the cyclic trash + * in this generation, then it acted like an external root to this round + * of gc, so all the objects reachable from that callback are still alive. + * + * Giving up the references to the weakref objects will probably make + * them go away too. However, if a weakref is reachable from finalizers, + * it won't go away. We move it to the old generation then. Since a + * weakref object doesn't have a finalizer, that's the right thing to do (it + * doesn't belong in gc.garbage). + * + * We return the number of weakref objects freed (those not appended to old). + */ +static int +release_weakrefs(PyGC_Head *wr_callbacks, PyGC_Head *old) +{ + int num_freed = 0; + + while (! gc_list_is_empty(wr_callbacks)) { + PyGC_Head *gc = wr_callbacks->gc.gc_next; + PyObject *op = FROM_GC(gc); + PyWeakReference *wr = (PyWeakReference *)op; + + assert(IS_REACHABLE(op)); + assert(PyWeakref_Check(op)); + assert(wr->wr_callback != NULL); + Py_DECREF(op); + if (wr_callbacks->gc.gc_next == gc) { + /* object is still alive -- move it */ + gc_list_remove(gc); + gc_list_append(gc, old); + } + else + ++num_freed; + } + return num_freed; +} + static void debug_instance(char *msg, PyInstanceObject *inst) { @@ -535,8 +632,9 @@ collect(int generation) long n = 0; /* # unreachable objects that couldn't be collected */ PyGC_Head *young; /* the generation we are examining */ PyGC_Head *old; /* next older generation */ - PyGC_Head unreachable; - PyGC_Head finalizers; + PyGC_Head unreachable; /* non-problematic unreachable trash */ + PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ + PyGC_Head wr_callbacks; /* weakrefs with callbacks */ PyGC_Head *gc; if (delstr == NULL) { @@ -597,20 +695,33 @@ collect(int generation) /* All objects in unreachable are trash, but objects reachable from * finalizers can't safely be deleted. Python programmers should take * care not to create such things. For Python, finalizers means - * instance objects with __del__ methods. + * instance objects with __del__ methods. Weakrefs with callbacks + * can call arbitrary Python code, so those are special-cased too. * - * Move unreachable objects with finalizers into a different list. + * Move unreachable objects with finalizers, and weakrefs with + * callbacks, into different lists. */ gc_list_init(&finalizers); - move_finalizers(&unreachable, &finalizers); + gc_list_init(&wr_callbacks); + move_troublemakers(&unreachable, &finalizers, &wr_callbacks); + /* Clear the trash weakrefs with callbacks. This prevents their + * callbacks from getting invoked (when a weakref goes away, so does + * its callback). + * We do this even if the weakrefs are reachable from finalizers. + * If we didn't, breaking cycles in unreachable later could trigger + * deallocation of objects in finalizers, which could in turn + * cause callbacks to trigger. This may not be ideal behavior. + */ + clear_weakrefs(&wr_callbacks); /* finalizers contains the unreachable objects with a finalizer; - * unreachable objects reachable only *from* those are also - * uncollectable, and we move those into the finalizers list too. + * unreachable objects reachable *from* those are also uncollectable, + * and we move those into the finalizers list too. */ move_finalizer_reachable(&finalizers); /* Collect statistics on collectable objects found and print - * debugging information. */ + * debugging information. + */ for (gc = unreachable.gc.gc_next; gc != &unreachable; gc = gc->gc.gc_next) { m++; @@ -624,6 +735,11 @@ collect(int generation) */ delete_garbage(&unreachable, old); + /* Now that we're done analyzing stuff and breaking cycles, let + * delayed weakref callbacks run. + */ + m += release_weakrefs(&wr_callbacks, old); + /* Collect statistics on uncollectable objects found and print * debugging information. */ for (gc = finalizers.gc.gc_next; diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index f5be759a63f4..db1f8d190659 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -53,17 +53,43 @@ clear_weakref(PyWeakReference *self) if (*list == self) *list = self->wr_next; self->wr_object = Py_None; - self->wr_callback = NULL; if (self->wr_prev != NULL) self->wr_prev->wr_next = self->wr_next; if (self->wr_next != NULL) self->wr_next->wr_prev = self->wr_prev; self->wr_prev = NULL; self->wr_next = NULL; - Py_XDECREF(callback); + } + if (callback != NULL) { + Py_DECREF(callback); + self->wr_callback = NULL; } } +/* Cyclic gc uses this to *just* clear the passed-in reference, leaving + * the callback intact and uncalled. It must be possible to call self's + * tp_dealloc() after calling this, so self has to be left in a sane enough + * state for that to work. We expect tp_dealloc to decref the callback + * then. The reason for not letting clear_weakref() decref the callback + * right now is that if the callback goes away, that may in turn trigger + * another callback (if a weak reference to the callback exists) -- running + * arbitrary Python code in the middle of gc is a disaster. The convolution + * here allows gc to delay triggering such callbacks until the world is in + * a sane state again. + */ +void +_PyWeakref_ClearRef(PyWeakReference *self) +{ + PyObject *callback; + + assert(self != NULL); + assert(PyWeakref_Check(self)); + /* Preserve and restore the callback around clear_weakref. */ + callback = self->wr_callback; + self->wr_callback = NULL; + clear_weakref(self); + self->wr_callback = callback; +} static void weakref_dealloc(PyWeakReference *self) @@ -117,7 +143,7 @@ weakref_hash(PyWeakReference *self) self->hash = PyObject_Hash(PyWeakref_GET_OBJECT(self)); return self->hash; } - + static PyObject * weakref_repr(PyWeakReference *self) @@ -324,7 +350,7 @@ WRAP_BINARY(proxy_iand, PyNumber_InPlaceAnd) WRAP_BINARY(proxy_ixor, PyNumber_InPlaceXor) WRAP_BINARY(proxy_ior, PyNumber_InPlaceOr) -static int +static int proxy_nonzero(PyWeakReference *proxy) { PyObject *o = PyWeakref_GET_OBJECT(proxy);