From: Bart Van Assche Date: Fri, 29 Jul 2011 12:39:44 +0000 (+0000) Subject: drd: Fix a race condition in the barrier implementation that could result in false... X-Git-Tag: svn/VALGRIND_3_7_0~292 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ab40e94dad38576b22e7e38240ab249927a1af91;p=thirdparty%2Fvalgrind.git drd: Fix a race condition in the barrier implementation that could result in false positives. What could occur before this fix is: - The pthread_barrier() call in a first thread finishes. - Another thread invokes pthread_join() on that thread, causing the information associated with that thread to be removed from the barrier object. - The pthread_barrier() call in another thread finishes. Because some thread information has already been removed from the barrier object, the per-thread vector clock "last" won't be computed correctly by DRD_(barrier_post_wait)(). - Because of the above false positives could be reported. This resulted in sporadic failure of the drd/tests/matinv regression test, and should now be fixed. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11945 --- diff --git a/drd/drd_barrier.c b/drd/drd_barrier.c index ff94454b1d..7fb02e2d3b 100644 --- a/drd/drd_barrier.c +++ b/drd/drd_barrier.c @@ -43,10 +43,11 @@ struct barrier_thread_info { UWord tid; // A DrdThreadId declared as UWord because // this member variable is the key of an OSet. - Segment* sg[2]; // Segments of the last two - // pthread_barrier() calls by thread tid. - ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call. + Segment* sg; // Segment of the last pthread_barrier() call + // by thread tid. Segment* post_wait_sg; // Segment created after *_barrier_wait() finished + ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call. + Bool thread_finished;// Whether thread 'tid' has finished. }; @@ -83,11 +84,11 @@ static void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p, const DrdThreadId tid) { - p->tid = tid; - p->sg[0] = 0; - p->sg[1] = 0; - p->wait_call_ctxt = 0; - p->post_wait_sg = 0; + p->tid = tid; + p->sg = NULL; + p->post_wait_sg = 0; + p->wait_call_ctxt = 0; + p->thread_finished = False; } /** @@ -97,8 +98,7 @@ void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p, static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p) { tl_assert(p); - DRD_(sg_put)(p->sg[0]); - DRD_(sg_put)(p->sg[1]); + DRD_(sg_put)(p->sg); DRD_(sg_put)(p->post_wait_sg); } @@ -112,6 +112,8 @@ void DRD_(barrier_initialize)(struct barrier_info* const p, const BarrierT barrier_type, const Word count) { + int i; + tl_assert(barrier != 0); tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier); tl_assert(p->a1 == barrier); @@ -129,8 +131,10 @@ void DRD_(barrier_initialize)(struct barrier_info* const p, tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word)); tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) >= sizeof(DrdThreadId)); - p->oset = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1", - VG_(free)); + for (i = 0; i < 2; i++) { + p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1", + VG_(free)); + } } /** @@ -143,11 +147,15 @@ static void barrier_cleanup(struct barrier_info* p) { struct barrier_thread_info* q; Segment* latest_sg = 0; + OSet* oset; + int i; tl_assert(p); - if (p->pre_waiters_left != p->count) - { + DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)()); + tl_assert(latest_sg); + + if (p->pre_waiters_left != p->count) { BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; VG_(maybe_record_error)(VG_(get_running_tid)(), BarrierErr, @@ -155,23 +163,23 @@ static void barrier_cleanup(struct barrier_info* p) "Destruction of barrier that is being waited" " upon", &bei); - } - - DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)()); - tl_assert(latest_sg); - - VG_(OSetGen_ResetIter)(p->oset); - for ( ; (q = VG_(OSetGen_Next)(p->oset)) != 0; ) - { - if (q->post_wait_sg - && ! DRD_(vc_lte)(&q->post_wait_sg->vc, &latest_sg->vc)) - { - barrier_report_wait_delete_race(p, q); + } else { + oset = p->oset[1 - p->pre_iteration]; + VG_(OSetGen_ResetIter)(oset); + for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) { + if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc, + &latest_sg->vc)) + { + barrier_report_wait_delete_race(p, q); + } + DRD_(barrier_thread_destroy)(q); } + } - DRD_(barrier_thread_destroy)(q); + for (i = 0; i < 2; i++) { + VG_(OSetGen_Destroy)(p->oset[i]); + p->oset[i] = NULL; } - VG_(OSetGen_Destroy)(p->oset); DRD_(sg_put)(latest_sg); } @@ -200,8 +208,8 @@ DRD_(barrier_get_or_allocate)(const Addr barrier, } /** - * Look up the address of the information associated with the client-side - * barrier object. + * Look up the address of the struct barrier_info associated with the + * client-side barrier object. */ static struct barrier_info* DRD_(barrier_get)(const Addr barrier) { @@ -210,8 +218,9 @@ static struct barrier_info* DRD_(barrier_get)(const Addr barrier) } /** - * Initialize a barrier with client address barrier, client size size, and - * where count threads participate in each barrier. + * Initialize a barrier with given client address, barrier type and number of + * participants. The 'reinitialization' argument indicates whether a barrier + * object is being initialized or reinitialized. * * Called before pthread_barrier_init(). */ @@ -246,6 +255,7 @@ void DRD_(barrier_init)(const Addr barrier, &bei); } } + p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count); if (s_trace_barrier) @@ -336,6 +346,7 @@ void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier, struct barrier_info* p; struct barrier_thread_info* q; const UWord word_tid = tid; + OSet* oset; p = DRD_(barrier_get)(barrier); if (p == 0 && barrier_type == gomp_barrier) @@ -365,14 +376,26 @@ void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier, p->pre_iteration); } + /* Clean up nodes associated with finished threads. */ + oset = p->oset[p->pre_iteration]; + tl_assert(oset); + VG_(OSetGen_ResetIter)(oset); + for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) { + if (q->thread_finished) { + void* r = VG_(OSetGen_Remove)(oset, &q->tid); + tl_assert(r == q); + DRD_(barrier_thread_destroy)(q); + VG_(OSetGen_FreeNode)(oset, q); + VG_(OSetGen_ResetIterAt)(oset, &word_tid); + } + } /* Allocate the per-thread data structure if necessary. */ - q = VG_(OSetGen_Lookup)(p->oset, &word_tid); - if (q == 0) - { - q = VG_(OSetGen_AllocNode)(p->oset, sizeof(*q)); + q = VG_(OSetGen_Lookup)(oset, &word_tid); + if (q == NULL) { + q = VG_(OSetGen_AllocNode)(oset, sizeof(*q)); DRD_(barrier_thread_initialize)(q, tid); - VG_(OSetGen_Insert)(p->oset, q); - tl_assert(VG_(OSetGen_Lookup)(p->oset, &word_tid) == q); + VG_(OSetGen_Insert)(oset, q); + tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q); } /* Record *_barrier_wait() call context. */ @@ -382,7 +405,7 @@ void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier, * Store a pointer to the latest segment of the current thread in the * per-thread data structure. */ - DRD_(thread_get_latest_segment)(&q->sg[p->pre_iteration], tid); + DRD_(thread_get_latest_segment)(&q->sg, tid); /* * If the same number of threads as the barrier count indicates have @@ -405,6 +428,7 @@ void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier, const UWord word_tid = tid; struct barrier_thread_info* q; struct barrier_thread_info* r; + OSet* oset; p = DRD_(barrier_get)(barrier); @@ -432,7 +456,8 @@ void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier, if (! waited) return; - q = VG_(OSetGen_Lookup)(p->oset, &word_tid); + oset = p->oset[p->post_iteration]; + q = VG_(OSetGen_Lookup)(oset, &word_tid); if (q == 0) { BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 }; @@ -445,10 +470,10 @@ void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier, " barrier_destroy()", &bei); - q = VG_(OSetGen_AllocNode)(p->oset, sizeof(*q)); + q = VG_(OSetGen_AllocNode)(oset, sizeof(*q)); DRD_(barrier_thread_initialize)(q, tid); - VG_(OSetGen_Insert)(p->oset, q); - tl_assert(VG_(OSetGen_Lookup)(p->oset, &word_tid) == q); + VG_(OSetGen_Insert)(oset, q); + tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q); } /* Create a new segment and store a pointer to that segment. */ @@ -464,14 +489,14 @@ void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier, VectorClock old_vc; DRD_(vc_copy)(&old_vc, &DRD_(g_threadinfo)[tid].last->vc); - VG_(OSetGen_ResetIter)(p->oset); - for ( ; (r = VG_(OSetGen_Next)(p->oset)) != 0; ) + VG_(OSetGen_ResetIter)(oset); + for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; ) { if (r != q) { - tl_assert(r->sg[p->post_iteration]); + tl_assert(r->sg); DRD_(vc_combine)(&DRD_(g_threadinfo)[tid].last->vc, - &r->sg[p->post_iteration]->vc); + &r->sg->vc); } } DRD_(thread_update_conflict_set)(tid, &old_vc); @@ -496,17 +521,12 @@ static void barrier_delete_thread(struct barrier_info* const p, { struct barrier_thread_info* q; const UWord word_tid = tid; + int i; - q = VG_(OSetGen_Remove)(p->oset, &word_tid); - - /* - * q is only non-zero if the barrier object has been used by thread tid - * after the barrier_init() call and before the thread finished. - */ - if (q) - { - DRD_(barrier_thread_destroy)(q); - VG_(OSetGen_FreeNode)(p->oset, q); + for (i = 0; i < 2; i++) { + q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid); + if (q) + q->thread_finished = True; } } diff --git a/drd/drd_clientobj.h b/drd/drd_clientobj.h index 86e58d6d56..11e39f6046 100644 --- a/drd/drd_clientobj.h +++ b/drd/drd_clientobj.h @@ -125,7 +125,8 @@ struct barrier_info Word post_iteration; // post barrier completion count modulo two. Word pre_waiters_left; // number of waiters left for a complete barrier. Word post_waiters_left; // number of waiters left for a complete barrier. - OSet* oset; // Per-thread barrier information. + OSet* oset[2]; // Per-thread barrier information for the latest + // two barrier iterations. }; struct rwlock_info