From: Julian Seward Date: Mon, 22 Feb 2010 11:03:10 +0000 (+0000) Subject: When creating a child thread, initially set its os_state.threadgroup X-Git-Tag: svn/VALGRIND_3_6_0~371 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e6c318c531f14697c8b13861fcfada90b2debe89;p=thirdparty%2Fvalgrind.git When creating a child thread, initially set its os_state.threadgroup to have the same value as the parent. This avoids exit races leading to hangs and strange behaviour in heavily multithreaded apps, in the situation where threads are rapidly being created, and at the same time an existing thread does sys_exit_group so as to terminate the entire process. Thanks to Konstantin S for chasing this down to a small test case. Fixes #226116. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11053 --- diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c index c8355a3467..dd7d1454b8 100644 --- a/coregrind/m_syswrap/syswrap-amd64-linux.c +++ b/coregrind/m_syswrap/syswrap-amd64-linux.c @@ -251,6 +251,17 @@ static SysRes do_clone ( ThreadId ptid, ctst->sig_mask = ptst->sig_mask; ctst->tmp_sig_mask = ptst->sig_mask; + /* Start the child with its threadgroup being the same as the + parent's. This is so that any exit_group calls that happen + after the child is created but before it sets its + os_state.threadgroup field for real (in thread_wrapper in + syswrap-linux.c), really kill the new thread. a.k.a this avoids + a race condition in which the thread is unkillable (via + exit_group) because its threadgroup is not set. The race window + is probably only a few hundred or a few thousand cycles long. + See #226116. */ + ctst->os_state.threadgroup = ptst->os_state.threadgroup; + /* We don't really know where the client stack is, because its allocated by the client. The best we can do is look at the memory mappings and try to derive some useful information. We diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c index b08da000d9..db81ac1ce0 100644 --- a/coregrind/m_syswrap/syswrap-arm-linux.c +++ b/coregrind/m_syswrap/syswrap-arm-linux.c @@ -201,6 +201,17 @@ static SysRes do_clone ( ThreadId ptid, ctst->sig_mask = ptst->sig_mask; ctst->tmp_sig_mask = ptst->sig_mask; + /* Start the child with its threadgroup being the same as the + parent's. This is so that any exit_group calls that happen + after the child is created but before it sets its + os_state.threadgroup field for real (in thread_wrapper in + syswrap-linux.c), really kill the new thread. a.k.a this avoids + a race condition in which the thread is unkillable (via + exit_group) because its threadgroup is not set. The race window + is probably only a few hundred or a few thousand cycles long. + See #226116. */ + ctst->os_state.threadgroup = ptst->os_state.threadgroup; + seg = VG_(am_find_nsegment)((Addr)sp); if (seg && seg->kind != SkResvn) { ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(sp); diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c index 38c0ca4802..eaf3d87976 100644 --- a/coregrind/m_syswrap/syswrap-linux.c +++ b/coregrind/m_syswrap/syswrap-linux.c @@ -83,6 +83,9 @@ static VgSchedReturnCode thread_wrapper(Word /*ThreadId*/ tidW) VG_TRACK(pre_thread_first_insn, tid); tst->os_state.lwpid = VG_(gettid)(); + /* Set the threadgroup for real. This overwrites the provisional + value set in do_clone() syswrap-*-linux.c. See comments in + do_clone for background, also #226116. */ tst->os_state.threadgroup = VG_(getpid)(); /* Thread created with all signals blocked; scheduler will set the diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c index 397af97345..73fe2d1cc7 100644 --- a/coregrind/m_syswrap/syswrap-ppc32-linux.c +++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c @@ -297,6 +297,17 @@ static SysRes do_clone ( ThreadId ptid, ctst->sig_mask = ptst->sig_mask; ctst->tmp_sig_mask = ptst->sig_mask; + /* Start the child with its threadgroup being the same as the + parent's. This is so that any exit_group calls that happen + after the child is created but before it sets its + os_state.threadgroup field for real (in thread_wrapper in + syswrap-linux.c), really kill the new thread. a.k.a this avoids + a race condition in which the thread is unkillable (via + exit_group) because its threadgroup is not set. The race window + is probably only a few hundred or a few thousand cycles long. + See #226116. */ + ctst->os_state.threadgroup = ptst->os_state.threadgroup; + /* We don't really know where the client stack is, because its allocated by the client. The best we can do is look at the memory mappings and try to derive some useful information. We diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c index 858f26c61a..6922c860bd 100644 --- a/coregrind/m_syswrap/syswrap-ppc64-linux.c +++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c @@ -325,6 +325,17 @@ static SysRes do_clone ( ThreadId ptid, ctst->sig_mask = ptst->sig_mask; ctst->tmp_sig_mask = ptst->sig_mask; + /* Start the child with its threadgroup being the same as the + parent's. This is so that any exit_group calls that happen + after the child is created but before it sets its + os_state.threadgroup field for real (in thread_wrapper in + syswrap-linux.c), really kill the new thread. a.k.a this avoids + a race condition in which the thread is unkillable (via + exit_group) because its threadgroup is not set. The race window + is probably only a few hundred or a few thousand cycles long. + See #226116. */ + ctst->os_state.threadgroup = ptst->os_state.threadgroup; + /* We don't really know where the client stack is, because its allocated by the client. The best we can do is look at the memory mappings and try to derive some useful information. We diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c index f3ca0a29fe..66bcefbc2b 100644 --- a/coregrind/m_syswrap/syswrap-x86-linux.c +++ b/coregrind/m_syswrap/syswrap-x86-linux.c @@ -262,6 +262,17 @@ static SysRes do_clone ( ThreadId ptid, ctst->sig_mask = ptst->sig_mask; ctst->tmp_sig_mask = ptst->sig_mask; + /* Start the child with its threadgroup being the same as the + parent's. This is so that any exit_group calls that happen + after the child is created but before it sets its + os_state.threadgroup field for real (in thread_wrapper in + syswrap-linux.c), really kill the new thread. a.k.a this avoids + a race condition in which the thread is unkillable (via + exit_group) because its threadgroup is not set. The race window + is probably only a few hundred or a few thousand cycles long. + See #226116. */ + ctst->os_state.threadgroup = ptst->os_state.threadgroup; + /* We don't really know where the client stack is, because its allocated by the client. The best we can do is look at the memory mappings and try to derive some useful information. We