Fix 409141 and 409367: valgrind hangs or loops when a process sends a signal to itself.

author Philippe Waroquiers <philippe.waroquiers@skynet.be>

Tue, 2 Jul 2019 19:43:41 +0000 (21:43 +0200)

committer Philippe Waroquiers <philippe.waroquiers@skynet.be>

Tue, 9 Jul 2019 17:47:44 +0000 (19:47 +0200)
author Philippe Waroquiers <philippe.waroquiers@skynet.be>
Tue, 2 Jul 2019 19:43:41 +0000 (21:43 +0200)
committer Philippe Waroquiers <philippe.waroquiers@skynet.be>
Tue, 9 Jul 2019 17:47:44 +0000 (19:47 +0200)
diff --git a/.gitignore b/.gitignore

index 640b8c65e76790300ede1703246e9291d2258087..a0dedb8c56de127803662de2172ec4ff505e0e48 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -1419,6 +1419,7 @@
  /none/tests/pth_simple_mutex
  /none/tests/pth_simple_threads
  /none/tests/pth_specific
+/none/tests/pth_self_kill
  /none/tests/pth_stackalign
  /none/tests/pth_term_signal
  /none/tests/pth_yield
diff --git a/NEWS b/NEWS

index 6406035610e65dcb1f1ad9e8d1217678dd384a0e..7217164776ac6a7f0ab68810539619f2f7027a36 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -56,6 +56,9 @@ where XXXXXX is the bug number as listed below.
  408091  Missing pkey syscalls
  408414  Add support for missing for preadv2 and pwritev2 syscalls
  404406  s390x: z14 miscellaneous instructions not implemented
+409141  Valgrind hangs when SIGKILLed
+409367  exit_group() after signal to thread waiting in futex() causes hangs
+
  n-i-bz  Fix minor one time leaks in dhat.
  n-i-bz  Add --run-cxx-freeres=no in outer args to avoid inner crashes.
  n-i-bz  Add support for the Linux io_uring system calls
diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c

index 9ba6efc4cc265caf9d9a4be04a5d69828b5b1a4b..008fff98b15d44fdb9027e776bce1a1122018c2d 100644 (file)
--- a/coregrind/m_signals.c
+++ b/coregrind/m_signals.c
@@ -2426,8 +2426,8 @@ void async_signalhandler ( Int sigNo,
                  sigNo, tid, info->si_code,
                  VG_(name_of_VgSchedReturnCode)(tst->exitreason));
  
-   /* */
-   if (tst->exitreason == VgSrc_FatalSig)
+   /* See similar logic in VG_(poll_signals). */
+   if (tst->exitreason != VgSrc_None)
        resume_scheduler(tid);
  
     /* Update thread state properly.  The signal can only have been
@@ -2951,10 +2951,11 @@ void VG_(poll_signals)(ThreadId tid)
     ThreadState *tst = VG_(get_ThreadState)(tid);
     vki_sigset_t saved_mask;
  
-   if (tst->exitreason == VgSrc_FatalSig) {
-      /* This task has been requested to die due to a fatal signal
-         received by the process. So, we cannot poll new signals,
-         as we are supposed to die asap. If we would poll and deliver
+   if (tst->exitreason != VgSrc_None ) {
+      /* This task has been requested to die (e.g. due to a fatal signal
+         received by the process, or because of a call to exit syscall).
+         So, we cannot poll new signals, as we are supposed to die asap.
+         If we would poll and deliver
           a new (maybe fatal) signal, this could cause a deadlock, as
           this thread would believe it has to terminate the other threads
           and wait for them to die, while we already have a thread doing
diff --git a/coregrind/m_syswrap/syswrap-generic.c b/coregrind/m_syswrap/syswrap-generic.c

index 01191f634b83426b061f5883f84abdb12ed9a381..ab966485839f7b57cac449ef3a81adb55eeac88a 100644 (file)
--- a/coregrind/m_syswrap/syswrap-generic.c
+++ b/coregrind/m_syswrap/syswrap-generic.c
@@ -3755,20 +3755,26 @@ Bool ML_(do_sigkill)(Int pid, Int tgid)
     if (tgid != -1 && tst->os_state.threadgroup != tgid)
        return False;            /* not the right thread group */
  
-   /* Check to see that the target isn't already exiting. */
-   if (!VG_(is_exiting)(tid)) {
-      if (VG_(clo_trace_signals))
-        VG_(message)(Vg_DebugMsg,
-                      "Thread %u being killed with SIGKILL\n", 
-                      tst->tid);
-      
-      tst->exitreason = VgSrc_FatalSig;
-      tst->os_state.fatalsig = VKI_SIGKILL;
-      
-      if (!VG_(is_running_thread)(tid))
-        VG_(get_thread_out_of_syscall)(tid);
-   }
-   
+   /* Fatal SIGKILL sent to one of our threads.
+      "Handle" the signal ourselves, as trying to have tid
+      handling the signal causes termination problems (see #409367
+      and #409141).
+      Moreover, as a process cannot do anything when receiving SIGKILL,
+      it is not particularly crucial that "tid" does the work to
+      terminate the process.  */
+
+   if (VG_(clo_trace_signals))
+      VG_(message)(Vg_DebugMsg,
+                   "Thread %u %s being killed with SIGKILL, running tid: %u\n",
+                   tst->tid, VG_(name_of_ThreadStatus) (tst->status), VG_(running_tid));
+
+   if (!VG_(is_running_thread)(tid))
+      tst = VG_(get_ThreadState)(VG_(running_tid));
+   VG_(nuke_all_threads_except) (VG_(running_tid), VgSrc_FatalSig);
+   VG_(reap_threads)(VG_(running_tid));
+   tst->exitreason = VgSrc_FatalSig;
+   tst->os_state.fatalsig = VKI_SIGKILL;
+
     return True;
  }
  
diff --git a/none/tests/Makefile.am b/none/tests/Makefile.am

index 37ac4f087d8a6b48b37ff0453d5c676273cf5136..b63c8d97c37fdf1d040ae988ce86cee8a913f8ae 100644 (file)
--- a/none/tests/Makefile.am
+++ b/none/tests/Makefile.am
@@ -163,6 +163,8 @@ EXTRA_DIST = \
         pth_mutexspeed.stdout.exp pth_mutexspeed.vgtest \
         pth_once.stderr.exp pth_once.stdout.exp pth_once.vgtest \
         pth_rwlock.stderr.exp pth_rwlock.vgtest \
+       pth_self_kill_9.stderr.exp pth_self_kill_9.vgtest \
+       pth_self_kill_15_other.stderr.exp pth_self_kill_15_other.vgtest \
         pth_stackalign.stderr.exp \
         pth_stackalign.stdout.exp pth_stackalign.vgtest \
         pth_2sig.stderr.exp-linux pth_2sig.stderr.exp-solaris pth_2sig.vgtest \
@@ -227,7 +229,7 @@ check_PROGRAMS = \
         pselect_sigmask_null \
         pth_atfork1 pth_blockedsig pth_cancel1 pth_cancel2 pth_cvsimple \
         pth_empty pth_exit pth_exit2 pth_mutexspeed pth_once pth_rwlock \
-       pth_stackalign pth_2sig pth_term_signal\
+       pth_self_kill pth_stackalign pth_2sig pth_term_signal\
         rcrl readline1 \
         require-text-symbol \
         res_search resolv \
@@ -315,6 +317,7 @@ pth_mutexspeed_LDADD        = -lpthread
  pth_once_LDADD         = -lpthread
  pth_rwlock_LDADD       = -lpthread
  pth_rwlock_CFLAGS      = $(AM_CFLAGS)
+pth_self_kill_LDADD    = -lpthread
  pth_stackalign_LDADD   = -lpthread
  pth_2sig_LDADD         = -lpthread
  pth_term_signal_LDADD  = -lpthread
diff --git a/none/tests/pth_self_kill.c b/none/tests/pth_self_kill.c

new file mode 100644 (file)

index 0000000..98adbad
--- /dev/null
+++ b/none/tests/pth_self_kill.c
@@ -0,0 +1,44 @@
+#include <signal.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+/* This reproduces bugs 409141 and 409367.
+   valgrind ./pth_self_kill 9    was hanging.
+
+   valgrind ./pth_self_kill 15 killotherthread  was looping.
+*/
+
+void *t(void *p)
+{
+   sleep (200);
+   printf ("error: supposed to die without printing this\n");
+   exit (5);
+   return NULL;
+}
+
+int main(int argc, char **argv)
+{
+   pthread_t thr;
+
+   if (argc <= 1)
+   {
+      printf
+         ("usage: pth_self_kill SIGNALNR [killotherthread] [sleepafterkill]\n");
+      exit (1);
+   }
+
+   int s = atoi(argv[1]);
+
+   pthread_create(&thr, NULL, t, NULL);
+   sleep (1);
+   if (argc > 2)
+   {
+      pthread_kill(thr, s);
+      if (argc > 3)
+         sleep (2);
+   }
+   else
+      raise(s);
+}
diff --git a/none/tests/pth_self_kill_15_other.stderr.exp b/none/tests/pth_self_kill_15_other.stderr.exp

new file mode 100644 (file)

index 0000000..e69de29
diff --git a/none/tests/pth_self_kill_15_other.vgtest b/none/tests/pth_self_kill_15_other.vgtest

new file mode 100644 (file)

index 0000000..4141869
--- /dev/null
+++ b/none/tests/pth_self_kill_15_other.vgtest
@@ -0,0 +1,3 @@
+prog: pth_self_kill
+args: 15 killotherthread
+vgopts: -q
diff --git a/none/tests/pth_self_kill_9.stderr.exp b/none/tests/pth_self_kill_9.stderr.exp

new file mode 100644 (file)

index 0000000..e69de29
diff --git a/none/tests/pth_self_kill_9.vgtest b/none/tests/pth_self_kill_9.vgtest

new file mode 100644 (file)

index 0000000..4bed473
--- /dev/null
+++ b/none/tests/pth_self_kill_9.vgtest
@@ -0,0 +1,3 @@
+prog: pth_self_kill
+args: 9
+vgopts: -q
author	Philippe Waroquiers <philippe.waroquiers@skynet.be>
	Tue, 2 Jul 2019 19:43:41 +0000 (21:43 +0200)
committer	Philippe Waroquiers <philippe.waroquiers@skynet.be>
	Tue, 9 Jul 2019 17:47:44 +0000 (19:47 +0200)
.gitignore		patch \| blob \| blame \| history
NEWS		patch \| blob \| blame \| history
coregrind/m_signals.c		patch \| blob \| blame \| history
coregrind/m_syswrap/syswrap-generic.c		patch \| blob \| blame \| history
none/tests/Makefile.am		patch \| blob \| blame \| history
none/tests/pth_self_kill.c	[new file with mode: 0644]	patch \| blob
none/tests/pth_self_kill_15_other.stderr.exp	[new file with mode: 0644]	patch \| blob
none/tests/pth_self_kill_15_other.vgtest	[new file with mode: 0644]	patch \| blob
none/tests/pth_self_kill_9.stderr.exp	[new file with mode: 0644]	patch \| blob
none/tests/pth_self_kill_9.vgtest	[new file with mode: 0644]	patch \| blob