From 3d28f9b2dc4fde972bdd570bd3e91002e84668d2 Mon Sep 17 00:00:00 2001
From: Pedro Alves <pedro@palves.net>
Date: Wed, 14 May 2025 19:02:34 +0100
Subject: [PATCH] infrun: with AS+NS, prefer process exit over thread exit

This patch fixes gdb.base/ending-run.exp for Windows when the target
backend supports notifying infrun about thread exit events (which is
added by the Windows non-stop support, later).

Without this patch, and with the Windows target in non-stop mode
("maint set target-non-stop on"), we get, when stepping out of main:

 (gdb) PASS: gdb.base/ending-run.exp: Step to return
 next
 32      }
 (gdb) next
 [Thread 7956.0x2658 exited]
 [Thread 7956.0x2500 exited]
 [Thread 7956.0x2798 exited]
 Command aborted, thread exited.
 (gdb) FAIL: gdb.base/ending-run.exp: step out of main

With the patch, we get:

 (gdb) next
 [Thread 9424.0x40c exited]
 [Inferior 1 (process 9424) exited normally]
 (gdb) PASS: gdb.base/ending-run.exp: step out of main

In the failing case, what happens is that "next" enables
target_thread_events.  Then, the main thread causes the whole process
to exit.  On Windows, that makes the main thread report a thread exit
event, followed by thread exit events for all other threads, except
the last thread that happens to be the one that exits last.  That last
one reports an exit-process event instead.

Since "next" enabled target_thread_events, the Windows target backend
reports the main thread's exit event to infrun.  And then, since the
thread that was stepping reported a thread-exit, GDB aborts the "next"
command.

Stepping out of main is a very common thing to do, and I think
reporting the thread exit in this case when the whole process is
exiting isn't very useful.  I think we can do better.  So instead, if
we're about to report a thread exit in all-stop mode with the backend
in non-stop mode, and while stopping all threads, we see a
whole-process-exit event, prefer processing that event instead of
reporting the original thread exit.

A similar issue can be triggered on GNU/Linux as well, if we step over
an exit syscall that is called by any thread other than main.  This
scenario is exercised by the new testcase added by this patch.

Without the patch, the testcase shows:

 (gdb) next
 [Thread 0x7ffff7a00640 (LWP 3207243) exited]
 warning: error removing breakpoint 0 at 0x5555555551c3
 warning: error removing breakpoint 0 at 0x5555555551c3
 warning: error removing breakpoint 0 at 0x5555555551c3
 Command aborted, thread exited.
 Cannot remove breakpoints because program is no longer writable.
 Further execution is probably impossible.
 (gdb)

This is fixed for GNU/Linux by the patch, which results in:

 (gdb) next
 [Thread 0x7ffff7a00640 (LWP 3230550) exited]
 warning: error removing breakpoint 0 at 0x5555555551c3
 warning: error removing breakpoint 0 at 0x5555555551c3
 warning: error removing breakpoint 0 at 0x5555555551c3
 [Inferior 1 (process 3230539) exited normally]
 (gdb)

Pure all-stop targets (such as GNU/Linux GDBserver unless you force
non-stop with "maint set target-non-stop on") will unfortunately still
have the "Further execution is probably impossible." behavior, because
GDB can't see the process-exit event until the target is re-resumed.
That's unfortunate, but I don't think that should prevent improving
non-stop targets.  (And eventually I would like remote targets to be
always "maint set target-non-stop on" by default if possible, too.)

Change-Id: I56559f13e04aeafd812d15e4b408c8337bca5294
---
 gdb/infrun.c                                  | 209 ++++++++++++------
 .../gdb.threads/step-over-process-exit.c      |  49 ++++
 .../gdb.threads/step-over-process-exit.exp    |  66 ++++++
 3 files changed, 255 insertions(+), 69 deletions(-)
 create mode 100644 gdb/testsuite/gdb.threads/step-over-process-exit.c
 create mode 100644 gdb/testsuite/gdb.threads/step-over-process-exit.exp

diff --git a/gdb/infrun.c b/gdb/infrun.c
index 826093f79f9..81d0b1eb210 100644
--- a/gdb/infrun.c
+++ b/gdb/infrun.c
@@ -110,6 +110,8 @@ static bool step_over_info_valid_p (void);
 
 static bool schedlock_applies (struct thread_info *tp);
 
+static void handle_process_exited (struct execution_control_state *ecs);
+
 /* Asynchronous signal handler registered as event loop source for
    when we have pending events ready to be passed to the core.  */
 static struct async_event_handler *infrun_async_inferior_event_token;
@@ -4767,7 +4769,68 @@ fetch_inferior_event ()
 	       don't want to stop all the other threads.  */
 	    if (ecs.event_thread == nullptr
 		|| !ecs.event_thread->control.in_cond_eval)
-	      stop_all_threads_if_all_stop_mode ();
+	      {
+		stop_all_threads_if_all_stop_mode ();
+
+		/* Say the user does "next" over an exit(0) call, or
+		   out of main, either of which make the whole process
+		   exit.  If the target supports target_thread_events,
+		   then that is activated while "next" is in progress,
+		   and consequently we may be processing a thread-exit
+		   event (caused by the process exit) for a thread
+		   that reported its exit before the
+		   whole-process-exit event is reported for another
+		   thread (which is normally going to be the last
+		   event out of the inferior, but we don't know for
+		   which thread it will be).
+
+		   If we're in 'all-stop on top of non-stop' mode, and
+		   indeed the inferior process is exiting, then the
+		   stop_all_threads_if_all_stop_mode call above must
+		   have seen the process-exit event, as it will see
+		   one stop for each and every (running) thread of the
+		   process.  Look at the pending statuses of all
+		   threads, and see if we have a process-exit status.
+		   If so, prefer handling it now and report the
+		   inferior exit to the user instead of reporting the
+		   original thread exit.
+
+		   Do not do this if are handling any other kind of
+		   event, like e.g., a breakpoint hit, which the user
+		   may be interested in knowing was hit before the
+		   process exited.  If we ever have a "catch
+		   thread-exit" or something similar, we may want to
+		   skip this "prefer process-exit" if such a
+		   catchpoint is installed.  */
+		if (ecs.ws.kind () == TARGET_WAITKIND_THREAD_EXITED
+		    && !non_stop && exists_non_stop_target ())
+		  {
+		    for (thread_info *tp : inf->non_exited_threads ())
+		      {
+			if (tp->has_pending_waitstatus ()
+			    && ((tp->pending_waitstatus ().kind ()
+				 == TARGET_WAITKIND_EXITED)
+				|| (tp->pending_waitstatus ().kind ()
+				    == TARGET_WAITKIND_SIGNALLED)))
+			  {
+			    /* Found a pending process-exit event.
+			       Prefer handling and reporting it now
+			       over the thread-exit event.  */
+			    infrun_debug_printf
+			      ("found pending process-exit event, preferring it");
+			    ecs.ws = tp->pending_waitstatus ();
+			    tp->clear_pending_waitstatus ();
+			    ecs.event_thread = nullptr;
+			    ecs.ptid = tp->ptid;
+			    /* Re-record the last target status.  */
+			    set_last_target_status (ecs.target, ecs.ptid,
+						    ecs.ws);
+			    handle_process_exited (&ecs);
+			    break;
+			  }
+		      }
+		  }
+	      }
 
 	    clean_up_just_stopped_threads_fsms (&ecs);
 
@@ -6091,6 +6154,81 @@ handle_thread_exited (execution_control_state *ecs)
   return true;
 }
 
+/* Handle a process exit event.  */
+
+static void
+handle_process_exited (execution_control_state *ecs)
+{
+  /* Depending on the system, ecs->ptid may point to a thread or to a
+     process.  On some targets, target_mourn_inferior may need to have
+     access to the just-exited thread.  That is the case of
+     GNU/Linux's "checkpoint" support, for example.  Call the
+     switch_to_xxx routine as appropriate.  */
+  thread_info *thr = ecs->target->find_thread (ecs->ptid);
+  if (thr != nullptr)
+    switch_to_thread (thr);
+  else
+    {
+      inferior *inf = find_inferior_ptid (ecs->target, ecs->ptid);
+      switch_to_inferior_no_thread (inf);
+    }
+
+  handle_vfork_child_exec_or_exit (0);
+  target_terminal::ours ();	/* Must do this before mourn anyway.  */
+
+  /* Clearing any previous state of convenience variables.  */
+  clear_exit_convenience_vars ();
+
+  if (ecs->ws.kind () == TARGET_WAITKIND_EXITED)
+    {
+      /* Record the exit code in the convenience variable $_exitcode,
+	 so that the user can inspect this again later.  */
+      set_internalvar_integer (lookup_internalvar ("_exitcode"),
+			       (LONGEST) ecs->ws.exit_status ());
+
+      /* Also record this in the inferior itself.  */
+      current_inferior ()->has_exit_code = true;
+      current_inferior ()->exit_code = (LONGEST) ecs->ws.exit_status ();
+
+      /* Support the --return-child-result option.  */
+      return_child_result_value = ecs->ws.exit_status ();
+
+      interps_notify_exited (ecs->ws.exit_status ());
+    }
+  else
+    {
+      struct gdbarch *gdbarch = current_inferior ()->arch ();
+
+      if (gdbarch_gdb_signal_to_target_p (gdbarch))
+	{
+	  /* Set the value of the internal variable $_exitsignal,
+	     which holds the signal uncaught by the inferior.  */
+	  set_internalvar_integer (lookup_internalvar ("_exitsignal"),
+				   gdbarch_gdb_signal_to_target (gdbarch,
+								 ecs->ws.sig ()));
+	}
+      else
+	{
+	  /* We don't have access to the target's method used for
+	     converting between signal numbers (GDB's internal
+	     representation <-> target's representation).
+	     Therefore, we cannot do a good job at displaying this
+	     information to the user.  It's better to just warn
+	     her about it (if infrun debugging is enabled), and
+	     give up.  */
+	  infrun_debug_printf ("Cannot fill $_exitsignal with the correct "
+			       "signal number.");
+	}
+
+      interps_notify_signal_exited (ecs->ws.sig ());
+    }
+
+  gdb_flush (gdb_stdout);
+  target_mourn_inferior (inferior_ptid);
+  stop_print_frame = false;
+  stop_waiting (ecs);
+}
+
 /* Given an execution control state that has been freshly filled in by
    an event from the inferior, figure out what it means and take
    appropriate action.
@@ -6300,74 +6438,7 @@ handle_inferior_event (struct execution_control_state *ecs)
 
     case TARGET_WAITKIND_EXITED:
     case TARGET_WAITKIND_SIGNALLED:
-      {
-	/* Depending on the system, ecs->ptid may point to a thread or
-	   to a process.  On some targets, target_mourn_inferior may
-	   need to have access to the just-exited thread.  That is the
-	   case of GNU/Linux's "checkpoint" support, for example.
-	   Call the switch_to_xxx routine as appropriate.  */
-	thread_info *thr = ecs->target->find_thread (ecs->ptid);
-	if (thr != nullptr)
-	  switch_to_thread (thr);
-	else
-	  {
-	    inferior *inf = find_inferior_ptid (ecs->target, ecs->ptid);
-	    switch_to_inferior_no_thread (inf);
-	  }
-      }
-      handle_vfork_child_exec_or_exit (0);
-      target_terminal::ours ();	/* Must do this before mourn anyway.  */
-
-      /* Clearing any previous state of convenience variables.  */
-      clear_exit_convenience_vars ();
-
-      if (ecs->ws.kind () == TARGET_WAITKIND_EXITED)
-	{
-	  /* Record the exit code in the convenience variable $_exitcode, so
-	     that the user can inspect this again later.  */
-	  set_internalvar_integer (lookup_internalvar ("_exitcode"),
-				   (LONGEST) ecs->ws.exit_status ());
-
-	  /* Also record this in the inferior itself.  */
-	  current_inferior ()->has_exit_code = true;
-	  current_inferior ()->exit_code = (LONGEST) ecs->ws.exit_status ();
-
-	  /* Support the --return-child-result option.  */
-	  return_child_result_value = ecs->ws.exit_status ();
-
-	  interps_notify_exited (ecs->ws.exit_status ());
-	}
-      else
-	{
-	  struct gdbarch *gdbarch = current_inferior ()->arch ();
-
-	  if (gdbarch_gdb_signal_to_target_p (gdbarch))
-	    {
-	      /* Set the value of the internal variable $_exitsignal,
-		 which holds the signal uncaught by the inferior.  */
-	      set_internalvar_integer (lookup_internalvar ("_exitsignal"),
-				       gdbarch_gdb_signal_to_target (gdbarch,
-							  ecs->ws.sig ()));
-	    }
-	  else
-	    {
-	      /* We don't have access to the target's method used for
-		 converting between signal numbers (GDB's internal
-		 representation <-> target's representation).
-		 Therefore, we cannot do a good job at displaying this
-		 information to the user.  It's better to just warn
-		 her about it (if infrun debugging is enabled), and
-		 give up.  */
-	      infrun_debug_printf ("Cannot fill $_exitsignal with the correct "
-				   "signal number.");
-	    }
-
-	  interps_notify_signal_exited (ecs->ws.sig ());
-	}
-
-      gdb_flush (gdb_stdout);
-      target_mourn_inferior (inferior_ptid);
-      stop_print_frame = false;
+      handle_process_exited (ecs);
       stop_waiting (ecs);
       return;
 
diff --git a/gdb/testsuite/gdb.threads/step-over-process-exit.c b/gdb/testsuite/gdb.threads/step-over-process-exit.c
new file mode 100644
index 00000000000..8244f7002b9
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/step-over-process-exit.c
@@ -0,0 +1,49 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2025 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+volatile int other_thread_exits = 0;
+
+static void *
+thread_function (void *arg)
+{
+  if (other_thread_exits)
+    exit (0); /* break here other */
+
+  while (1)
+    sleep (1);
+}
+
+int
+main ()
+{
+  pthread_t thread;
+
+  alarm (30);
+
+  pthread_create (&thread, NULL, thread_function, NULL);
+
+  if (!other_thread_exits)
+    exit (0); /* break here main */
+
+  while (1)
+    sleep (1);
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.threads/step-over-process-exit.exp b/gdb/testsuite/gdb.threads/step-over-process-exit.exp
new file mode 100644
index 00000000000..ea0a429e49a
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/step-over-process-exit.exp
@@ -0,0 +1,66 @@
+# This testcase is part of GDB, the GNU debugger.
+
+# Copyright 2025 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test stepping over an exit syscall from both the main thread, and a
+# non-main thread.
+
+if { [target_info exists exit_is_reliable] } {
+    set exit_is_reliable [target_info exit_is_reliable]
+} else {
+    set exit_is_reliable [expr ! [target_info exists use_gdb_stub]]
+}
+require {expr $exit_is_reliable}
+
+standard_testfile
+
+if { [prepare_for_testing "failed to prepare" $testfile $srcfile {debug pthread}] } {
+    return -1
+}
+
+# WHICH is which thread exits the process.  Can be "main" for main
+# thread, or "other" for the non-main thread.
+
+proc test {which} {
+    if ![runto_main] {
+	return -1
+    }
+
+    set other [expr {$which == "other"}]
+    gdb_test "p other_thread_exits = $other" " = $other"
+
+    set break_line [gdb_get_line_number "break here $which"]
+    gdb_breakpoint $break_line
+    gdb_continue_to_breakpoint "exit syscall"
+
+    set target_non_stop [is_target_non_stop]
+
+    gdb_test_multiple "next" "" {
+	-re -wrap "$::inferior_exited_re normally\\\]" {
+	    pass $gdb_test_name
+	}
+	-re -wrap "Further execution is probably impossible\\." {
+	    # With a target in all-stop, this is the best we can do.
+	    # We should not see this with a target backend in non-stop
+	    # mode, however.
+	    gdb_assert !$target_non_stop $gdb_test_name
+	}
+    }
+}
+
+foreach_with_prefix which {main other} {
+    test $which
+}
-- 
2.47.2