From: Pedro Alves <pedro@palves.net>
Date: Sat, 17 May 2025 09:54:50 +0000 (+0100)
Subject: Windows gdb: Add non-stop support
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6dc85017252155cf5a0d48752df5095378f88356;p=thirdparty%2Fbinutils-gdb.git

Windows gdb: Add non-stop support

This patch adds non-stop support to the native Windows target.

This is made possible by the ContinueDebugEvent DBG_REPLY_LATER flag:

https://learn.microsoft.com/en-us/windows/win32/api/debugapi/nf-debugapi-continuedebugevent

  Supported in Windows 10, version 1507 or above, this flag causes
  dwThreadId to replay the existing breaking event after the target
  continues. By calling the SuspendThread API against dwThreadId, a
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  debugger can resume other threads in the process and later return to
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  the breaking.
  ^^^^^^^^^^^^

The patch adds a new comment section in gdb/windows-nat.c providing an
overall picture of how all-stop / non-stop work.

Without DBG_REPLY_LATER, if we SuspendThread the thread, and then
immediately ContinueDebugEvent(DBG_CONTINUE) before getting back to
the prompt, we could still have non-stop mode working, however, then
users wouldn't have a chance to decide whether to pass the signal to
the inferior the next time they resume the program, as that is done by
passing DBG_EXCEPTION_NOT_HANDLED to ContinueDebugEvent, and that has
already been called.

The patch teaches the Windows native backend to use that
DBG_REPLY_LATER flag, and also adds support for target_stop, so the
core can pause threads at its discretion.  This pausing does not use
the same mechanisms used in windows_nat_target::interrupt, as that
injects a new thread in the inferior.  Instead, for each thread the
core wants paused, it uses SuspendThread, and enqueues a pending
GDB_SIGNAL_0 stop on the thread.

Since DBG_REPLY_LATER only exists on Windows 10 and later, we only
enable non-stop mode on Windows 10 and later.

There is no displaced stepping support, but that's "just" a missed
optimization to be done later.

Cygwin signals handling was a major headache, but I managed to get it
working.  See the "Cygwin signals" description section I added at the
top of windows-nat.c.

Another interesting bit is that the use of DBG_REPLY_LATER caused one
problem with detach.  The Windows kernel re-raises any exception
previously intercepted and deferred with DBG_REPLY_LATER in the
inferior after we detach.  We need to flush those events, and suppress
those which aren't meant to be seen by the inferior (e.g.,
breakpoints, single-steps, any with matching "handle SIG nopass",
etc.), otherwise the inferior dies immediately after the detach, due
to an unhandled exception.

Change-Id: Id71aef461c43c244120635b5bedc638fe77c31fb
---

diff --git a/gdb/nat/windows-nat.c b/gdb/nat/windows-nat.c
index e9005cea090..63638b31489 100644
--- a/gdb/nat/windows-nat.c
+++ b/gdb/nat/windows-nat.c
@@ -723,17 +723,20 @@ get_last_debug_event_ptid ()
 /* See nat/windows-nat.h.  */
 
 BOOL
-continue_last_debug_event (DWORD continue_status, bool debug_events)
+continue_last_debug_event (DWORD cont_status, bool debug_events)
 {
-  DEBUG_EVENTS ("ContinueDebugEvent (cpid=%d, ctid=0x%x, %s)",
-		(unsigned) last_wait_event.dwProcessId,
-		(unsigned) last_wait_event.dwThreadId,
-		continue_status == DBG_CONTINUE ?
-		"DBG_CONTINUE" : "DBG_EXCEPTION_NOT_HANDLED");
+  DEBUG_EVENTS
+    ("ContinueDebugEvent (cpid=%d, ctid=0x%x, %s)",
+     (unsigned) last_wait_event.dwProcessId,
+     (unsigned) last_wait_event.dwThreadId,
+     cont_status == DBG_CONTINUE ? "DBG_CONTINUE" :
+     cont_status == DBG_EXCEPTION_NOT_HANDLED ? "DBG_EXCEPTION_NOT_HANDLED" :
+     cont_status == DBG_REPLY_LATER ? "DBG_REPLY_LATER" :
+     "DBG_???");
 
   return ContinueDebugEvent (last_wait_event.dwProcessId,
 			     last_wait_event.dwThreadId,
-			     continue_status);
+			     cont_status);
 }
 
 /* See nat/windows-nat.h.  */
diff --git a/gdb/nat/windows-nat.h b/gdb/nat/windows-nat.h
index 19403d0e623..fe377fcde34 100644
--- a/gdb/nat/windows-nat.h
+++ b/gdb/nat/windows-nat.h
@@ -87,12 +87,55 @@ struct windows_thread_info
   /* Thread Information Block address.  */
   CORE_ADDR thread_local_base;
 
+#ifdef __CYGWIN__
+  /* These two fields are used to handle Cygwin signals.  When a
+     thread is signaled, the "sig" thread inside the Cygwin runtime
+     reports the fact to us via a special OutputDebugString message.
+     In order to make stepping into a signal handler work, we can only
+     resume the "sig" thread when we also resume the target signaled
+     thread.  When we intercept a Cygwin signal, we set up a cross
+     link between the two threads using the two fields below, so we
+     can always identify one from the other.  See the "Cygwin signals"
+     description in gdb/windows-nat.c for more.  */
+
+  /* If this thread received a signal, then 'cygwin_sig_thread' points
+     to the "sig" thread within the Cygwin runtime.  */
+  windows_thread_info *cygwin_sig_thread = nullptr;
+
+  /* If this thread is the Cygwin runtime's "sig" thread, then
+     'signaled_thread' points at the thread that received a
+     signal.  */
+  windows_thread_info *signaled_thread = nullptr;
+#endif
+
+  /* If the thread had its event postponed with DBG_REPLY_LATER, when
+     we later ResumeThread this thread, WaitForDebugEvent will
+     re-report the postponed event.  This field holds the continue
+     status value to be automatically passed to ContinueDebugEvent
+     when we encounter this re-reported event.  0 if the thread has
+     not had its event postponed with DBG_REPLY_LATER. */
+  DWORD reply_later = 0;
+
   /* This keeps track of whether SuspendThread was called on this
      thread.  -1 means there was a failure or that the thread was
      explicitly not suspended, 1 means it was called, and 0 means it
      was not.  */
   int suspended = 0;
 
+  /* This flag indicates whether we are explicitly stopping this
+     thread in response to a target_stop request.  This allows
+     distinguishing between threads that are explicitly stopped by the
+     debugger and threads that are stopped due to other reasons.
+
+     Typically, when we want to stop a thread, we suspend it, enqueue
+     a pending GDB_SIGNAL_0 stop status on the thread, and then set
+     this flag to true.  However, if the thread has had its event
+     previously postponed with DBG_REPLY_LATER, it means that it
+     already has an event to report.  In such case, we simply set the
+     'stopping' flag without suspending the thread or enqueueing a
+     pending stop.  See stop_one_thread.  */
+  bool stopping = false;
+
 /* Info about a potential pending stop.
 
    Sometimes, Windows will report a stop on a thread that has been
@@ -173,15 +216,15 @@ struct windows_process_info
   virtual windows_thread_info *find_thread (ptid_t ptid) = 0;
 
   /* Handle OUTPUT_DEBUG_STRING_EVENT from child process.  Updates
-     OURSTATUS and returns the thread id if this represents a thread
-     change (this is specific to Cygwin), otherwise 0.
+     OURSTATUS and returns true if this represents a Cygwin signal,
+     otherwise false.
 
      Cygwin prepends its messages with a "cygwin:".  Interpret this as
      a Cygwin signal.  Otherwise just print the string as a warning.
 
      This function must be supplied by the embedding application.  */
-  virtual DWORD handle_output_debug_string (const DEBUG_EVENT &current_event,
-					    struct target_waitstatus *ourstatus) = 0;
+  virtual bool handle_output_debug_string (const DEBUG_EVENT &current_event,
+					   struct target_waitstatus *ourstatus) = 0;
 
   /* Handle a DLL load event.
 
diff --git a/gdb/windows-nat.c b/gdb/windows-nat.c
index 50bee871776..d987ca5655a 100644
--- a/gdb/windows-nat.c
+++ b/gdb/windows-nat.c
@@ -76,6 +76,202 @@
 #include "ser-event.h"
 #include "inf-loop.h"
 
+/* This comment documents high-level logic of this file.
+
+all-stop
+========
+
+In all-stop mode ("maint set target-non-stop off"), there is only ever
+one Windows debug event in flight. When we receive an event from
+WaitForDebugEvent, the kernel has already implicitly suspended all the
+threads of the process.  We report the breaking event to the core.
+When the core decides to resume the inferior, it calls
+windows_nat_target::resume, which triggers a ContinueDebugEvent call.
+This call makes all unsuspended threads schedulable again, and we go
+back to waiting for the next event in WaitForDebugEvent.
+
+non-stop
+========
+
+For non-stop mode, we utilize the DBG_REPLY_LATER flag in the
+ContinueDebugEvent function.  According to Microsoft:
+
+ "This flag causes dwThreadId to replay the existing breaking event
+ after the target continues.  By calling the SuspendThread API against
+ dwThreadId, a debugger can resume other threads in the process and
+ later return to the breaking."
+
+To enable non-stop mode, windows_nat_target::wait suspends the thread,
+calls 'ContinueDebugEvent(..., DBG_REPLY_LATER)', and sets the
+process_thread thread to wait for the next event using
+WaitForDebugEvent, all before returning the original breaking event to
+the core.
+
+When the user/core finally decides to resume the inferior thread that
+reported the event, we unsuspend it using ResumeThread.  Unlike in
+all-stop mode, we don't call ContinueDebugEvent then, as it has
+already been called when the event was first encountered.  By making
+the inferior thread schedulable again (by unsuspending it),
+WaitForDebugEvent re-reports the same event (due to the earlier
+DBG_REPLY_LATER).  In windows_nat_target::wait, we detect this delayed
+re-report and call ContinueDebugEvent on the thread, instructing the
+"process_thread" thread (the GDB thread responsible for calling
+WaitForDebugEvent) to continue waiting for the next event.
+
+During the initial thread resumption in windows_nat_target::resume, we
+recorded the dwContinueStatus argument to be passed to the last
+ContinueDebugEvent (called when the reply-later event is re-reported).
+See windows_thread_info::reply_later for details.
+
+Note that with this setup, in non-stop mode, every stopped thread has
+its own independent last-reported Windows debug event.  Therefore, we
+can decide on a per-thread basis whether to pass the thread's
+exception (DBG_EXCEPTION_NOT_HANDLED / DBG_CONTINUE) to the inferior.
+This per-thread decision is not possible in all-stop mode, where we
+only call ContinueDebugEvent for the thread that last reported a stop,
+at windows_nat_target::resume time.
+
+Thread and process exits
+========================
+
+When a process exits, Windows reports one EXIT_THREAD_DEBUG_EVENT
+event for each thread, except for the last thread that exits.  That
+last thread reports an EXIT_PROCESS_DEBUG_EVENT event instead.
+
+The last thread that exits is not guaranteed to be the main thread of
+the process.  In fact, it seldom is.  E.g., if the main thread calls
+ExitProcess (or returns from main, which ends up calling ExitProcess),
+then we typically see an EXIT_THREAD_DEBUG_EVENT event for the main
+thread first, followed by more EXIT_THREAD_DEBUG_EVENT events for
+other threads, and then finally the EXIT_PROCESS_DEBUG_EVENT for
+whatever thread happened to be the last one to exit.
+
+When a thread reports EXIT_THREAD_DEBUG_EVENT /
+EXIT_PROCESS_DEBUG_EVENT, our handle to the thread is still valid, and
+we can still read its registers.  Windows only destroys the handle
+after ContinueDebugEvent.
+
+A thread that has exited CANNOT be suspended.  So if a thread was
+previously suspended, and then something kills the whole process
+(which force-kills all threads), that suspended thread will
+automatically "unsuspend", and report an EXIT_THREAD_DEBUG_EVENT event.
+However, if we had previously used DBG_REPLY_LATER on the thread,
+Windows will first re-report the kernel-side-queued "reply-later"
+event, and only after that one is ContinueDebugEvent'ed, will we see
+the EXIT_THREAD_DEBUG_EVENT event.
+
+Detaching and DBG_REPLY_LATER
+=============================
+
+After we detach from a process that has threads that we had previously
+used DBG_REPLY_LATER on, the kernel re-raises the "reply-later"
+exceptions for those threads.  This would most often kill the
+just-detached process, if we let it happen.  To prevent it, we flush
+all the "reply-later" events from the kernel before detaching.
+
+Cygwin signals
+==============
+
+The Cygwin runtime always spawns a "sig" thread, which is responsible
+for receiving signal delivery requests, and hijacking the signaled
+thread's execution to make it run the signal handler.  This is all
+explained here:
+
+  https://sourceware.org/cgit/newlib-cygwin/tree/winsup/cygwin/DevDocs/how-signals-work.txt
+
+There's a custom debug api protocol between GDB and Cygwin to be able
+to intercept Cygwin signals before they're seen by the signaled
+thread, just like the debugger intercepts signals with ptrace on
+Linux.  This Cygwin debugger protocol isn't well documented, though.
+Here's what happens: when the special "sig" thread in the Cygwin
+runtime is about to deliver a signal to the target thread, it calls
+OutputDebugString with a special message:
+
+  https://sourceware.org/cgit/newlib-cygwin/tree/winsup/cygwin/exceptions.cc?id=4becae7bd833e183c789821a477f25898ed0db1f#n1866
+
+OutputDebugString is a function that is part of the Windows debug API.
+It generates an OUTPUT_DEBUG_STRING_EVENT event out of
+WaitForDebugEvent in the debugger, which freezes the inferior, like
+any other event.
+
+GDB recognizes the special Cygwin signal marker string, and is able to
+report the intercepted Cygwin signal to the user.
+
+With the windows-nat backend in all-stop mode, if the user decides to
+single-step the signaled thread, GDB will set the trace flag in the
+signaled thread to force it to single-step, and then re-resume the
+program with ContinueDebugEvent.  This resumes both the signaled
+thread, and the special "sig" thread.  The special "sig" thread
+decides to make the signaled thread run the signal handler, so it
+suspends it with SuspendThread, does a read-modify-write operation
+with GetThreadContext/SetThreadContext, and then re-resumes it with
+ResumeThread.  This is all done here:
+
+   https://sourceware.org/cgit/newlib-cygwin/tree/winsup/cygwin/exceptions.cc?id=4becae7bd833e183c789821a477f25898ed0db1f#n1011
+
+That resulting register context will still have its trace flag set, so
+the signaled thread ends up single-stepping the signal handler and
+reporting the trace stop to GDB, which reports the stop where the
+thread is now stopped, inside the signal handler.
+
+That is the intended behavior; stepping into a signal handler is a
+feature that works on other ports as well, including x86 GNU/Linux,
+for example.  This is exercised by the gdb.base/sigstep.exp testcase.
+
+Now, making that work with the backend in non-stop mode (the default
+on Windows 10 and above) is trickier.  In that case, when GDB sees the
+magic OUTPUT_DEBUG_STRING_EVENT event mentioned above, reported for
+the "sig" thread, GDB reports the signal stop for the target signaled
+thread to the user (leaving that thread stopped), but, unlike with an
+all-stop backend, in non-stop, only the evented/signaled thread should
+be stopped, so the backend would normally want to re-resume the Cygwin
+runtime's "sig" thread after handling the OUTPUT_DEBUG_STRING_EVENT
+event, like it does with any other event out of WaitForDebugEvent that
+is not reported to the core.  If it did that (resume the "sig" thread)
+however, at that point, the signaled thread would be stopped,
+suspended with SuspendThread by GDB (while the user is inspecting it),
+but, unlike in all-stop, the "sig" thread would be set running free.
+The "sig" thread would reach the code that wants to redirect the
+signaled thread's execution to the signal handler (by hacking the
+registers context, as described above), but unlike in the all-stop
+case, the "sig" thread would notice that the signaled thread is
+suspended, and so would decide to defer the signal handler until a
+later time.  It's the same code as described above for the all-stop
+case, except it would take the "then" branch:
+
+   https://sourceware.org/cgit/newlib-cygwin/tree/winsup/cygwin/exceptions.cc?id=4becae7bd833e183c789821a477f25898ed0db1f#n1019
+
+   // Just set pending if thread is already suspended
+   if (res)
+     {
+       tls->unlock ();
+       ResumeThread (hth);
+       goto out;
+     }
+
+The result would be that when the GDB user later finally decides to
+step the signaled thread, the signaled thread would just single step
+the mainline code, instead of stepping into the signal handler.
+
+To avoid this difference of behavior in non-stop mode compared to
+all-stop mode, we use a trick -- whenever we see that magic
+OUTPUT_DEBUG_STRING_EVENT event reported for the "sig" thread, we
+report a stop for the target signaled thread, _and_ leave the "sig"
+thread suspended as well, for as long as the target signaled thread is
+suspended.  I.e., we don't let the "sig" thread run before the user
+decides what to do with the signaled thread's signal.  Only when the
+user re-resumes the signaled thread, will we resume the "sig" thread
+as well.  The trick is that all this is done here in the Windows
+backend, while providing the illusion to the core of GDB (and the
+user) that the "sig" thread is "running", for as long as the core
+wants the "sig" thread to be running.
+
+This isn't ideal, since this means that with user-visible non-stop,
+the inferior will only be able to process and report one signal at a
+time (as the "sig" thread is responsible for that), but that seems
+like an acceptable compromise, better than not being able to have the
+target work in non-stop by default on Cygwin.  */
+
 using namespace windows_nat;
 
 /* Maintain a linked list of "so" information.  */
@@ -102,6 +298,11 @@ enum windows_continue_flag
        call to continue the inferior -- we are either mourning it or
        detaching.  */
     WCONT_LAST_CALL = 2,
+
+    /* By default, windows_continue only calls ContinueDebugEvent in
+       all-stop mode.  This flag indicates that windows_continue
+       should call ContinueDebugEvent even in non-stop mode.  */
+    WCONT_CONTINUE_DEBUG_EVENT = 4,
   };
 
 DEF_ENUM_FLAGS_TYPE (windows_continue_flag, windows_continue_flags);
@@ -129,8 +330,8 @@ struct windows_private_thread_info : private_thread_info, windows_thread_info
 struct windows_per_inferior : public windows_process_info
 {
   windows_thread_info *find_thread (ptid_t ptid) override;
-  DWORD handle_output_debug_string (const DEBUG_EVENT &current_event,
-				    struct target_waitstatus *ourstatus) override;
+  bool handle_output_debug_string (const DEBUG_EVENT &current_event,
+				   struct target_waitstatus *ourstatus) override;
   void handle_load_dll (const char *dll_name, LPVOID base) override;
   void handle_unload_dll (const DEBUG_EVENT &current_event) override;
   bool handle_access_violation (const EXCEPTION_RECORD *rec) override;
@@ -279,7 +480,11 @@ struct windows_nat_target final : public x86_nat_target<inf_child_target>
   void attach (const char *, int) override;
 
   bool attach_no_wait () override
-  { return true; }
+  {
+    /* In non-stop, after attach, we leave all threads running, like
+       other targets.  */
+    return !target_is_non_stop_p ();
+  }
 
   void detach (inferior *, int) override;
 
@@ -322,8 +527,13 @@ struct windows_nat_target final : public x86_nat_target<inf_child_target>
   std::string pid_to_str (ptid_t) override;
 
   void interrupt () override;
+  void stop (ptid_t) override;
   void pass_ctrlc () override;
 
+  void thread_events (bool enable) override;
+
+  bool any_resumed_thread ();
+
   const char *pid_to_exec_file (int pid) override;
 
   ptid_t get_ada_task_ptid (long lwp, ULONGEST thread) override;
@@ -353,6 +563,8 @@ struct windows_nat_target final : public x86_nat_target<inf_child_target>
     return m_is_async;
   }
 
+  bool supports_non_stop () override;
+
   void async (bool enable) override;
 
   int async_wait_fd () override
@@ -367,6 +579,15 @@ private:
   void delete_thread (ptid_t ptid, DWORD exit_code, bool main_thread_p);
   DWORD fake_create_process (const DEBUG_EVENT &current_event);
 
+  void stop_one_thread (windows_thread_info *th);
+
+  DWORD continue_status_for_event_detaching
+    (const DEBUG_EVENT &event, size_t *reply_later_events_left = nullptr);
+
+  DWORD prepare_resume (windows_thread_info *wth,
+			thread_info *tp,
+			int step, gdb_signal sig);
+
   BOOL windows_continue (DWORD continue_status, int id,
 			 windows_continue_flags cont_flags = 0);
 
@@ -430,6 +651,9 @@ private:
      already returned an event, and we need to ContinueDebugEvent
      again to restart the inferior.  */
   bool m_continued = false;
+
+  /* Whether target_thread_events is in effect.  */
+  bool m_report_thread_events = false;
 };
 
 /* Get the windows_thread_info object associated with THR.  */
@@ -722,6 +946,13 @@ windows_nat_target::add_thread (ptid_t ptid, HANDLE h, void *tlb,
      registers.  */
   th->debug_registers_changed = true;
 
+  /* Even if we're stopping the thread for some reason internal to
+     this module, from the perspective of infrun and the
+     user/frontend, this new thread is running until it next reports a
+     stop.  */
+  set_state (this, ptid, THREAD_RUNNING);
+  set_internal_state (this, ptid, THREAD_INT_RUNNING);
+
   return th;
 }
 
@@ -1093,12 +1324,17 @@ signal_event_command (const char *args, int from_tty)
 
 /* See nat/windows-nat.h.  */
 
-DWORD
+bool
 windows_per_inferior::handle_output_debug_string
   (const DEBUG_EVENT &current_event,
    struct target_waitstatus *ourstatus)
 {
-  DWORD thread_id = 0;
+  windows_thread_info *event_thr
+    = windows_process.find_thread (ptid_t (current_event.dwProcessId,
+					   current_event.dwThreadId));
+  if (event_thr->reply_later != 0)
+    internal_error ("OutputDebugString thread 0x%x has reply-later set",
+		    event_thr->tid);
 
   gdb::unique_xmalloc_ptr<char> s
     = (target_read_string
@@ -1135,15 +1371,37 @@ windows_per_inferior::handle_output_debug_string
       int sig = strtol (s.get () + sizeof (_CYGWIN_SIGNAL_STRING) - 1, &p, 0);
       gdb_signal gotasig = gdb_signal_from_host (sig);
       LPCVOID x = 0;
+      DWORD thread_id = 0;
 
-      if (gotasig)
+      if (gotasig != GDB_SIGNAL_0)
 	{
-	  ourstatus->set_stopped (gotasig);
 	  thread_id = strtoul (p, &p, 0);
-	  if (thread_id == 0)
-	    thread_id = current_event.dwThreadId;
-	  else
-	    x = (LPCVOID) (uintptr_t) strtoull (p, NULL, 0);
+	  if (thread_id != 0)
+	    {
+	      x = (LPCVOID) (uintptr_t) strtoull (p, NULL, 0);
+
+	      ptid_t ptid (current_event.dwProcessId, thread_id, 0);
+	      windows_thread_info *th = find_thread (ptid);
+
+	      /* Suspend the signaled thread, and leave the signal as
+		 a pending event.  It will be picked up by
+		 windows_nat_target::wait.  */
+	      th->suspend ();
+	      th->stopping = true;
+	      th->last_event = {};
+	      th->pending_status.set_stopped (gotasig);
+
+	      /* Link the "sig" thread and the signaled threads, so we
+		 can keep the "sig" thread suspended until we resume
+		 the signaled thread.  See "Cygwin signals" at the
+		 top.  */
+	      event_thr->signaled_thread = th;
+	      th->cygwin_sig_thread = event_thr;
+
+	      /* Leave the "sig" thread suspended.  */
+	      event_thr->suspend ();
+	      return true;
+	    }
 	}
 
       DEBUG_EVENTS ("gdb: cygwin signal %d, thread 0x%x, CONTEXT @ %p",
@@ -1151,7 +1409,7 @@ windows_per_inferior::handle_output_debug_string
     }
 #endif
 
-  return thread_id;
+  return false;
 }
 
 static int
@@ -1307,6 +1565,15 @@ windows_per_inferior::continue_one_thread (windows_thread_info *th,
 {
   struct x86_debug_reg_state *state = x86_debug_reg_state (process_id);
 
+  /* If this thread is already gone, but the core doesn't know about
+     it yet, there's really nothing to resume.  Such a thread will
+     have a pending exit status, so we won't try to resume it in the
+     normal resume path.  But, we can still end up here in the
+     kill/detach/mourn paths, trying to resume the whole process to
+     collect the last debug event.  */
+  if (th->h == nullptr)
+    return;
+
   windows_process.with_context (th, [&] (auto *context)
     {
       if (th->debug_registers_changed)
@@ -1380,6 +1647,7 @@ windows_per_inferior::continue_one_thread (windows_thread_info *th,
     });
 
   th->resume ();
+  th->stopping = false;
   th->last_sig = GDB_SIGNAL_0;
 }
 
@@ -1391,31 +1659,77 @@ BOOL
 windows_nat_target::windows_continue (DWORD continue_status, int id,
 				      windows_continue_flags cont_flags)
 {
-  for (auto *th : all_windows_threads ())
-    {
-      if ((id == -1 || id == (int) th->tid)
-	  && !th->suspended
-	  && th->pending_status.kind () != TARGET_WAITKIND_IGNORE)
-	{
-	  DEBUG_EVENTS ("got matching pending stop event "
-			"for 0x%x, not resuming",
-			th->tid);
-
-	  /* There's no need to really continue, because there's already
-	     another event pending.  However, we do need to inform the
-	     event loop of this.  */
-	  serial_event_set (m_wait_event);
-	  return TRUE;
-	}
-    }
+  if ((cont_flags & (WCONT_LAST_CALL | WCONT_KILLED)) == 0)
+    for (auto *th : all_windows_threads ())
+      {
+	if ((id == -1 || id == (int) th->tid)
+	    && th->pending_status.kind () != TARGET_WAITKIND_IGNORE)
+	  {
+	    DEBUG_EVENTS ("got matching pending stop event "
+			  "for 0x%x, not resuming",
+			  th->tid);
+
+	    /* There's no need to really continue, because there's already
+	       another event pending.  However, we do need to inform the
+	       event loop of this.  */
+	    serial_event_set (m_wait_event);
+	    return TRUE;
+	  }
+      }
 
+  /* Resume any suspended thread whose ID matches "ID".  Skip the
+     Cygwin "sig" thread in the main iteration, though.  That one is
+     only resumed when the target signaled thread is resumed.  See
+     "Cygwin signals" in the intro section.  */
   for (auto *th : all_windows_threads ())
-    if (id == -1 || id == (int) th->tid)
-      windows_process.continue_one_thread (th, cont_flags);
+    if (th->suspended
+#ifdef __CYGWIN__
+	&& th->signaled_thread == nullptr
+#endif
+	&& (id == -1 || id == (int) th->tid))
+      {
+	windows_process.continue_one_thread (th, cont_flags);
+
+#ifdef __CYGWIN__
+	/* See if we're resuming a thread that caught a Cygwin signal.
+	   If so, also resume the Cygwin runtime's "sig" thread.  */
+	if (th->cygwin_sig_thread != nullptr)
+	  {
+	    DEBUG_EVENTS ("\"sig\" thread %d (0x%x) blocked by "
+			  "just-resumed thread %d (0x%x)",
+			  th->cygwin_sig_thread->tid,
+			  th->cygwin_sig_thread->tid,
+			  th->tid, th->tid);
+
+	    inferior *inf = find_inferior_pid (this,
+					       windows_process.process_id);
+	    thread_info *sig_thr
+	      = inf->find_thread (ptid_t (windows_process.process_id,
+					  th->cygwin_sig_thread->tid));
+	    if (sig_thr->internal_state () == THREAD_INT_RUNNING)
+	      {
+		DEBUG_EVENTS ("\"sig\" thread %d (0x%x) meant to be running, "
+			      "continuing it now",
+			      th->cygwin_sig_thread->tid,
+			      th->cygwin_sig_thread->tid);
+		windows_process.continue_one_thread (th->cygwin_sig_thread,
+						     cont_flags);
+	      }
+	    /* Break the chain.  */
+	    th->cygwin_sig_thread->signaled_thread = nullptr;
+	    th->cygwin_sig_thread = nullptr;
+	  }
+#endif
+      }
 
-  continue_last_debug_event_main_thread
-    (_("Failed to resume program execution"), continue_status,
-     cont_flags & WCONT_LAST_CALL);
+  if (!target_is_non_stop_p ()
+      || (cont_flags & WCONT_CONTINUE_DEBUG_EVENT) != 0)
+    {
+      DEBUG_EVENTS ("windows_continue -> continue_last_debug_event");
+      continue_last_debug_event_main_thread
+	(_("Failed to resume program execution"), continue_status,
+	 cont_flags & WCONT_LAST_CALL);
+    }
 
   return TRUE;
 }
@@ -1443,36 +1757,46 @@ windows_nat_target::fake_create_process (const DEBUG_EVENT &current_event)
   return current_event.dwThreadId;
 }
 
-void
-windows_nat_target::resume (ptid_t ptid, int step, enum gdb_signal sig)
-{
-  windows_thread_info *th;
-  DWORD continue_status = DBG_CONTINUE;
-
-  /* A specific PTID means `step only this thread id'.  */
-  int resume_all = ptid == minus_one_ptid;
-
-  /* If we're continuing all threads, it's the current inferior that
-     should be handled specially.  */
-  if (resume_all)
-    ptid = inferior_ptid;
+/* Prepare TH to be resumed.  TH and TP must point at the same thread.
+   Records the right dwContinueStatus for SIG in th->reply_later if we
+   used DBG_REPLY_LATER before on this thread, and sets or clears the
+   trace flag according to STEP.  Also returns the dwContinueStatus
+   argument to pass to ContinueDebugEvent.  The thread is still left
+   suspended -- a subsequent windows_continue/continue_one_thread call
+   is needed to flush the thread's register context and unsuspend.  */
 
-  DEBUG_EXEC ("pid=%d, tid=0x%x, step=%d, sig=%d",
-	      ptid.pid (), (unsigned) ptid.lwp (), step, sig);
+DWORD
+windows_nat_target::prepare_resume (windows_thread_info *th,
+				    thread_info *tp,
+				    int step, gdb_signal sig)
+{
+  gdb_assert (th->tid == tp->ptid.lwp ());
 
-  /* Get currently selected thread.  */
-  th = windows_process.find_thread (inferior_ptid);
-  gdb_assert (th != nullptr);
+  DWORD continue_status = DBG_CONTINUE;
 
   if (sig != GDB_SIGNAL_0)
     {
+      /* Allow continuing with the same signal that interrupted us.
+	 Otherwise complain.  */
+
       /* Note it is OK to call get_last_debug_event_ptid() from the
-	 main thread here, because we know the process_thread thread
-	 isn't waiting for an event at this point, so there's no data
-	 race.  */
-      if (inferior_ptid != get_last_debug_event_ptid ())
+	 main thread here in all-stop, because we know the
+	 process_thread thread is not waiting for an event at this
+	 point, so there is no data race.  We cannot call it in
+	 non-stop mode, as the process_thread thread _is_ waiting for
+	 events right now in that case.  However, the restriction does
+	 not exist in non-stop mode, so we don't even call it in that
+	 mode.  */
+      if (!target_is_non_stop_p ()
+	  && tp->ptid != get_last_debug_event_ptid ())
 	{
-	  /* ContinueDebugEvent will be for a different thread.  */
+	  /* In all-stop, ContinueDebugEvent will be for a different
+	     thread.  For non-stop, we've called ContinueDebugEvent
+	     with DBG_REPLY_LATER for this thread, so we just set the
+	     intended continue status in 'reply_later', which is later
+	     passed to ContinueDebugEvent in windows_nat_target::wait
+	     after we resume the thread and we get the replied-later
+	     (repeated) event out of WaitForDebugEvent.  */
 	  DEBUG_EXCEPT ("Cannot continue with signal %d here.  "
 			"Not last-event thread", sig);
 	}
@@ -1508,20 +1832,58 @@ windows_nat_target::resume (ptid_t ptid, int step, enum gdb_signal sig)
 		    th->last_sig);
     }
 
+  /* If DBG_REPLY_LATER was used on the thread, we override the
+     continue status that will be passed to ContinueDebugEvent later
+     with the continue status we've just determined fulfils the
+     caller's resumption request.  Note that DBG_REPLY_LATER is only
+     used in non-stop mode, and in that mode, windows_continue (called
+     below) does not call ContinueDebugEvent.  */
+  if (th->reply_later != 0)
+    th->reply_later = continue_status;
+
+  /* Single step by setting t bit (trap flag).  The trap flag is
+     automatically reset as soon as the single-step exception arrives,
+     however, it's possible to suspend/stop a thread before it
+     executes any instruction, leaving the trap flag set.  If we
+     subsequently decide to continue such a thread instead of stepping
+     it, and we didn't clear the trap flag, the thread would step, and
+     we'd end up reporting a SIGTRAP to the core which the core
+     couldn't explain (because the thread wasn't supposed to be
+     stepping), and end up reporting a spurious SIGTRAP to the
+     user.  */
+  regcache *regcache = get_thread_regcache (tp);
+  fetch_registers (regcache, gdbarch_ps_regnum (regcache->arch ()));
+
   windows_process.with_context (th, [&] (auto *context)
     {
       if (step)
-	{
-	  /* Single step by setting t bit.  */
-	  regcache *regcache = get_thread_regcache (inferior_thread ());
-	  struct gdbarch *gdbarch = regcache->arch ();
-	  fetch_registers (regcache, gdbarch_ps_regnum (gdbarch));
-	  context->EFlags |= FLAG_TRACE_BIT;
-	}
+	context->EFlags |= FLAG_TRACE_BIT;
+      else
+	context->EFlags &= ~FLAG_TRACE_BIT;
     });
 
-  /* Allow continuing with the same signal that interrupted us.
-     Otherwise complain.  */
+  return continue_status;
+}
+
+void
+windows_nat_target::resume (ptid_t ptid, int step, enum gdb_signal sig)
+{
+  /* A specific PTID means `step only this thread id'.  */
+  int resume_all = ptid == minus_one_ptid;
+
+  /* If we're continuing all threads, it's the current inferior that
+     should be handled specially.  */
+  if (resume_all)
+    ptid = inferior_ptid;
+
+  DEBUG_EXEC ("pid=%d, tid=0x%x, step=%d, sig=%d",
+	      ptid.pid (), (unsigned) ptid.lwp (), step, sig);
+
+  /* Get currently selected thread.  */
+  windows_thread_info *th = windows_process.find_thread (inferior_ptid);
+  gdb_assert (th != nullptr);
+
+  DWORD continue_status = prepare_resume (th, inferior_thread (), step, sig);
 
   if (resume_all)
     windows_continue (continue_status, -1);
@@ -1570,12 +1932,134 @@ windows_nat_target::interrupt ()
 	     "Press Ctrl-c in the program console."));
 }
 
+/* Stop thread TH.  This leaves a GDB_SIGNAL_0 pending in the thread,
+   which is later consumed by windows_nat_target::wait.  */
+
+void
+windows_nat_target::stop_one_thread (windows_thread_info *th)
+{
+  ptid_t thr_ptid (windows_process.process_id, th->tid);
+
+  if (th->suspended == -1)
+    {
+      /* Already known to be stopped; and suspension failed, most
+	 probably because the thread is exiting.  Do nothing, and let
+	 the thread exit event be reported.  */
+      DEBUG_EVENTS ("already suspended %s: suspended=%d, stopping=%d",
+		    thr_ptid.to_string ().c_str (),
+		    th->suspended, th->stopping);
+    }
+#ifdef __CYGWIN__
+  else if (th->suspended
+	   && th->signaled_thread != nullptr
+	   && th->pending_status.kind () == TARGET_WAITKIND_IGNORE)
+    {
+      DEBUG_EVENTS ("explict stop for \"sig\" thread %s held for signal",
+		    thr_ptid.to_string ().c_str ());
+
+      th->stopping = true;
+      th->pending_status.set_stopped (GDB_SIGNAL_0);
+      th->last_event = {};
+      serial_event_set (m_wait_event);
+    }
+#endif
+  else if (th->suspended)
+    {
+      /* Already known to be stopped; do nothing.  */
+
+      DEBUG_EVENTS ("already suspended %s: suspended=%d, stopping=%d",
+		    thr_ptid.to_string ().c_str (),
+		    th->suspended, th->stopping);
+
+      th->stopping = true;
+    }
+  else
+    {
+      DEBUG_EVENTS ("stop request for %s", thr_ptid.to_string ().c_str ());
+
+      th->suspend ();
+
+      /* If suspension failed, it means the thread is exiting.  Let
+	 the thread exit event be reported instead of faking our own
+	 stop.  */
+      if (th->suspended == -1)
+	{
+	  DEBUG_EVENTS ("suspension of %s failed, expect thread exit event",
+			thr_ptid.to_string ().c_str ());
+	  return;
+	}
+
+      gdb_assert (th->suspended == 1);
+
+      th->stopping = true;
+      th->pending_status.set_stopped (GDB_SIGNAL_0);
+      th->last_event = {};
+      serial_event_set (m_wait_event);
+    }
+}
+
+/* Implementation of target_ops::stop.  */
+
+void
+windows_nat_target::stop (ptid_t ptid)
+{
+  for (thread_info *thr : all_non_exited_threads (this))
+    {
+      if (thr->ptid.matches (ptid))
+	stop_one_thread (as_windows_thread_info (thr));
+    }
+}
+
 void
 windows_nat_target::pass_ctrlc ()
 {
   interrupt ();
 }
 
+/* Implementation of the target_ops::thread_events method.  */
+
+void
+windows_nat_target::thread_events (bool enable)
+{
+  DEBUG_EVENTS ("windows_nat_target::thread_events(%d)", enable);
+  m_report_thread_events = enable;
+}
+
+/* True if there is any resumed thread.  */
+
+bool
+windows_nat_target::any_resumed_thread ()
+{
+  for (thread_info *thread : all_non_exited_threads (this))
+    if (thread->internal_state () == THREAD_INT_RUNNING)
+      return true;
+  return false;
+}
+
+/* Called for both EXIT_THREAD_DEBUG_EVENT and
+   EXIT_PROCESS_DEBUG_EVENT to handle the fact that the event thread
+   has exited.  */
+
+static void
+handle_thread_exit (const DEBUG_EVENT &current_event)
+{
+  ptid_t ptid (current_event.dwProcessId, current_event.dwThreadId);
+  windows_thread_info *th = windows_process.find_thread (ptid);
+  gdb_assert (th != nullptr);
+
+  /* The handle is still valid, but it is going to be automatically
+     closed by Windows when we next call ContinueDebugEvent.  Fetch
+     the thread's registers while we still can.  For EXIT_PROCESS,
+     ContinueDebugEvent only happens at target_mourn_inferior time,
+     but do this now too, for consistency with EXIT_THREAD time.  */
+  windows_process.fill_thread_context (th);
+  th->h = nullptr;
+
+  /* The thread is gone, so no longer suspended from Windows's
+     perspective.  */
+  th->suspended = -1;
+}
+
 /* Get the next event from the child.  Returns the thread ptid.  */
 
 ptid_t
@@ -1589,24 +2073,32 @@ windows_nat_target::get_windows_debug_event
   /* If there is a relevant pending stop, report it now.  See the
      comment by the definition of "windows_thread_info::pending_status"
      for details on why this is needed.  */
-  for (auto *th : all_windows_threads ())
+  for (thread_info *thread : all_threads_safe ())
     {
-      if (!th->suspended
+      if (thread->inf->process_target () != this)
+	continue;
+
+      auto *th = as_windows_thread_info (thread);
+      if (thread->internal_state () == THREAD_INT_RUNNING
+	  && th->suspended
 	  && th->pending_status.kind () != TARGET_WAITKIND_IGNORE)
 	{
-	  DEBUG_EVENTS ("reporting pending event for 0x%x", th->tid);
-
-	  thread_id = th->tid;
 	  *ourstatus = th->pending_status;
 	  th->pending_status.set_ignore ();
 	  *current_event = th->last_event;
-
-	  ptid_t ptid (windows_process.process_id, thread_id);
-	  windows_process.invalidate_context (th);
-	  return ptid;
+	  DEBUG_EVENTS ("reporting pending event for 0x%x", th->tid);
+	  return thread->ptid;
 	}
     }
 
+  /* If there are no resumed threads left, bail.  */
+  if (windows_process.windows_initialization_done
+      && !any_resumed_thread ())
+    {
+      ourstatus->set_no_resumed ();
+      return minus_one_ptid;
+    }
+
   if ((options & TARGET_WNOHANG) != 0 && !m_debug_event_pending)
     {
       ourstatus->set_ignore ();
@@ -1620,6 +2112,78 @@ windows_nat_target::get_windows_debug_event
   event_code = current_event->dwDebugEventCode;
   ourstatus->set_spurious ();
 
+  ptid_t result_ptid (current_event->dwProcessId,
+		      current_event->dwThreadId, 0);
+  windows_thread_info *result_th = windows_process.find_thread (result_ptid);
+
+  /* If we previously used DBG_REPLY_LATER on this thread, and we're
+     seeing an event for it, it means we've already processed the
+     event, and then subsequently resumed the thread [1], intending to
+     pass REPLY_LATER to ContinueDebugEvent.  Do that now, before the
+     switch table below, which may have side effects that don't make
+     sense for a delayed event.
+
+     [1] - with the caveat that sometimes Windows reports an event for
+     a suspended thread.  Also handled below.  */
+  if (result_th != nullptr && result_th->reply_later != 0)
+    {
+      DEBUG_EVENTS ("reply-later thread 0x%x, suspended=%d, dwDebugEventCode=%s",
+		    result_th->tid, result_th->suspended,
+		    event_code_to_string (event_code).c_str ());
+
+      gdb_assert (dbg_reply_later_available ());
+
+      /* We never ask to DBG_REPLY_LATER these two, so we shouldn't
+	 see them here.  If a thread is forced-exited when a
+	 DBG_REPLY_LATER is in effect, then we will still see the
+	 DBG_REPLY_LATER-ed event before the thread/process exit
+	 event.  */
+      gdb_assert (event_code != EXIT_THREAD_DEBUG_EVENT
+		  && event_code != EXIT_PROCESS_DEBUG_EVENT);
+
+      if (result_th->suspended == 1)
+	{
+	  /* Pending stop.  See the comment by the definition of
+	     "pending_status" for details on why this is needed.  */
+	  DEBUG_EVENTS ("unexpected reply-later stop in suspended thread 0x%x",
+			result_th->tid);
+
+	  /* Put the event back in the kernel queue.  We haven't yet
+	     decided which reply to use.  */
+	  continue_status = DBG_REPLY_LATER;
+	}
+      else if (result_th->suspended == -1)
+	{
+	  /* We resumed the thread expecting to get back a reply-later
+	     event.  Before we saw that event, we tried to suspend the
+	     thread, but that failed, because the thread exited
+	     (likely because the whole process has been killed).  We
+	     should get back an EXIT_THREAD_DEBUG_EVENT for this
+	     thread, but only after getting past this reply-later
+	     event.  */
+	  DEBUG_EVENTS ("reply-later stop in suspend-failed "
+			"thread 0x%x, ignoring",
+			result_th->tid);
+
+	  /* Continue normally, and expect an
+	     EXIT_THREAD_DEBUG_EVENT.  */
+	  continue_status = DBG_CONTINUE;
+	  result_th->reply_later = 0;
+	}
+      else
+	{
+	  continue_status = result_th->reply_later;
+	  result_th->reply_later = 0;
+	}
+
+      /* Go back to waiting for the next event.  */
+      continue_last_debug_event_main_thread
+	(_("Failed to continue reply-later event"), continue_status);
+
+      ourstatus->set_ignore ();
+      return null_ptid;
+    }
+
   DEBUG_EVENTS ("kernel event for pid=%u tid=0x%x code=%s",
 		(unsigned) current_event->dwProcessId,
 		(unsigned) current_event->dwThreadId,
@@ -1653,17 +2217,29 @@ windows_nat_target::get_windows_debug_event
 	      current_event->u.CreateThread.lpThreadLocalBase,
 	      false /* main_thread_p */));
 
-	/* This updates debug registers if necessary.  */
-	windows_process.continue_one_thread (th, 0);
+	/* Update the debug registers if we're not reporting the stop.
+	   If we are (reporting the stop), the debug registers will be
+	   updated when the thread is eventually re-resumed.  */
+	if (m_report_thread_events)
+	  ourstatus->set_thread_created ();
+	else
+	  windows_process.continue_one_thread (th, 0);
       }
       break;
 
     case EXIT_THREAD_DEBUG_EVENT:
-      delete_thread (ptid_t (current_event->dwProcessId,
-			     current_event->dwThreadId, 0),
-		     current_event->u.ExitThread.dwExitCode,
-		     false /* main_thread_p */);
-      thread_id = 0;
+      {
+	ourstatus->set_thread_exited
+	  (current_event->u.ExitThread.dwExitCode);
+	thread_id = current_event->dwThreadId;
+
+	handle_thread_exit (*current_event);
+
+	/* Don't decide yet whether to report the event, or delete the
+	   thread immediately, because we still need to check whether
+	   the event should be left pending, depending on whether the
+	   thread was running or not from the core's perspective.  */
+      }
       break;
 
     case CREATE_PROCESS_DEBUG_EVENT:
@@ -1692,9 +2268,6 @@ windows_nat_target::get_windows_debug_event
 	}
       else if (windows_process.saw_create == 1)
 	{
-	  delete_thread (ptid_t (current_event->dwProcessId,
-				 current_event->dwThreadId, 0),
-			 0, true /* main_thread_p */);
 	  DWORD exit_status = current_event->u.ExitProcess.dwExitCode;
 	  /* If the exit status looks like a fatal exception, but we
 	     don't recognize the exception's code, make the original
@@ -1706,7 +2279,10 @@ windows_nat_target::get_windows_debug_event
 	    ourstatus->set_exited (exit_status);
 	  else
 	    ourstatus->set_signalled (gdb_signal_from_host (exit_signal));
-	  return ptid_t (current_event->dwProcessId);
+
+	  thread_id = current_event->dwThreadId;
+
+	  handle_thread_exit (*current_event);
 	}
       break;
 
@@ -1765,8 +2341,24 @@ windows_nat_target::get_windows_debug_event
     case OUTPUT_DEBUG_STRING_EVENT:	/* Message from the kernel.  */
       if (windows_process.saw_create != 1)
 	break;
-      thread_id = windows_process.handle_output_debug_string (*current_event,
-							      ourstatus);
+      if (windows_process.handle_output_debug_string (*current_event,
+						      ourstatus))
+	{
+	  /* We caught a Cygwin signal for a thread.  That thread now
+	     has a pending event, and the "sig" thread is
+	     suspended.  */
+	  serial_event_set (m_wait_event);
+
+	  /* In all-stop, return now to avoid reaching
+	     ContinueDebugEvent further below.  In all-stop, it's
+	     always windows_nat_target::resume that does the
+	     ContinueDebugEvent call.  */
+	  if (!target_is_non_stop_p ())
+	    {
+	      ourstatus->set_ignore ();
+	      return null_ptid;
+	    }
+	}
       break;
 
     default:
@@ -1788,35 +2380,79 @@ windows_nat_target::get_windows_debug_event
       return null_ptid;
     }
 
-  const ptid_t ptid = ptid_t (current_event->dwProcessId, thread_id, 0);
-  windows_thread_info *th = windows_process.find_thread (ptid);
+  const ptid_t ptid = ptid_t (current_event->dwProcessId, thread_id);
+  thread_info *thread = this->find_thread (ptid);
+  auto *th = as_windows_thread_info (thread);
 
   th->last_event = *current_event;
 
-  if (th->suspended)
+  if (thread->internal_state () == THREAD_INT_STOPPED)
     {
+      gdb_assert (th->suspended != 0);
+
       /* Pending stop.  See the comment by the definition of
 	 "pending_status" for details on why this is needed.  */
       DEBUG_EVENTS ("get_windows_debug_event - "
 		    "unexpected stop in suspended thread 0x%x",
 		    thread_id);
 
-      if (current_event->dwDebugEventCode == EXCEPTION_DEBUG_EVENT
-	  && ((current_event->u.Exception.ExceptionRecord.ExceptionCode
-	       == EXCEPTION_BREAKPOINT)
-	      || (current_event->u.Exception.ExceptionRecord.ExceptionCode
-		  == STATUS_WX86_BREAKPOINT))
-	  && windows_process.windows_initialization_done)
+      /* Use DBG_REPLY_LATER to put the event back in the kernel queue
+	 if possible.  Don't do that with exit-thread or exit-process
+	 events, because when a thread is dead, it can't be suspended
+	 anymore, so the kernel would immediately re-report the
+	 event.  */
+      if (event_code != EXIT_THREAD_DEBUG_EVENT
+	  && event_code != EXIT_PROCESS_DEBUG_EVENT
+	  && dbg_reply_later_available ())
 	{
-	  th->stopped_at_software_breakpoint = true;
-	  th->pc_adjusted = false;
+	  /* Thankfully, the Windows kernel doesn't immediately
+	     re-report the unexpected event for a suspended thread
+	     when we defer it with DBG_REPLY_LATER, otherwise this
+	     would get us stuck in an infinite loop re-processing the
+	     same unexpected event over and over.  (Which is what
+	     would happen if we used DBG_REPLY_LATER on an exit-thread
+	     or exit-process event.  See comment above.)  */
+	  continue_status = DBG_REPLY_LATER;
+	}
+      else
+	{
+	  if (current_event->dwDebugEventCode == EXCEPTION_DEBUG_EVENT
+	      && ((current_event->u.Exception.ExceptionRecord.ExceptionCode
+		   == EXCEPTION_BREAKPOINT)
+		  || (current_event->u.Exception.ExceptionRecord.ExceptionCode
+		      == STATUS_WX86_BREAKPOINT))
+	      && windows_process.windows_initialization_done)
+	    {
+	      th->stopped_at_software_breakpoint = true;
+	      th->pc_adjusted = false;
+	    }
+
+	  th->pending_status = *ourstatus;
+	  th->last_event = {};
 	}
 
-      th->pending_status = *ourstatus;
+      /* For exit-process, the debug event is continued later, at
+	 mourn time.  */
+      if (event_code != EXIT_PROCESS_DEBUG_EVENT)
+	{
+	  continue_last_debug_event_main_thread
+	    (_("Failed to resume program execution"), continue_status);
+	}
       ourstatus->set_ignore ();
+      return null_ptid;
+    }
 
-      continue_last_debug_event_main_thread
-	(_("Failed to resume program execution"), continue_status);
+  gdb_assert (thread->internal_state () == THREAD_INT_RUNNING);
+
+  /* Now that we've handled exit events for suspended threads (above),
+     we can finally decide whether to report the thread exit event or
+     just delete the thread without bothering the core.  */
+  if (ourstatus->kind () == TARGET_WAITKIND_THREAD_EXITED
+      && !m_report_thread_events)
+    {
+      delete_thread (ptid, ourstatus->exit_status (),
+		     false /* main_thread_p */);
+      ourstatus->set_spurious ();
       return null_ptid;
     }
 
@@ -1843,15 +2479,26 @@ windows_nat_target::wait (ptid_t ptid, struct target_waitstatus *ourstatus,
 
   while (1)
     {
-      DEBUG_EVENT current_event;
+      DEBUG_EVENT current_event {};
 
       ptid_t result = get_windows_debug_event (pid, ourstatus, options,
 					       &current_event);
+      /* True if this is a pending event that we injected ourselves,
+	 instead of a real event out of WaitForDebugEvent.  */
+      bool fake = current_event.dwDebugEventCode == 0;
+
+      DEBUG_EVENTS ("get_windows_debug_event returned [%s : %s, fake=%d]",
+		    result.to_string ().c_str (),
+		    ourstatus->to_string ().c_str(),
+		    fake);
 
       if ((options & TARGET_WNOHANG) != 0
 	  && ourstatus->kind () == TARGET_WAITKIND_IGNORE)
 	return result;
 
+      if (ourstatus->kind () == TARGET_WAITKIND_NO_RESUMED)
+	return result;
+
       if (ourstatus->kind () == TARGET_WAITKIND_SPURIOUS)
 	{
 	  continue_last_debug_event_main_thread
@@ -1877,10 +2524,40 @@ windows_nat_target::wait (ptid_t ptid, struct target_waitstatus *ourstatus,
 		  th->pc_adjusted = false;
 		}
 
+	      /* If non-stop, suspend the event thread, and continue
+		 it with DBG_REPLY_LATER, so the other threads go back
+		 to running as soon as possible.  Don't do this if
+		 stopping the thread, as in that case the thread was
+		 already suspended, and also there's no real Windows
+		 debug event to continue in that case.  */
+	      if (windows_process.windows_initialization_done
+		  && target_is_non_stop_p ()
+		  && !fake)
+		{
+		  if (ourstatus->kind () == TARGET_WAITKIND_THREAD_EXITED)
+		    {
+		      gdb_assert (th->suspended == -1);
+		      continue_last_debug_event_main_thread
+			(_("Init: Failed to DBG_CONTINUE after thread exit"),
+			 DBG_CONTINUE);
+		    }
+		  else
+		    {
+		      th->suspend ();
+		      th->reply_later = DBG_CONTINUE;
+		      continue_last_debug_event_main_thread
+			(_("Init: Failed to defer event with DBG_REPLY_LATER"),
+			 DBG_REPLY_LATER);
+		    }
+		}
+
 	      /* All-stop, suspend all threads until they are
 		 explicitly resumed.  */
-	      for (auto *thr : all_windows_threads ())
-		thr->suspend ();
+	      if (!target_is_non_stop_p ())
+		for (auto *thr : all_windows_threads ())
+		  thr->suspend ();
+
+	      th->stopping = false;
 	    }
 
 	  /* If something came out, assume there may be more.  This is
@@ -1946,22 +2623,31 @@ windows_nat_target::do_initial_windows_stuff (DWORD pid, bool attaching)
 
   ptid_t last_ptid;
 
+  /* Keep fetching events until we see the initial breakpoint (which
+     is planted by Windows itself) being reported.  */
+
   while (1)
     {
       struct target_waitstatus status;
 
       last_ptid = this->wait (minus_one_ptid, &status, 0);
 
-      /* Note windows_wait returns TARGET_WAITKIND_SPURIOUS for thread
-	 events.  */
-      if (status.kind () != TARGET_WAITKIND_LOADED
-	  && status.kind () != TARGET_WAITKIND_SPURIOUS)
+      /* These result in an error being thrown before we get here.  */
+      gdb_assert (status.kind () != TARGET_WAITKIND_EXITED
+		  && status.kind () != TARGET_WAITKIND_SIGNALLED);
+
+      /* We may also see TARGET_WAITKIND_THREAD_EXITED if
+	 target_thread_events is active (because another thread was
+	 stepping earlier, for example).  Ignore such events until we
+	 see the initial breakpoint.  */
+
+      if (status.kind () == TARGET_WAITKIND_STOPPED)
 	break;
 
       /* Don't use windows_nat_target::resume here because that
 	 assumes that inferior_ptid points at a valid thread, and we
 	 haven't switched to any thread yet.  */
-      windows_continue (DBG_CONTINUE, -1);
+      windows_continue (DBG_CONTINUE, -1, WCONT_CONTINUE_DEBUG_EVENT);
     }
 
   switch_to_thread (this->find_thread (last_ptid));
@@ -2105,7 +2791,29 @@ windows_nat_target::attach (const char *args, int from_tty)
 #endif
 
   do_initial_windows_stuff (pid, 1);
-  target_terminal::ours ();
+
+  if (target_is_non_stop_p ())
+    {
+      /* Leave all threads running.  */
+
+      continue_last_debug_event_main_thread
+	(_("Failed to DBG_CONTINUE after attach"),
+	 DBG_CONTINUE);
+
+      /* The thread that reports the initial breakpoint, and thus ends
+	 up as selected thread here, was injected by Windows into the
+	 program for the attach, and it exits as soon as we resume it.
+	 Switch to the first thread in the inferior, otherwise the
+	 user will be left with an exited thread selected.  */
+      switch_to_thread (first_thread_of_inferior (current_inferior ()));
+    }
+  else
+    {
+      set_state (this, minus_one_ptid, THREAD_STOPPED);
+      set_internal_state (this, minus_one_ptid, THREAD_INT_STOPPED);
+
+      target_terminal::ours ();
+    }
 }
 
 void
@@ -2206,16 +2914,77 @@ windows_nat_target::break_out_process_thread (bool &process_alive)
 
       DEBUG_EVENTS ("got unrelated event, code %u",
 		    current_event.dwDebugEventCode);
-      windows_continue (DBG_CONTINUE, -1, 0);
+
+      DWORD continue_status
+	= continue_status_for_event_detaching (current_event);
+      windows_continue (continue_status, -1, WCONT_CONTINUE_DEBUG_EVENT);
     }
 
   if (injected_thread_handle != NULL)
     CHECK (CloseHandle (injected_thread_handle));
 }
 
+
+/* Used while detaching.  Decide whether to pass the exception or not.
+   Returns the dwContinueStatus argument to pass to
+   ContinueDebugEvent.  */
+
+DWORD
+windows_nat_target::continue_status_for_event_detaching
+  (const DEBUG_EVENT &event, size_t *reply_later_events_left)
+{
+  ptid_t ptid (event.dwProcessId, event.dwThreadId, 0);
+  windows_thread_info *th = windows_process.find_thread (ptid);
+
+  /* This can be a thread that we don't know about, as we're not
+     tracking thread creation events at this point.  */
+  if (th != nullptr && th->reply_later != 0)
+    {
+      DWORD res = th->reply_later;
+      th->reply_later = 0;
+      if (reply_later_events_left != nullptr)
+	(*reply_later_events_left)--;
+      return res;
+    }
+  else if (event.dwDebugEventCode == EXCEPTION_DEBUG_EVENT)
+    {
+      /* As the user asked to detach already, any new exception not
+	 seen by infrun before, is passed down to the inferior without
+	 considering "handle SIG pass/nopass".  We can just pretend
+	 the exception was raised after the inferior was detached.  */
+      return DBG_EXCEPTION_NOT_HANDLED;
+    }
+  else
+    return DBG_CONTINUE;
+}
+
 void
 windows_nat_target::detach (inferior *inf, int from_tty)
 {
+  DWORD continue_status = DBG_CONTINUE;
+
+  /* For any thread the core hasn't resumed, call prepare_resume with
+     the signal that the thread would be resumed with, so that we set
+     the right reply_later value, and also, so that we clear the trace
+     flag.  */
+  for (thread_info *tp : inf->non_exited_threads ())
+    {
+      if (tp->internal_state () != THREAD_INT_RUNNING)
+	{
+	  windows_thread_info *wth = windows_process.find_thread (tp->ptid);
+	  gdb_signal signo = get_detach_signal (this, tp->ptid);
+
+	  if (signo != wth->last_sig
+	      || (signo != GDB_SIGNAL_0 && !signal_pass_state (signo)))
+	    signo = GDB_SIGNAL_0;
+
+	  DWORD cstatus = prepare_resume (wth, tp, 0, signo);
+
+	  if (!m_continued && tp->ptid == get_last_debug_event_ptid ())
+	    continue_status = cstatus;
+	}
+    }
+
   /* If we see the process exit while unblocking the process_thread
      helper thread, then we should skip the actual
      DebugActiveProcessStop call.  But don't report an error.  Just
@@ -2223,20 +2992,76 @@ windows_nat_target::detach (inferior *inf, int from_tty)
   bool process_alive = true;
 
   /* The process_thread helper thread will be blocked in
-     WaitForDebugEvent waiting for events if we've resumed the target
-     before we get here, e.g., with "attach&" or "c&".  We need to
-     unblock it so that we can have it call DebugActiveProcessStop
-     below, in the do_synchronously block.  */
+     WaitForDebugEvent waiting for events if we're in non-stop mode,
+     or if in all-stop and we've resumed the target before we get
+     here, e.g., with "attach&" or "c&".  We need to unblock it so
+     that we can have it call DebugActiveProcessStop below, in the
+     do_synchronously block.  */
   if (m_continued)
-    break_out_process_thread (process_alive);
+    {
+      break_out_process_thread (process_alive);
+
+      /* We're now either stopped at a thread exit event, or a process
+	 exit event.  */
+      continue_status = DBG_CONTINUE;
+    }
 
-  windows_continue (DBG_CONTINUE, -1, WCONT_LAST_CALL);
+  windows_continue (continue_status, -1,
+		    WCONT_LAST_CALL | WCONT_CONTINUE_DEBUG_EVENT);
 
   std::optional<unsigned> err;
   if (process_alive)
     do_synchronously ([&] ()
       {
-	if (!DebugActiveProcessStop (windows_process.process_id))
+	/* The kernel re-raises any exception previously intercepted
+	   and deferred with DBG_REPLY_LATER in the inferior after we
+	   detach.  We need to flush those, and suppress those which
+	   aren't meant to be seen by the inferior (e.g., breakpoints,
+	   single-steps, any with matching "handle SIG nopass", etc.),
+	   otherwise the inferior dies immediately after the detach,
+	   due to an unhandled exception.  */
+	DEBUG_EVENT event;
+
+	/* Count how many threads have pending reply-later events.  */
+	size_t reply_later_events_left = 0;
+	for (auto *th : all_windows_threads ())
+	  if (th->reply_later != 0)
+	    reply_later_events_left++;
+
+	DEBUG_EVENTS ("flushing %zu reply-later events",
+		      reply_later_events_left);
+
+	/* Note we have to use a blocking wait (hence the need for the
+	   counter).  Just polling (timeout=0) until WaitForDebugEvent
+	   returns false would be racy -- the kernel may take a little
+	   bit to put the events in the pending queue.  That has been
+	   observed on Windows 11, where detaching would still very
+	   occasionally result in the inferior dying after the detach
+	   due to a reply-later event.  */
+	while (reply_later_events_left > 0
+	       && wait_for_debug_event (&event, INFINITE))
+	  {
+	    DEBUG_EVENTS ("flushed kernel event code %u",
+			  event.dwDebugEventCode);
+
+	    DWORD cstatus = (continue_status_for_event_detaching
+			     (event, &reply_later_events_left));
+	    if (!continue_last_debug_event (cstatus, debug_events))
+	      {
+		err = (unsigned) GetLastError ();
+		return false;
+	      }
+
+	    if (event.dwDebugEventCode == EXIT_PROCESS_DEBUG_EVENT)
+	      {
+		DEBUG_EVENTS ("got EXIT_PROCESS_DEBUG_EVENT, skipping detach");
+		process_alive = false;
+		break;
+	      }
+	  }
+
+	if (process_alive
+	    && !DebugActiveProcessStop (windows_process.process_id))
 	  err = (unsigned) GetLastError ();
 	else
 	  DebugSetProcessKillOnExit (FALSE);
@@ -2976,13 +3801,32 @@ windows_nat_target::create_inferior (const char *exec_file,
 
   do_initial_windows_stuff (pi.dwProcessId, 0);
 
-  /* windows_continue (DBG_CONTINUE, -1); */
+  /* Present the initial thread as stopped to the core.  */
+  windows_thread_info *th = windows_process.find_thread (inferior_ptid);
+
+  th->suspend ();
+  set_state (this, inferior_ptid, THREAD_STOPPED);
+  set_internal_state (this, inferior_ptid, THREAD_INT_STOPPED);
+
+  if (target_is_non_stop_p ())
+    {
+      /* In non-stop mode, we always immediately use DBG_REPLY_LATER
+	 on threads as soon as they report an event.  However, during
+	 the initial startup, windows_nat_target::wait does not do
+	 this, so we need to handle it here for the initial
+	 thread.  */
+      th->reply_later = DBG_CONTINUE;
+      continue_last_debug_event_main_thread
+	(_("Failed to defer event with DBG_REPLY_LATER"),
+	 DBG_REPLY_LATER);
+    }
 }
 
 void
 windows_nat_target::mourn_inferior ()
 {
-  windows_continue (DBG_CONTINUE, -1, WCONT_LAST_CALL);
+  windows_continue (DBG_CONTINUE, -1,
+		    WCONT_LAST_CALL | WCONT_CONTINUE_DEBUG_EVENT);
   x86_cleanup_dregs();
   if (windows_process.open_process_used)
     {
@@ -3032,19 +3876,55 @@ windows_xfer_memory (gdb_byte *readbuf, const gdb_byte *writebuf,
     return success ? TARGET_XFER_OK : TARGET_XFER_E_IO;
 }
 
+/* Return true if all the threads of the process have already
+   exited.  */
+
+static bool
+already_dead ()
+{
+  for (windows_thread_info *th : all_windows_threads ())
+    if (th->h != nullptr)
+      return false;
+  return true;
+}
+
 void
 windows_nat_target::kill ()
 {
+  /* If all the threads of the process have already exited, there is
+     really nothing to kill.  This can happen with e.g., scheduler
+     locking, where the thread exit events for all threads are still
+     pending to be processed by the core.  */
+  if (already_dead ())
+    {
+      target_mourn_inferior (inferior_ptid);
+      return;
+    }
+
   CHECK (TerminateProcess (windows_process.handle, 0));
 
+  /* In non-stop mode, windows_continue does not call
+     ContinueDebugEvent by default.  This behavior is appropriate for
+     the first call to windows_continue because any thread that is
+     stopped has already been ContinueDebugEvent'ed with
+     DBG_REPLY_LATER.  However, after the first
+     wait_for_debug_event_main_thread call in the loop, this will no
+     longer be true.
+
+     In all-stop mode, the WCONT_CONTINUE_DEBUG_EVENT flag has no
+     effect, so writing the code in this way ensures that the code is
+     the same for both modes.  */
+  windows_continue_flags flags = WCONT_KILLED;
+
   for (;;)
     {
-      if (!windows_continue (DBG_CONTINUE, -1, WCONT_KILLED))
+      if (!windows_continue (DBG_CONTINUE, -1, flags))
 	break;
       DEBUG_EVENT current_event;
       wait_for_debug_event_main_thread (&current_event);
       if (current_event.dwDebugEventCode == EXIT_PROCESS_DEBUG_EVENT)
 	break;
+      flags |= WCONT_CONTINUE_DEBUG_EVENT;
     }
 
   target_mourn_inferior (inferior_ptid);	/* Or just windows_mourn_inferior?  */
@@ -3179,6 +4059,16 @@ windows_nat_target::thread_name (struct thread_info *thr)
 }
 
 
+/* Implementation of the target_ops::supports_non_stop method.  */
+
+bool
+windows_nat_target::supports_non_stop ()
+{
+  /* Non-stop support requires DBG_REPLY_LATER, which only exists on
+     Windows 10 and later.  */
+  return dbg_reply_later_available ();
+}
+
 void _initialize_windows_nat ();
 void
 _initialize_windows_nat ()
diff --git a/gdbserver/win32-low.cc b/gdbserver/win32-low.cc
index 1d3b24540d6..7f5641bd265 100644
--- a/gdbserver/win32-low.cc
+++ b/gdbserver/win32-low.cc
@@ -344,8 +344,9 @@ do_initial_child_stuff (HANDLE proch, DWORD pid, int attached)
 
       the_target->wait (minus_one_ptid, &status, 0);
 
-      /* Note win32_wait doesn't return thread events.  */
-      if (status.kind () != TARGET_WAITKIND_LOADED)
+      if (status.kind () == TARGET_WAITKIND_EXITED
+	  || status.kind () == TARGET_WAITKIND_SIGNALLED
+	  || status.kind () == TARGET_WAITKIND_STOPPED)
 	{
 	  windows_process.cached_status = status;
 	  break;
@@ -604,7 +605,7 @@ win32_process_target::attach (unsigned long pid)
 
 /* See nat/windows-nat.h.  */
 
-DWORD
+bool
 gdbserver_windows_process::handle_output_debug_string
   (const DEBUG_EVENT &current_event,
    struct target_waitstatus *ourstatus)
@@ -615,7 +616,7 @@ gdbserver_windows_process::handle_output_debug_string
   DWORD nbytes = current_event.u.DebugString.nDebugStringLength;
 
   if (nbytes == 0)
-    return 0;
+    return false;
 
   if (nbytes > READ_BUFFER_LEN)
     nbytes = READ_BUFFER_LEN;
@@ -634,7 +635,7 @@ gdbserver_windows_process::handle_output_debug_string
   else
     {
       if (read_inferior_memory (addr, (unsigned char *) s, nbytes) != 0)
-	return 0;
+	return false;
     }
 
   if (!startswith (s, "cYg"))
@@ -642,14 +643,14 @@ gdbserver_windows_process::handle_output_debug_string
       if (!server_waiting)
 	{
 	  OUTMSG2(("%s", s));
-	  return 0;
+	  return false;
 	}
 
       monitor_output (s);
     }
 #undef READ_BUFFER_LEN
 
-  return 0;
+  return false;
 }
 
 static void
diff --git a/gdbserver/win32-low.h b/gdbserver/win32-low.h
index 123fbc76f57..60ad00b9c0a 100644
--- a/gdbserver/win32-low.h
+++ b/gdbserver/win32-low.h
@@ -182,8 +182,8 @@ public:
 struct gdbserver_windows_process : public windows_nat::windows_process_info
 {
   windows_nat::windows_thread_info *find_thread (ptid_t ptid) override;
-  DWORD handle_output_debug_string (const DEBUG_EVENT &current_event,
-				    struct target_waitstatus *ourstatus) override;
+  bool handle_output_debug_string (const DEBUG_EVENT &current_event,
+				   struct target_waitstatus *ourstatus) override;
   void handle_load_dll (const char *dll_name, LPVOID base) override;
   void handle_unload_dll (const DEBUG_EVENT &current_event) override;
   bool handle_access_violation (const EXCEPTION_RECORD *rec) override;