From e20f63228799bd78293ff155244bd5009089aaf5 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Wed, 21 Feb 2024 11:47:03 +0000 Subject: [PATCH] gdb, infrun: fix silent inferior switch in do_target_wait() In do_target_wait(), we iterate over inferiors and call do_target_wait_1(), which eventually calls target_wait() per inferior. The surrounding code selects a random inferior and then iterates over all inferiors starting from the selected inferior. In each iteration, it waits for one inferior. We forgot to restrict the wait, however, so we may end up with an event for a different inferior. In some cases, e.g. gdb.threads/detach-step-over.exp, we ask to wait for one inferior, and get an event from a different inferior back without noticing the inferior switch. Wait for a single inferior, instead. Since we iterate over all inferiors, we still cover everything. This exposes another bug with STOP_QUIETLY_NO_SIGSTOP handling. After attaching, we interrupt all threads in the new inferior, then call do_target_wait() to receive the stopped events. This randomly selects an inferior to start waiting for and iterates over all inferiors starting from there. The initial stop event for the main thread is already queued up, so we wouldn't actually wait() if we had started with the new inferior. Or if we had waited for minus_one_ptid, which would then have silently switched inferiors. Since we no longer allow that, we may actually wait() for the new inferior and find other events to report, out of which we randomly select one. If we selected an event for another thread, e.g. one that had been interrupted as part of non-stop attach, STOP_QUIETLY_NO_SIGSTOP would be applied to that thread (unnecessarily), leaving the main thread with a SIGSTOP event but last_resume_kind = resume_continue. When the main thread is later selected, SIGSTOP is reported to the user. Normally, linux-nat's wait() turns the SIGSTOP it uses for interrupting threads into GDB_SIGNAL_0. This is based on last_resume_kind, which is set to resume_stop when sending SIGSTOP to interrupt a thread. We do this for all threads of the new inferior when interrupting them as part of non-stop attach. Except for the main thread, which we expect to be reported before the first wait(). Set last_resume_kind to resume_stop for the main thread after attaching. --- gdb/infrun.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++--- gdb/linux-nat.c | 17 +++++++++++----- gdb/remote.c | 22 +++++++++++++++----- 3 files changed, 79 insertions(+), 13 deletions(-) diff --git a/gdb/infrun.c b/gdb/infrun.c index 9b26ba88621..a6e12c48615 100644 --- a/gdb/infrun.c +++ b/gdb/infrun.c @@ -4203,7 +4203,24 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, auto do_wait = [&] (inferior *inf) { - ecs->ptid = do_target_wait_1 (inf, wait_ptid, &ecs->ws, options); + ptid_t ptid { inf->pid }; + + /* Make sure we're not widening WAIT_PTID. */ + if (!ptid.matches (wait_ptid) + /* Targets that cannot async will be asked for a blocking wait. + + Blocking wait does not work inferior-by-inferior if the target + provides more than one inferior. Fall back to waiting for + WAIT_PTID in that case. */ + || !target_can_async_p () || ((options & TARGET_WNOHANG) == 0) + /* We cannot wait for inferiors without a pid. + + One such inferior is created by initialize_inferiors () to + ensure that there always is an inferior. */ + || !ptid.is_pid ()) + ptid = wait_ptid; + + ecs->ptid = do_target_wait_1 (inf, ptid, &ecs->ws, options); ecs->target = inf->process_target (); return (ecs->ws.kind () != TARGET_WAITKIND_IGNORE); }; @@ -4213,6 +4230,18 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, reported the stop to the user, polling for events. */ scoped_restore_current_thread restore_thread; + /* The first TARGET_WAITKIND_NO_RESUMED execution state. + + We do not want to return TARGET_WAITKIND_NO_RESUMED right away since + another inferior may have a more interesting event to report. If + there is no other event to report, after all, we want to report the + first such event. + + This variable holds that first event, which will be copied on the + first TARGET_WAITKIND_NO_RESUMED below. */ + execution_control_state no_resumed {}; + no_resumed.ptid = null_ptid; + intrusive_list_iterator start = inferior_list.iterator_to (*selected); @@ -4223,7 +4252,13 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, inferior *inf = &*it; if (inferior_matches (inf) && do_wait (inf)) - return true; + { + if (ecs->ws.kind () != TARGET_WAITKIND_NO_RESUMED) + return true; + + if (no_resumed.ptid == null_ptid) + no_resumed = *ecs; + } } for (intrusive_list_iterator it = inferior_list.begin (); @@ -4233,7 +4268,19 @@ do_target_wait (ptid_t wait_ptid, execution_control_state *ecs, inferior *inf = &*it; if (inferior_matches (inf) && do_wait (inf)) - return true; + { + if (ecs->ws.kind () != TARGET_WAITKIND_NO_RESUMED) + return true; + + if (no_resumed.ptid == null_ptid) + no_resumed = *ecs; + } + } + + if (no_resumed.ptid != null_ptid) + { + *ecs = no_resumed; + return true; } ecs->ws.set_ignore (); diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c index 2f98060506b..7889b2882e1 100644 --- a/gdb/linux-nat.c +++ b/gdb/linux-nat.c @@ -1162,6 +1162,7 @@ linux_nat_target::attach (const char *args, int from_tty) /* Add the initial process as the first LWP to the list. */ lp = add_initial_lwp (ptid); + lp->last_resume_kind = resume_stop; status = linux_nat_post_attach_wait (lp->ptid, &lp->signalled); if (!WIFSTOPPED (status)) @@ -3324,12 +3325,18 @@ linux_nat_wait_1 (ptid_t ptid, struct target_waitstatus *ourstatus, moment at which we know its PID. */ if (ptid.is_pid () && find_lwp_pid (ptid) == nullptr) { - ptid_t lwp_ptid (ptid.pid (), ptid.pid ()); + /* Unless we already did and this is simply a request to wait for a + particular inferior. */ + inferior *inf = find_inferior_ptid (linux_target, ptid); + if ((inf != nullptr) && (inf->find_thread (ptid) != nullptr)) + { + ptid_t lwp_ptid (ptid.pid (), ptid.pid ()); - /* Upgrade the main thread's ptid. */ - thread_change_ptid (linux_target, ptid, lwp_ptid); - lp = add_initial_lwp (lwp_ptid); - lp->resumed = 1; + /* Upgrade the main thread's ptid. */ + thread_change_ptid (linux_target, ptid, lwp_ptid); + lp = add_initial_lwp (lwp_ptid); + lp->resumed = 1; + } } /* Make sure SIGCHLD is blocked until the sigsuspend below. */ diff --git a/gdb/remote.c b/gdb/remote.c index 4b829d85f11..445b4ee3153 100644 --- a/gdb/remote.c +++ b/gdb/remote.c @@ -7919,12 +7919,24 @@ remote_target::remote_notif_remove_queued_reply (ptid_t ptid) { remote_state *rs = get_remote_state (); + auto pred = [=] (const stop_reply_up &event) + { + /* A null ptid should only happen if we have a single process. It + wouldn't match the process ptid, though, so let's check this case + separately. */ + if ((event->ptid == null_ptid) && ptid.is_pid ()) + return true; + + /* A minus one ptid should only happen for events that match + everything. It wouldn't match a process or thread ptid, though, so + let's check this case separately. */ + if (event->ptid == minus_one_ptid) + return true; + + return event->ptid.matches (ptid); + }; auto iter = std::find_if (rs->stop_reply_queue.begin (), - rs->stop_reply_queue.end (), - [=] (const stop_reply_up &event) - { - return event->ptid.matches (ptid); - }); + rs->stop_reply_queue.end (), pred); stop_reply_up result; if (iter != rs->stop_reply_queue.end ()) { -- 2.47.2