gdb/amd-dbgapi: disable forward progress requirement in amd_dbgapi_target_breakpoint::check_status

author Simon Marchi <simon.marchi@efficios.com>

Mon, 9 Jun 2025 16:09:02 +0000 (12:09 -0400)

committer Simon Marchi <simon.marchi@efficios.com>

Mon, 16 Jun 2025 14:23:16 +0000 (10:23 -0400)
author Simon Marchi <simon.marchi@efficios.com>
Mon, 9 Jun 2025 16:09:02 +0000 (12:09 -0400)
committer Simon Marchi <simon.marchi@efficios.com>
Mon, 16 Jun 2025 14:23:16 +0000 (10:23 -0400)
diff --git a/gdb/amd-dbgapi-target.c b/gdb/amd-dbgapi-target.c

index fd4f9cba48ad855ec80dca7d3d2d42b1f4bed39c..e2a8ec83404ee2a346f3a8cc5f4cea27a9053edc 100644 (file)
--- a/gdb/amd-dbgapi-target.c
+++ b/gdb/amd-dbgapi-target.c
@@ -568,6 +568,8 @@ amd_dbgapi_target_breakpoint::check_status (struct bpstat *bs)
    if (action == AMD_DBGAPI_BREAKPOINT_ACTION_RESUME)
      return;
  
+  require_forward_progress (*info, false);
+
    /* If the action is AMD_DBGAPI_BREAKPOINT_ACTION_HALT, we need to wait until
       a breakpoint resume event for this breakpoint_id is seen.  */
    amd_dbgapi_event_id_t resume_event_id
@@ -1335,6 +1337,10 @@ static amd_dbgapi_event_id_t
  process_event_queue (amd_dbgapi_inferior_info &info,
                      amd_dbgapi_event_kind_t until_event_kind)
  {
+  /* Pulling events with forward progress required may result in bad
+     performance, make sure it is not required.  */
+  gdb_assert (!info.forward_progress_required);
+
    while (true)
      {
        amd_dbgapi_event_id_t event_id;
diff --git a/gdb/testsuite/gdb.rocm/code-object-load-while-breakpoint-hit.cpp b/gdb/testsuite/gdb.rocm/code-object-load-while-breakpoint-hit.cpp

new file mode 100644 (file)

index 0000000..d75bc76
--- /dev/null
+++ b/gdb/testsuite/gdb.rocm/code-object-load-while-breakpoint-hit.cpp
@@ -0,0 +1,86 @@
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2025 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef DEVICE
+
+#include <hip/hip_runtime.h>
+
+constexpr unsigned int NUM_BREAKPOINT_HITS = 5;
+
+/* Intentionally empty device function.  It exists only to give the
+   debugger a named location on which to place a breakpoint; kernel calls
+   it repeatedly.  */
+static __device__ void
+break_here ()
+{
+}
+
+/* Kernel entry point: calls break_here NUM_BREAKPOINT_HITS times.
+   Declared extern "C" so that the host side can look it up by the
+   unmangled name "kernel".  */
+extern "C" __global__ void
+kernel ()
+{
+  /* The index is unsigned to match the type of NUM_BREAKPOINT_HITS and
+     avoid a signed/unsigned comparison.  */
+  for (unsigned int n = 0; n < NUM_BREAKPOINT_HITS; ++n)
+    break_here ();
+}
+
+#else
+
+#include <hip/hip_runtime.h>
+#include <unistd.h>
+
+constexpr unsigned int NUM_ITEMS_PER_BLOCK = 256;
+constexpr unsigned int NUM_BLOCKS = 128;
+constexpr unsigned int NUM_ITEMS = NUM_ITEMS_PER_BLOCK * NUM_BLOCKS;
+constexpr unsigned int NUM_LOAD_UNLOADS = 5;
+
+#define CHECK(cmd)                                                            \
+  {                                                                           \
+    hipError_t error = cmd;                                                   \
+    if (error != hipSuccess)                                                  \
+      {                                                                       \
+       fprintf (stderr, "error: '%s'(%d) at %s:%d\n",                        \
+                hipGetErrorString (error), error, __FILE__, __LINE__);       \
+       exit (EXIT_FAILURE);                                                  \
+      }                                                                       \
+  }
+
+int
+main (int argc, const char **argv)
+{
+  if (argc != 2)
+    {
+      fprintf (stderr, "Usage: %s <hip_module_path>\n", argv[0]);
+      return 1;
+    }
+
+  const auto module_path = argv[1];
+  hipModule_t module;
+  CHECK (hipModuleLoad (&module, module_path));
+
+  /* Launch the kernel.  */
+  hipFunction_t function;
+  CHECK (hipModuleGetFunction (&function, module, "kernel"));
+  CHECK (hipModuleLaunchKernel (function, NUM_BLOCKS, 1, 1,
+                               NUM_ITEMS_PER_BLOCK, 1, 1, 0, nullptr, nullptr,
+                               nullptr));
+
+  /* Load and unload the module many times.  */
+  for (int i = 0; i < NUM_LOAD_UNLOADS; ++i)
+    {
+      hipModule_t dummy_module;
+      CHECK (hipModuleLoad (&dummy_module, module_path));
+      CHECK (hipModuleUnload (dummy_module));
+    }
+}
+
+#endif
diff --git a/gdb/testsuite/gdb.rocm/code-object-load-while-breakpoint-hit.exp b/gdb/testsuite/gdb.rocm/code-object-load-while-breakpoint-hit.exp

new file mode 100644 (file)

index 0000000..3fe6a95
--- /dev/null
+++ b/gdb/testsuite/gdb.rocm/code-object-load-while-breakpoint-hit.exp
@@ -0,0 +1,68 @@
+# Copyright 2025 Free Software Foundation, Inc.
+
+# This file is part of GDB.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This test verifies what happens when a code object list update happens at the
+# same time as some wave stop events are reported.  It was added following a
+# performance bug fix that disables the forward progress requirement when
+# pulling events from amd-dbgapi in amd_dbgapi_target_breakpoint::check_status.
+#
+# The test launches a kernel that hits a breakpoint with an always false
+# condition a certain number of times.  Meanwhile, the host loads and unloads
+# a code object in a loop, causing check_status to be called.  The hope is that
+# check_status, when calling process_event_queue, will pull many WAVE_STOP
+# events from the kernel hitting the breakpoint.
+#
+# Without the appropriate fix (of disabling forward progress requirement in
+# check_status), GDB would hit the newly-added assert in process_event_queue,
+# which verifies that forward progress requirement is disabled.  Even without
+# this assert, the test would likely time out (depending on the actual timeout
+# value).
+
+load_lib rocm.exp
+standard_testfile .cpp
+require allow_hipcc_tests
+
+# Build the host executable.
+if { [build_executable "failed to prepare" \
+         $testfile $srcfile {debug hip}] == -1 } {
+    return -1
+}
+
+set hipmodule_path [standard_output_file ${testfile}.co]
+
+# Build the kernel object file.
+if { [gdb_compile $srcdir/$subdir/$srcfile \
+       $hipmodule_path object \
+       { debug hip additional_flags=--genco additional_flags=-DDEVICE } ] != "" } {
+    return -1
+}
+
+# Run the host program under GDB with a never-true conditional breakpoint
+# on break_here, and check that the program runs to completion.
+proc do_test { } {
+    with_rocm_gpu_lock {
+       clean_restart $::binfile
+       # The host program takes the code object path as its only argument.
+       gdb_test_no_output "set args $::hipmodule_path" "set args"
+
+       if { ![runto_main] } {
+               return
+       }
+
+       # break_here lives in the code object, which is not loaded yet at
+       # main, so "breakpoint pending" is turned on for this one command.
+       # The "if 0" condition means the breakpoint never reports a stop.
+       gdb_test "with breakpoint pending on -- break break_here if 0"
+       gdb_continue_to_end "continue to end" "continue" 1
+    }
+}
+
+do_test
author	Simon Marchi <simon.marchi@efficios.com>
	Mon, 9 Jun 2025 16:09:02 +0000 (12:09 -0400)
committer	Simon Marchi <simon.marchi@efficios.com>
	Mon, 16 Jun 2025 14:23:16 +0000 (10:23 -0400)
gdb/amd-dbgapi-target.c		patch \| blob \| blame \| history
gdb/testsuite/gdb.rocm/code-object-load-while-breakpoint-hit.cpp	[new file with mode: 0644]	patch \| blob
gdb/testsuite/gdb.rocm/code-object-load-while-breakpoint-hit.exp	[new file with mode: 0644]	patch \| blob