]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
libgomp: Fix hang when profiling OpenACC programs with CUDA 9.0 nvprof
author Kwok Cheung Yeung <kcy@codesourcery.com>
Fri, 17 Jul 2020 13:06:48 +0000 (06:06 -0700)
committer Kwok Cheung Yeung <kcy@codesourcery.com>
Fri, 17 Jul 2020 13:10:47 +0000 (06:10 -0700)
The version of nvprof in CUDA 9.0 causes a hang when used to profile an
OpenACC program.  This is because it calls acc_get_device_type from
a callback called during device initialization, which then attempts
to acquire acc_device_lock while it is already taken, resulting in
deadlock.  This works around the issue by returning acc_device_none
from acc_get_device_type without attempting to acquire the lock when
initialization has not completed yet.

This is a port of commit a1c022d1b9a43f85f0c451b6422fd095a704fe96 from
releases/gcc-10.  Most of the patch is already in this branch, so this
commit only contains changes to existing code.

2020-07-17  Thomas Schwinge  <thomas@codesourcery.com>
    Kwok Cheung Yeung  <kcy@codesourcery.com>

libgomp/
* oacc-init.c (acc_init_1): Move setting of acc_init_state to initializing
to the beginning of the function.
* libgomp.texi (acc_get_device_type): Update documentation.
(Implementation Status and Implementation-Defined Behavior): Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-2.c: New.

libgomp/ChangeLog.omp
libgomp/libgomp.texi
libgomp/oacc-init.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-2.c [new file with mode: 0644]

index 64980dbc38a9ca7bbdc297c8806543205e2d179c..db88fa64661b445f0b898cc6d0195023f10df261 100644 (file)
@@ -1,3 +1,12 @@
+2020-07-17  Thomas Schwinge  <thomas@codesourcery.com>
+           Kwok Cheung Yeung  <kcy@codesourcery.com>
+
+       * oacc-init.c (acc_init_1): Move setting of acc_init_state to initializing
+       to the beginning of the function.
+       * libgomp.texi (acc_get_device_type): Update documentation.
+       (Implementation Status and Implementation-Defined Behavior): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-2.c: New.
+
 2020-07-16  Tobias Burnus  <tobias@codesourcery.com>
 
        * testsuite/libgomp.oacc-fortran/firstprivate-int.f90: Use
index 31a16fe2a4b810bd656b660cc1f72421088fabd7..a119eda9ee1c8653c37f6b62fcc72eca97f5fe41 100644 (file)
@@ -1970,6 +1970,12 @@ in @var{devicetype}, to use when executing a parallel or kernels region.
 This function returns what device type will be used when executing a
 parallel or kernels region.
 
+This function returns @code{acc_device_none} when it is called
+while the device is still being initialized, that is, from an
+@code{acc_ev_device_init_start} or @code{acc_ev_device_init_end}
+callback of the OpenACC Profiling Interface (@ref{OpenACC Profiling
+Interface}).
+
 @item @emph{C/C++}:
 @multitable @columnfractions .20 .80
 @item @emph{Prototype}: @tab @code{acc_device_t acc_get_device_type(void);}
@@ -3385,6 +3391,11 @@ every event that has been registered.
 
 We're not yet accounting for the fact that @cite{OpenACC events may
 occur during event processing}.
+We just handle one case specially, as required by CUDA 9.0
+@command{nvprof}, that @code{acc_get_device_type}
+(@ref{acc_get_device_type}) may be called from
+@code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
+callbacks.
 
 As currently there are no inquiry functions defined, calls to
 @code{acc_prof_lookup} will always return @code{NULL}.
index 22105915d54010cf21965f44a6d187ec2bdcacad..8e8795308e28a79b048d8cd0cc1f545fec78450a 100644 (file)
@@ -233,6 +233,11 @@ acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
 static struct gomp_device_descr *
 acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
 {
+  gomp_mutex_lock (&acc_init_state_lock);
+  acc_init_state = initializing;
+  acc_init_thread = pthread_self ();
+  gomp_mutex_unlock (&acc_init_state_lock);
+
   bool check_not_nested_p;
   if (implicit)
     {
@@ -295,11 +300,6 @@ acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
   struct gomp_device_descr *base_dev, *acc_dev;
   int ndevs;
 
-  gomp_mutex_lock (&acc_init_state_lock);
-  acc_init_state = initializing;
-  acc_init_thread = pthread_self ();
-  gomp_mutex_unlock (&acc_init_state_lock);
-
   base_dev = resolve_device (d, true);
 
   ndevs = base_dev->get_num_devices_func ();
@@ -327,6 +327,10 @@ acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
                                &api_info);
     }
 
+  /* We're setting 'initialized' *after* 'goacc_profiling_dispatch', so that a
+     nested 'acc_get_device_type' called from a profiling callback still sees
+     'initializing', so that we don't deadlock when it then again tries to lock
+     'goacc_prof_lock'.  See also the discussion in 'acc_get_device_type'.  */
   gomp_mutex_lock (&acc_init_state_lock);
   acc_init_state = initialized;
   gomp_mutex_unlock (&acc_init_state_lock);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-2.c
new file mode 100644 (file)
index 0000000..b4e9f18
--- /dev/null
@@ -0,0 +1,80 @@
+/* { dg-do run } */
+/* { dg-timeout 10 } */
+
+/* Test the calling of 'acc_get_device_type' from within
+   'cb_device_init_start' and 'cb_device_init_end' callbacks.  This occurs
+   when the CUDA 9.0 'nvprof' tool is used, and previously deadlocked.  */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <acc_prof.h>
+
+static acc_prof_reg reg;  /* Set by 'acc_register_library'; 'main' registers callbacks through it.  */
+static acc_prof_reg unreg;  /* Stored, but not otherwise used in this test.  */
+static acc_prof_lookup_func lookup;  /* Stored, but not otherwise used in this test.  */
+/* Save the three profiling routines in the file-scope 'reg', 'unreg', 'lookup'.  */
+void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
+{
+  reg = reg_;
+  unreg = unreg_;
+  lookup = lookup_;
+}
+
+static bool expect_cb_device_init_start;  /* True while a start callback is due.  */
+static bool expect_cb_device_init_end;  /* True between the start and end callbacks.  */
+/* 'acc_ev_device_init_start' callback; 'main' registers it through 'reg'.  */
+static void cb_device_init_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
+{
+  assert (expect_cb_device_init_start);  /* Only valid once 'main' has armed it.  */
+  expect_cb_device_init_start = false;
+
+  acc_device_t acc_device_type;
+  acc_device_type = acc_get_device_type ();  /* The nested call under test.  */
+  assert (acc_device_type == acc_device_none);
+
+  expect_cb_device_init_end = true;  /* Arm the matching end callback.  */
+}
+/* 'acc_ev_device_init_end' callback; 'main' registers it through 'reg'.  */
+static void cb_device_init_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
+{
+  assert (expect_cb_device_init_end);  /* Armed by 'cb_device_init_start'.  */
+  expect_cb_device_init_end = false;
+
+  acc_device_t acc_device_type;
+  acc_device_type = acc_get_device_type ();  /* The nested call under test.  */
+  assert (acc_device_type == acc_device_none);
+}
+/* Register both callbacks, then run an init/query/shutdown cycle twice.  */
+int main(void)
+{
+  acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
+
+  reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
+  reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
+
+  expect_cb_device_init_start = true;  /* First cycle: 'acc_device_host'.  */
+  expect_cb_device_init_end = false;
+  acc_init (acc_device_host);
+  assert (!expect_cb_device_init_start);  /* The start callback ran...  */
+  assert (!expect_cb_device_init_end);  /* ...and so did the end callback.  */
+  {
+    acc_device_t acc_device_type;
+    acc_device_type = acc_get_device_type ();
+    assert (acc_device_type == acc_device_host);  /* Outside the callbacks, the requested type.  */
+  }
+  acc_shutdown (acc_device_host);
+
+  expect_cb_device_init_start = true;  /* Second cycle: 'acc_device_default'.  */
+  expect_cb_device_init_end = false;
+  acc_init (acc_device_default);
+  assert (!expect_cb_device_init_start);
+  assert (!expect_cb_device_init_end);
+  {
+    acc_device_t acc_device_type;
+    acc_device_type = acc_get_device_type ();
+    assert (acc_device_type != acc_device_none);  /* The concrete type is not pinned down here.  */
+  }
+  acc_shutdown (acc_device_default);
+
+  return 0;
+}