GCN, nvptx: Errors during device probing are fatal

author Thomas Schwinge <tschwinge@baylibre.com>

Thu, 7 Mar 2024 13:42:07 +0000 (14:42 +0100)

committer Thomas Schwinge <tschwinge@baylibre.com>

Mon, 8 Apr 2024 20:08:00 +0000 (22:08 +0200)
author Thomas Schwinge <tschwinge@baylibre.com>
Thu, 7 Mar 2024 13:42:07 +0000 (14:42 +0100)
committer Thomas Schwinge <tschwinge@baylibre.com>
Mon, 8 Apr 2024 20:08:00 +0000 (22:08 +0200)
diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h

index 114aba4e074faf93138a01690e6c1267a9f77847..0dca4b3a5c0b055dcc13ceef7b2559c22fd73a36 100644 (file)
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -57,6 +57,7 @@ typedef enum {
    CUDA_ERROR_OUT_OF_MEMORY = 2,
    CUDA_ERROR_NOT_INITIALIZED = 3,
    CUDA_ERROR_DEINITIALIZED = 4,
+  CUDA_ERROR_NO_DEVICE = 100,
    CUDA_ERROR_INVALID_CONTEXT = 201,
    CUDA_ERROR_INVALID_HANDLE = 400,
    CUDA_ERROR_NOT_FOUND = 500,
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c

index 1d183b61ca4f7302b77cb714e42c3c1ca801bf81..27947801ccda7c4e99d13406f3b1456c43450389 100644 (file)
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -1513,10 +1513,12 @@ assign_agent_ids (hsa_agent_t agent, void *data)
  }
  
  /* Initialize hsa_context if it has not already been done.
-   Return TRUE on success.  */
+   If !PROBE: returns TRUE on success.
+   If PROBE: returns TRUE on success or if the plugin/device shall be silently
+   ignored, and otherwise emits an error and returns FALSE.  */
  
  static bool
-init_hsa_context (void)
+init_hsa_context (bool probe)
  {
    hsa_status_t status;
    int agent_index = 0;
@@ -1531,7 +1533,7 @@ init_hsa_context (void)
         GOMP_PLUGIN_fatal ("%s\n", msg);
        else
         GCN_WARNING ("%s\n", msg);
-      return false;
+      return probe ? true : false;
      }
    status = hsa_fns.hsa_init_fn ();
    if (status != HSA_STATUS_SUCCESS)
@@ -3337,8 +3339,8 @@ GOMP_OFFLOAD_version (void)
  int
  GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
  {
-  if (!init_hsa_context ())
-    return 0;
+  if (!init_hsa_context (true))
+    exit (EXIT_FAILURE);
    /* Return -1 if the omp_requires_mask cannot be fulfilled but
       devices were present.  */
    if (hsa_context.agent_count > 0
@@ -3355,7 +3357,7 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
  bool
  GOMP_OFFLOAD_init_device (int n)
  {
-  if (!init_hsa_context ())
+  if (!init_hsa_context (false))
      return false;
    if (n >= hsa_context.agent_count)
      {
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c

index ced6e014ecee2b076a1b57e6188ca67725d89a68..5aad3448a8db5cd9821ec23ab9354994586881ab 100644 (file)
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -604,12 +604,14 @@ nvptx_get_num_devices (void)
        CUresult r = CUDA_CALL_NOCHECK (cuInit, 0);
        /* This is not an error: e.g. we may have CUDA libraries installed but
           no devices available.  */
-      if (r != CUDA_SUCCESS)
+      if (r == CUDA_ERROR_NO_DEVICE)
         {
           GOMP_PLUGIN_debug (0, "Disabling nvptx offloading; cuInit: %s\n",
                              cuda_error (r));
           return 0;
         }
+      else if (r != CUDA_SUCCESS)
+       GOMP_PLUGIN_fatal ("cuInit error: %s", cuda_error (r));
      }
  
    CUDA_CALL_ASSERT (cuDeviceGetCount, &n);
author	Thomas Schwinge <tschwinge@baylibre.com>
	Thu, 7 Mar 2024 13:42:07 +0000 (14:42 +0100)
committer	Thomas Schwinge <tschwinge@baylibre.com>
	Mon, 8 Apr 2024 20:08:00 +0000 (22:08 +0200)
include/cuda/cuda.h		patch \| blob \| blame \| history
libgomp/plugin/plugin-gcn.c		patch \| blob \| blame \| history
libgomp/plugin/plugin-nvptx.c		patch \| blob \| blame \| history