[libgomp, nvptx] Handle per-function max-threads-per-block in default dims

author vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>

Mon, 30 Jul 2018 08:17:26 +0000 (08:17 +0000)

committer vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>

Mon, 30 Jul 2018 08:17:26 +0000 (08:17 +0000)
author vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 30 Jul 2018 08:17:26 +0000 (08:17 +0000)
committer vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 30 Jul 2018 08:17:26 +0000 (08:17 +0000)
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog

index 1d218f4be6c07e916e1f1547604c231be30e1e4a..6cd30bbf49d0d9277276429509d9abb27eb0e3ad 100644 (file)
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,9 @@
+2018-07-30  Tom de Vries  <tdevries@suse.de>
+
+       * plugin/plugin-nvptx.c (MIN, MAX): Redefine.
+       (nvptx_exec): Ensure worker and vector default dims don't exceed
+       targ_fn->max_threads_per_block.
+
  2018-07-30  Tom de Vries  <tdevries@suse.de>
  
         * plugin/plugin-nvptx.c (struct ptx_device): Add default_dims field.
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c

index 5c522aaf2819279adb811a7b479238669a6f0682..b6ec5f88d59a087e613f272bc45e9fe96f6915be 100644 (file)
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -141,6 +141,11 @@ init_cuda_lib (void)
  
  #include "secure_getenv.h"
  
+#undef MIN
+#undef MAX
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
+
  /* Convenience macros for the frequently used CUDA library call and
     error handling sequence as well as CUDA library calls that
     do the error checking themselves or don't do it at all.  */
@@ -1135,6 +1140,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
    void *kargs[1];
    void *hp, *dp;
    struct nvptx_thread *nvthd = nvptx_thread ();
+  int warp_size = nvthd->ptx_dev->warp_size;
    const char *maybe_abort_msg = "(perhaps abort was called)";
  
    function = targ_fn->fn;
@@ -1175,7 +1181,6 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
  
           int gang, worker, vector;
           {
-           int warp_size = nvthd->ptx_dev->warp_size;
             int block_size = nvthd->ptx_dev->max_threads_per_block;
             int cpu_size = nvthd->ptx_dev->max_threads_per_multiprocessor;
             int dev_size = nvthd->ptx_dev->num_sms;
@@ -1213,9 +1218,25 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs,
         }
        pthread_mutex_unlock (&ptx_dev_lock);
  
-      for (i = 0; i != GOMP_DIM_MAX; i++)
-       if (!dims[i])
-         dims[i] = nvthd->ptx_dev->default_dims[i];
+      {
+       bool default_dim_p[GOMP_DIM_MAX];
+       for (i = 0; i != GOMP_DIM_MAX; i++)
+         {
+           default_dim_p[i] = !dims[i];
+           if (default_dim_p[i])
+             dims[i] = nvthd->ptx_dev->default_dims[i];
+         }
+
+       if (default_dim_p[GOMP_DIM_VECTOR])
+         dims[GOMP_DIM_VECTOR]
+           = MIN (dims[GOMP_DIM_VECTOR],
+                  (targ_fn->max_threads_per_block / warp_size * warp_size));
+
+       if (default_dim_p[GOMP_DIM_WORKER])
+         dims[GOMP_DIM_WORKER]
+           = MIN (dims[GOMP_DIM_WORKER],
+                  targ_fn->max_threads_per_block / dims[GOMP_DIM_VECTOR]);
+      }
      }
  
    /* Check if the accelerator has sufficient hardware resources to
author	vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>
	Mon, 30 Jul 2018 08:17:26 +0000 (08:17 +0000)
committer	vries <vries@138bc75d-0d04-0410-961f-82ee72b054a4>
	Mon, 30 Jul 2018 08:17:26 +0000 (08:17 +0000)
libgomp/ChangeLog		patch | blob | blame | history
libgomp/plugin/plugin-nvptx.c		patch | blob | blame | history