data.srcXInBytes = src_offset1_size;
data.srcY = src_offset0_len;
+ if (data.srcXInBytes != 0 || data.srcY != 0)
+ {
+ /* Adjust origin to the actual array data, else the CUDA 2D memory
+ copy API calls below may fail to validate source/dest pointers
+ correctly (especially for Fortran where the "virtual origin" of an
+ array is often outside the stored data). */
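+ /* The byte offset of the (XInBytes, Y) origin is Y * Pitch + XInBytes;
+    the origin fields are cleared below once folded into the base address.  */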
+ if (src_ord == -1)
+ data.srcHost = (const void *) ((const char *) data.srcHost
+ + data.srcY * data.srcPitch
+ + data.srcXInBytes);
+ else
+ data.srcDevice += data.srcY * data.srcPitch + data.srcXInBytes;
+ data.srcXInBytes = 0;
+ data.srcY = 0;
+ }
+
+ if (data.dstXInBytes != 0 || data.dstY != 0)
+ {
+ /* As above. */
+ if (dst_ord == -1)
+ data.dstHost = (void *) ((char *) data.dstHost
+ + data.dstY * data.dstPitch
+ + data.dstXInBytes);
+ else
+ data.dstDevice += data.dstY * data.dstPitch + data.dstXInBytes;
+ data.dstXInBytes = 0;
+ data.dstY = 0;
+ }
+
CUresult res = CUDA_CALL_NOCHECK (cuMemcpy2D, &data);
if (res == CUDA_ERROR_INVALID_VALUE)
/* If pitch > CU_DEVICE_ATTRIBUTE_MAX_PITCH or for device-to-device
data.srcY = src_offset1_len;
data.srcZ = src_offset0_len;
+ if (data.srcXInBytes != 0 || data.srcY != 0 || data.srcZ != 0)
+ {
+ /* Adjust origin to the actual array data, else the CUDA 3D memory
+ copy API call below may fail to validate source/dest pointers
+ correctly (especially for Fortran where the "virtual origin" of an
+ array is often outside the stored data). */
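+ /* The byte offset of the (XInBytes, Y, Z) origin is
+    (Z * Height + Y) * Pitch + XInBytes, where Pitch is the row pitch in
+    bytes and Height is the number of rows per 2D slice.  */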
+ if (src_ord == -1)
+ data.srcHost
+ = (const void *) ((const char *) data.srcHost
+ + (data.srcZ * data.srcHeight + data.srcY)
+ * data.srcPitch
+ + data.srcXInBytes);
+ else
+ data.srcDevice
+ += (data.srcZ * data.srcHeight + data.srcY) * data.srcPitch
+ + data.srcXInBytes;
+ data.srcXInBytes = 0;
+ data.srcY = 0;
+ data.srcZ = 0;
+ }
+
+ if (data.dstXInBytes != 0 || data.dstY != 0 || data.dstZ != 0)
+ {
+ /* As above. */
+ if (dst_ord == -1)
+ data.dstHost = (void *) ((char *) data.dstHost
+ + (data.dstZ * data.dstHeight + data.dstY)
+ * data.dstPitch
+ + data.dstXInBytes);
+ else
+ data.dstDevice
+ += (data.dstZ * data.dstHeight + data.dstY) * data.dstPitch
+ + data.dstXInBytes;
+ data.dstXInBytes = 0;
+ data.dstY = 0;
+ data.dstZ = 0;
+ }
+
CUDA_CALL (cuMemcpy3D, &data);
return true;
}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+#include <omp.h>
+
+/* Treat the array as N rows of M columns, stored row-major: element
+   (row j, column i) is arr2d[j * M + i].  */
+#define N 1024
+#define M 2048
+
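+/* row_offset/row_length select a range of columns within each row;
+   col_offset/col_length select a range of rows.  Together they describe the
+   rectangle that is updated on the device and copied back.  */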
+#define row_offset 256
+#define row_length 512
+#define col_offset 128
+#define col_length 384
+
+int
+main ()
+{
+ int *arr2d = (int *) calloc (N * M, sizeof (int));
+ uintptr_t dstptr;
+ int hostdev = omp_get_initial_device ();
+ int targdev;
+
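+ /* Map only rows col_offset .. col_offset+col_length-1, so the device copy
+    does not start at arr2d[0].  */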
+#pragma omp target enter data map(to: arr2d[col_offset*M:col_length*M])
+
+#pragma omp target map(from: targdev, dstptr) \
+ map(present, tofrom: arr2d[col_offset*M:col_length*M])
+ {
+ for (int j = col_offset; j < col_offset + col_length; j++)
+ for (int i = row_offset; i < row_offset + row_length; i++)
+ arr2d[j * M + i]++;
+ targdev = omp_get_device_num ();
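+ /* Inside the target region arr2d refers to the corresponding device
+    pointer; since only part of the array is mapped, this "virtual origin"
+    can lie outside the device allocation.  */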
+ dstptr = (uintptr_t) arr2d;
+ }
+
+ /* Copy the updated rectangular block back from the device to the host.  */
+ {
+ size_t volume[2] = { col_length, row_length };
+ size_t offsets[2] = { col_offset, row_offset };
+ size_t dimensions[2] = { N, M };
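+ /* Arguments: dst, src, element size, number of dimensions, volume
+    (elements per dimension), dst offsets, src offsets, dst dimensions,
+    src dimensions, dst device, src device.  */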
+ omp_target_memcpy_rect ((void *) arr2d, (const void *) dstptr,
+ sizeof (int), 2, &volume[0], &offsets[0],
+ &offsets[0], &dimensions[0], &dimensions[0],
+ hostdev, targdev);
+ }
+
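+ /* Use "release" since the rectangle has already been copied back above;
+    no device-to-host transfer is wanted here.  */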
+#pragma omp target exit data map(release: arr2d[col_offset*M:col_length*M])
+
+ for (int j = 0; j < N; j++)
+ for (int i = 0; i < M; i++)
+ if (i >= row_offset && i < row_offset + row_length
+ && j >= col_offset && j < col_offset + col_length)
+ assert (arr2d[j * M + i] == 1);
+ else
+ assert (arr2d[j * M + i] == 0);
+
+ free (arr2d);
+
+ return 0;
+}