openmp: fix max_vf setting for amdgcn offloading

author Andrew Stubbs <ams@codesourcery.com>

Fri, 8 Jul 2022 10:58:46 +0000 (11:58 +0100)

committer Andrew Stubbs <ams@codesourcery.com>

Tue, 12 Jul 2022 14:35:53 +0000 (15:35 +0100)
author Andrew Stubbs <ams@codesourcery.com>
Fri, 8 Jul 2022 10:58:46 +0000 (11:58 +0100)
committer Andrew Stubbs <ams@codesourcery.com>
Tue, 12 Jul 2022 14:35:53 +0000 (15:35 +0100)
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp

index 8296bc927ea55fbc5b6d2829b75182af45e39bdf..0e840ba301d4d3007145062efa435bb0d367d952 100644 (file)
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,12 @@
+2022-07-12  Andrew Stubbs  <ams@codesourcery.com>
+
+       * gimple-loop-versioning.cc (loop_versioning::loop_versioning): Add
+       comment.
+       * omp-general.cc (omp_max_simd_vf): New function.
+       * omp-general.h (omp_max_simd_vf): New prototype.
+       * omp-low.cc (lower_rec_simd_input_clauses): Select largest from
+       omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf.
+
  2022-07-08  Tobias Burnus  <tobias@codesourcery.com>
  
         Backport from mainline:
diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc

index 3175a1e52498d8bdbae9e368161be2c0b085c50d..01d2bf03cceca9f248be68e449224350672d0460 100644 (file)
--- a/gcc/gimple-loop-versioning.cc
+++ b/gcc/gimple-loop-versioning.cc
@@ -555,7 +555,10 @@ loop_versioning::loop_versioning (function *fn)
       unvectorizable code, since it is the largest size that can be
       handled efficiently by scalar code.  omp_max_vf calculates the
       maximum number of bytes in a vector, when such a value is relevant
-     to loop optimization.  */
+     to loop optimization.
+     FIXME: this probably needs to use omp_max_simd_vf when in a target
+     region, but how to tell? (And MAX_FIXED_MODE_SIZE is large enough that
+     it doesn't actually matter.)  */
    m_maximum_scale = estimated_poly_value (omp_max_vf ());
    m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
  }
diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc

index c044d808afcf2790830780c8c41b08886d8f9241..1cfb118225cd9e4db79443eb47b522a2b8a5a6ad 100644 (file)
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1012,6 +1012,24 @@ omp_max_simt_vf (void)
    return 0;
  }
  
+/* Return maximum SIMD width if offloading may target SIMD hardware, else 0.  */
+
+int
+omp_max_simd_vf (void)
+{
+  if (!optimize)
+    return 0;
+  if (ENABLE_OFFLOADING)
+    for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
+      {
+       if (startswith (c, "amdgcn"))
+         return 64;
+       else if ((c = strchr (c, ':')))
+         c++;
+      }
+  return 0;
+}
+
  /* Store the construct selectors as tree codes from last to first,
     return their number.  */
  
diff --git a/gcc/omp-general.h b/gcc/omp-general.h

index dba350a28d0669c411213eb86463eed830d4e3a6..fd66697ff6726ec18e7481dbdf0da5594186815d 100644 (file)
--- a/gcc/omp-general.h
+++ b/gcc/omp-general.h
@@ -116,6 +116,7 @@ extern gimple *omp_build_barrier (tree lhs);
  extern tree find_combined_omp_for (tree *, int *, void *);
  extern poly_uint64 omp_max_vf (void);
  extern int omp_max_simt_vf (void);
+extern int omp_max_simd_vf (void);
  extern int omp_constructor_traits_to_codes (tree, enum tree_code *);
  extern tree omp_check_context_selector (location_t loc, tree ctx);
  extern void omp_mark_declare_variant (location_t loc, tree variant,
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc

index 8ea8cf4fd5437720c09f4423aafe327eba5fc207..c69440fc91d5239b28a48be107432cc05d417665 100644 (file)
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -5160,7 +5160,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
  {
    if (known_eq (sctx->max_vf, 0U))
      {
-      sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf ();
+      /* If we are compiling for multiple devices choose the largest VF.  */
+      sctx->max_vf = omp_max_vf ();
+      if (omp_maybe_offloaded_ctx (ctx))
+       {
+         if (sctx->is_simt)
+           sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simt_vf ());
+         sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simd_vf ());
+       }
        if (maybe_gt (sctx->max_vf, 1U))
         {
           tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt),
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp

index 13141b9768e17fea0b2ac1fd5a7cce165e0e9618..2567e10231f2ce1ef70f20d8dbe462b06865c801 100644 (file)
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,10 @@
+2022-07-12  Andrew Stubbs  <ams@codesourcery.com>
+
+       * lib/target-supports.exp
+       (check_effective_target_amdgcn_offloading_enabled): New.
+       (check_effective_target_nvptx_offloading_enabled): New.
+       * gcc.dg/gomp/target-vf.c: New test.
+
  2022-07-05  Tobias Burnus  <tobias@codesourcery.com>
  
         Backport from mainline:
diff --git a/gcc/testsuite/gcc.dg/gomp/target-vf.c b/gcc/testsuite/gcc.dg/gomp/target-vf.c

new file mode 100644 (file)

index 0000000..14cea45
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/gomp/target-vf.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -O2 -fdump-tree-omplower" } */ 
+
+/* Ensure that omp_max_vf, omp_max_simt_vf, and omp_max_simd_vf work together
+   to set the OpenMP vectorization factor for the offload target, and not
+   just for the host.  */
+
+float
+foo (float * __restrict x, float * __restrict y)
+{
+  float sum = 0.0;
+
+#pragma omp target teams distribute parallel for simd map(tofrom: sum) reduction(+:sum)
+  for (int i=0; i<1024; i++)
+    sum += x[i] * y[i];
+
+  return sum;
+}
+
+/* { dg-final { scan-tree-dump  "safelen\\(64\\)" "omplower" { target amdgcn_offloading_enabled } } } */
+/* { dg-final { scan-tree-dump  "safelen\\(32\\)" "omplower" { target { { nvptx_offloading_enabled } && { ! amdgcn_offloading_enabled } } } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp

index 4a845d3c23908e930ba5e2c3c2a3584f4aef4933..e40b06c48a505ab73c1857fc26fa90854464e38d 100644 (file)
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1025,6 +1025,16 @@ proc check_effective_target_offloading_enabled {} {
      return [check_configured_with "--enable-offload-targets"]
  }
  
+# Return 1 if compiled with --enable-offload-targets=amdgcn
+proc check_effective_target_amdgcn_offloading_enabled {} {
+    return [check_configured_with {--enable-offload-targets=[^ ]*amdgcn}]
+}
+
+# Return 1 if compiled with --enable-offload-targets=nvptx
+proc check_effective_target_nvptx_offloading_enabled {} {
+    return [check_configured_with {--enable-offload-targets=[^ ]*nvptx}]
+}
+
  # Return 1 if compilation with -fopenacc is error-free for trivial
  # code, 0 otherwise.
author	Andrew Stubbs <ams@codesourcery.com>
	Fri, 8 Jul 2022 10:58:46 +0000 (11:58 +0100)
committer	Andrew Stubbs <ams@codesourcery.com>
	Tue, 12 Jul 2022 14:35:53 +0000 (15:35 +0100)
gcc/ChangeLog.omp		patch \| blob \| blame \| history
gcc/gimple-loop-versioning.cc		patch \| blob \| blame \| history
gcc/omp-general.cc		patch \| blob \| blame \| history
gcc/omp-general.h		patch \| blob \| blame \| history
gcc/omp-low.cc		patch \| blob \| blame \| history
gcc/testsuite/ChangeLog.omp		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/gomp/target-vf.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/lib/target-supports.exp		patch \| blob \| blame \| history