]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Separate OpenACC kernels regions in data and parallel parts
authorGergö Barany <gergo@codesourcery.com>
Mon, 21 Jan 2019 13:28:20 +0000 (05:28 -0800)
committerThomas Schwinge <thomas@codesourcery.com>
Tue, 3 Mar 2020 11:16:37 +0000 (12:16 +0100)
This is the first in a series of patches that completely rework the handling
of the OpenACC "kernels" directive. In the future, kernels regions will be
transformed into data regions containing a sequence of serial and parallel
offloaded regions. This first patch sets up a new pass that is responsible
for this transformation, and in a first step constructs the new data region
containing a parallel region with the original kernels region's body.

gcc/
* Makefile.in: Add...
* omp-oacc-kernels.c: ... this new file for the kernels conversion
pass.
* flag-types.h (enum openacc_kernels): Add "split" style.  Adjust
all users.
* doc/invoke.texi (-fopenacc-kernels): Update.
* passes.def: Add pass_convert_oacc_kernels to pipeline.
* tree-pass.h (make_pass_convert_oacc_kernels): Add declaration.
gcc/testsuite/
* c-c++-common/goacc/kernels-conversion.c: New test.
* gfortran.dg/goacc/kernels-conversion.f95: Likewise.
* c-c++-common/goacc/if-clause-2.c: Update.
* gfortran.dg/goacc/kernels-tree.f95: Likewise.

(cherry picked from openacc-gcc-9-branch commit
f769c55eb290873a16b3da32fa15c03ff0d5578e)

14 files changed:
gcc/ChangeLog.omp
gcc/Makefile.in
gcc/c-family/c.opt
gcc/doc/invoke.texi
gcc/flag-types.h
gcc/fortran/lang.opt
gcc/omp-oacc-kernels.c [new file with mode: 0644]
gcc/passes.def
gcc/testsuite/ChangeLog.omp
gcc/testsuite/c-c++-common/goacc/if-clause-2.c
gcc/testsuite/c-c++-common/goacc/kernels-conversion.c [new file with mode: 0644]
gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95 [new file with mode: 0644]
gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95
gcc/tree-pass.h

index 5191284441353694701683fbf693e2b05ff881b9..2599fbf60621d335e78fd11920d374ccd5dce950 100644 (file)
@@ -1,3 +1,14 @@
+2019-01-21  Gergö Barany  <gergo@codesourcery.com>
+
+       * Makefile.in: Add...
+       * omp-oacc-kernels.c: ... this new file for the kernels conversion
+       pass.
+       * flag-types.h (enum openacc_kernels): Add "split" style.  Adjust
+       all users.
+       * doc/invoke.texi (-fopenacc-kernels): Update.
+       * passes.def: Add pass_convert_oacc_kernels to pipeline.
+       * tree-pass.h (make_pass_convert_oacc_kernels): Add declaration.
+
 2019-01-23  Thomas Schwinge  <thomas@codesourcery.com>
 
        * gimple.h (enum gf_mask): Add new target kinds
index 5f43d9de00ecf0c6f828cbe91e82284e82c6009c..9e93eaaa1739d67292671cb2c25d4e3cac8703cf 100644 (file)
@@ -1430,6 +1430,7 @@ OBJS = \
        omp-general.o \
        omp-grid.o \
        omp-low.o \
+       omp-oacc-kernels.o \
        omp-simd-clone.o \
        opt-problem.o \
        optabs.o \
@@ -2558,6 +2559,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
   $(srcdir)/omp-offload.c \
   $(srcdir)/omp-expand.c \
   $(srcdir)/omp-low.c \
+  $(srcdir)/omp-oacc-kernels.c \
   $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \
   $(srcdir)/cgraphclones.c \
   $(srcdir)/tree-phinodes.c \
index 13ae4f8559668a9b99b20d894b8a64581af61abc..0c0c161fcf493e97fbbdc6ab9bbb5f76d009858f 100644 (file)
@@ -1669,11 +1669,15 @@ C ObjC C++ ObjC++ LTO Joined Var(flag_openacc_dims)
 Specify default OpenACC compute dimensions.
 
 fopenacc-kernels=
-C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS) Undocumented
+C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS)
+-fopenacc-kernels=[split|parloops]     Configure OpenACC 'kernels' constructs handling.
 
 Enum
 Name(openacc_kernels) Type(enum openacc_kernels)
 
+EnumValue
+Enum(openacc_kernels) String(split) Value(OPENACC_KERNELS_SPLIT)
+
 EnumValue
 Enum(openacc_kernels) String(parloops) Value(OPENACC_KERNELS_PARLOOPS)
 
index 671ce193254f5eae1ce4b3c27e818df8cc24120f..1f6d2516c745df416eb520ffe27209eea42d0d29 100644 (file)
@@ -198,7 +198,7 @@ in the following sections.
 -aux-info @var{filename}  -fallow-parameterless-variadic-functions @gol
 -fno-asm  -fno-builtin  -fno-builtin-@var{function}  -fgimple@gol
 -fhosted  -ffreestanding @gol
--fopenacc  -fopenacc-dim=@var{geom} @gol
+-fopenacc  -fopenacc-dim=@var{geom}  -fopenacc-kernels=@var{style} @gol
 -fopenmp  -fopenmp-simd @gol
 -fms-extensions  -fplan9-extensions  -fsso-struct=@var{endianness} @gol
 -fallow-single-precision  -fcond-mismatch  -flax-vector-conversions @gol
@@ -2188,6 +2188,17 @@ to runtime, the environment variable @var{GOMP_OPENACC_DIM} can be set.
 It has the same format as the option value, except that '-' is not
 permitted.
 
+@item -fopenacc-kernels=@var{style}
+@opindex fopenacc-kernels
+@cindex OpenACC accelerator programming
+Configure OpenACC 'kernels' constructs handling.
+With @option{-fopenacc-kernels=split}, OpenACC 'kernels' constructs
+are split into a sequence of compute constructs, each then handled
+individually.
+With @option{-fopenacc-kernels=parloops}, the whole OpenACC
+'kernels' constructs is handled by the @samp{parloops} pass.
+This is the default.
+
 @item -fopenmp
 @opindex fopenmp
 @cindex OpenMP parallel
index 24a80858ec65dbafc5a77b852b0cb2a5268c872d..ce326078cd37b52647e261d80dca0f584afbdd80 100644 (file)
@@ -358,6 +358,7 @@ enum cf_protection_level
 /* OpenACC 'kernels' constructs handling.  */
 enum openacc_kernels
 {
+  OPENACC_KERNELS_SPLIT,
   OPENACC_KERNELS_PARLOOPS
 };
 #endif /* ! GCC_FLAG_TYPES_H */
index 227b99b5f2dd0c317890dab5d1a816623efaae5f..e6f607da602ae99287f43f3a764a7af0826f35af 100644 (file)
@@ -651,7 +651,8 @@ Fortran LTO Joined Var(flag_openacc_dims)
 ; Documented in C
 
 fopenacc-kernels=
-Fortran RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS) Undocumented
+Fortran RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS)
+; Documented in C
 
 fopenmp
 Fortran LTO
diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c
new file mode 100644 (file)
index 0000000..d180377
--- /dev/null
@@ -0,0 +1,245 @@
+/* Transformation pass for OpenACC kernels regions.  Converts a kernels
+   region into a series of smaller parallel regions.  There is a parallel
+   region for each parallelizable loop nest, as well as a "gang-single"
+   parallel region for each non-parallelizable piece of code.
+
+   Contributed by Gergö Barany <gergo@codesourcery.com> and
+                  Thomas Schwinge <thomas@codesourcery.com>
+
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "cgraph.h"
+#include "fold-const.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimple-walk.h"
+#include "gomp-constants.h"
+
+/* This is a preprocessing pass to be run immediately before lower_omp.  It
+   will convert OpenACC "kernels" regions into sequences of "parallel"
+   regions.
+   For now, the translation is as follows:
+   - The entire kernels region is turned into a data region with clauses
+     taken from the kernels region.  New "create" clauses are added for all
+     variables declared at the top level in the kernels region.  */
+
+/* Transform KERNELS_REGION, which is an OpenACC kernels region, into a data
+   region containing the original kernels region.  */
+
+static gimple *
+transform_kernels_region (gimple *kernels_region)
+{
+  gcc_checking_assert (gimple_omp_target_kind (kernels_region)
+                        == GF_OMP_TARGET_KIND_OACC_KERNELS);
+
+  /* Collect the kernels region's data clauses and create the new data
+     region with those clauses.  */
+  tree kernels_clauses = gimple_omp_target_clauses (kernels_region);
+  tree data_clauses = NULL;
+  for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    {
+      /* Certain map clauses are copied to the enclosing data region.  Any
+         non-data clause remains on the kernels region.  */
+      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP)
+        {
+          tree decl = OMP_CLAUSE_DECL (c);
+          HOST_WIDE_INT kind = OMP_CLAUSE_MAP_KIND (c);
+          switch (kind)
+            {
+            default:
+              if (kind == GOMP_MAP_ALLOC &&
+                  integer_zerop (OMP_CLAUSE_SIZE (c)))
+                /* ??? This is an alloc clause for mapping a pointer whose
+                   target is already mapped.  We leave these on the inner
+                   parallel regions because moving them to the outer data
+                   region causes runtime errors.  */
+                break;
+
+              /* For non-artificial variables, and for non-declaration
+                 expressions like A[0:n], copy the clause to the data
+                 region.  */
+              if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl))
+                  || !DECL_P (decl))
+                {
+                  tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c),
+                                                      OMP_CLAUSE_MAP);
+                  OMP_CLAUSE_SET_MAP_KIND (new_clause, kind);
+                  /* This must be unshared here to avoid "incorrect sharing
+                     of tree nodes" errors from verify_gimple.  */
+                  OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl);
+                  OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c);
+                  OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
+                  data_clauses = new_clause;
+
+                  /* Now that this data is mapped, the inner data clause on
+                     the kernels region can become a present clause.  */
+                  OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT);
+                }
+              break;
+
+            case GOMP_MAP_POINTER:
+            case GOMP_MAP_TO_PSET:
+            case GOMP_MAP_FORCE_TOFROM:
+            case GOMP_MAP_FIRSTPRIVATE_POINTER:
+            case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
+              /* ??? Copying these map kinds leads to internal compiler
+                 errors in later passes.  */
+              break;
+            }
+        }
+      else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
+        {
+          /* If there is an if clause, it must also be present on the
+             enclosing data region.  Temporarily remove the if clause's
+             chain to avoid copying it.  */
+          tree saved_chain = OMP_CLAUSE_CHAIN (c);
+          OMP_CLAUSE_CHAIN (c) = NULL;
+          tree new_if_clause = unshare_expr (c);
+          OMP_CLAUSE_CHAIN (c) = saved_chain;
+          OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses;
+          data_clauses = new_if_clause;
+        }
+    }
+  /* Restore the original order of the clauses.  */
+  data_clauses = nreverse (data_clauses);
+
+  gimple *data_region
+    = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS,
+                               data_clauses);
+  gimple_set_location (data_region, gimple_location (kernels_region));
+
+  /* For now, just construct a new parallel region inside the data region.  */
+  gimple *inner_region
+    = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_PARALLEL,
+                               kernels_clauses);
+  gimple_set_location (inner_region, gimple_location (kernels_region));
+  gimple_omp_set_body (inner_region, gimple_omp_body (kernels_region));
+
+  gbind *bind = gimple_build_bind (NULL, NULL, NULL);
+  gimple_bind_add_stmt (bind, inner_region);
+
+  /* Put the transformed pieces together.  The entire body of the region is
+     wrapped in a try-finally statement that calls __builtin_GOACC_data_end
+     for cleanup.  */
+  tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END);
+  gimple *call = gimple_build_call (data_end_fn, 0);
+  gimple_seq cleanup = NULL;
+  gimple_seq_add_stmt (&cleanup, call);
+  gimple *try_stmt = gimple_build_try (bind, cleanup, GIMPLE_TRY_FINALLY);
+  gimple_omp_set_body (data_region, try_stmt);
+
+  return data_region;
+}
+
+/* Helper function of convert_oacc_kernels for walking the tree, calling
+   transform_kernels_region on each kernels region found.  */
+
+static tree
+scan_kernels (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
+              struct walk_stmt_info *)
+{
+  gimple *stmt = gsi_stmt (*gsi_p);
+  *handled_ops_p = false;
+
+  int kind;
+  switch (gimple_code (stmt))
+    {
+    case GIMPLE_OMP_TARGET:
+      kind = gimple_omp_target_kind (stmt);
+      if (kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
+        {
+          gimple *new_region = transform_kernels_region (stmt);
+          gsi_replace (gsi_p, new_region, false);
+          *handled_ops_p = true;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  return NULL;
+}
+
+/* Find and transform OpenACC kernels regions in the current function.  */
+
+static unsigned int
+convert_oacc_kernels (void)
+{
+  struct walk_stmt_info wi;
+  gimple_seq body = gimple_body (current_function_decl);
+
+  memset (&wi, 0, sizeof (wi));
+  walk_gimple_seq_mod (&body, scan_kernels, NULL, &wi);
+
+  gimple_set_body (current_function_decl, body);
+
+  return 0;
+}
+
+namespace {
+
+const pass_data pass_data_convert_oacc_kernels =
+{
+  GIMPLE_PASS, /* type */
+  "convert_oacc_kernels", /* name */
+  OPTGROUP_OMP, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_gimple_any, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_convert_oacc_kernels : public gimple_opt_pass
+{
+public:
+  pass_convert_oacc_kernels (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_convert_oacc_kernels, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+  {
+    return (flag_openacc
+           && flag_openacc_kernels == OPENACC_KERNELS_SPLIT);
+  }
+  virtual unsigned int execute (function *)
+  {
+    return convert_oacc_kernels ();
+  }
+
+}; // class pass_convert_oacc_kernels
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_convert_oacc_kernels (gcc::context *ctxt)
+{
+  return new pass_convert_oacc_kernels (ctxt);
+}
index bc147c4444db8e79ded11ca5f2230becfa86d630..f4c4b96d96d865875ca7a221f67e7d75643f2d81 100644 (file)
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_warn_unused_result);
   NEXT_PASS (pass_diagnose_omp_blocks);
   NEXT_PASS (pass_diagnose_tm_blocks);
+  NEXT_PASS (pass_convert_oacc_kernels);
   NEXT_PASS (pass_lower_omp);
   NEXT_PASS (pass_lower_cf);
   NEXT_PASS (pass_lower_tm);
index 68cafc080262972965392dc3d7982774f2fa2dac..1bcc3e949d1f77a9c2cf2e4b5f97dca670eae64a 100644 (file)
@@ -1,3 +1,10 @@
+2019-01-21  Gergö Barany  <gergo@codesourcery.com>
+
+       * c-c++-common/goacc/kernels-conversion.c: New test.
+       * gfortran.dg/goacc/kernels-conversion.f95: Likewise.
+       * c-c++-common/goacc/if-clause-2.c: Update.
+       * gfortran.dg/goacc/kernels-tree.f95: Likewise.
+
 2019-01-30  Thomas Schwinge  <thomas@codesourcery.com>
 
        * c-c++-common/goacc/kernels-1.c: Add
index 5ab8459d732317b9d9fb84cea8ca2d13e6f77374..e17b5dd1107aa38f01b554ad16796cc7821dd6d4 100644 (file)
@@ -1,3 +1,6 @@
+/* { dg-additional-options "-fopenacc-kernels=split" } */
+/* { dg-additional-options "-fdump-tree-convert_oacc_kernels" } */
+
 void
 f (short c)
 {
@@ -9,3 +12,7 @@ f (short c)
   ;
 #pragma acc update device(c) if(c)
 }
+
+/* Verify that the 'if' clause gets duplicated.
+   { dg-final { scan-tree-dump-times "#pragma omp target oacc_data_kernels if\\(" 1 "convert_oacc_kernels" } }
+   { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel_kernels_gang_single .* if\\(" 1 "convert_oacc_kernels" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-conversion.c b/gcc/testsuite/c-c++-common/goacc/kernels-conversion.c
new file mode 100644 (file)
index 0000000..c75db37
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-additional-options "-fopenacc-kernels=split" } */
+/* { dg-additional-options "-fdump-tree-convert_oacc_kernels" } */
+
+#define N 1024
+
+unsigned int a[N];
+
+int
+main (void)
+{
+  int i;
+  unsigned int sum = 1;
+
+#pragma acc kernels copyin(a[0:N]) copy(sum)
+  {
+    #pragma acc loop
+    for (i = 0; i < N; ++i)
+      sum += a[i];
+
+    sum++;
+
+    #pragma acc loop
+    for (i = 0; i < N; ++i)
+      sum += a[i];
+  }
+
+  return 0;
+}
+
+/* Check that the kernels region is split into a data region and an enclosed
+   parallel region.  */ 
+/* { dg-final { scan-tree-dump-times "oacc_data_kernels" 1 "convert_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-times "oacc_parallel" 1 "convert_oacc_kernels" } } */
+
+/* Check that the original kernels region is removed.  */
+/* { dg-final { scan-tree-dump-not "oacc_kernels" "convert_oacc_kernels" } } */
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95
new file mode 100644 (file)
index 0000000..8c66330
--- /dev/null
@@ -0,0 +1,33 @@
+! { dg-additional-options "-fopenacc-kernels=split" }
+! { dg-additional-options "-fdump-tree-convert_oacc_kernels" }
+
+program main
+  implicit none
+  integer, parameter         :: N = 1024
+  integer, dimension (1:N)   :: a
+  integer                    :: i, sum
+
+  !$acc kernels copyin(a(1:N)) copy(sum)
+
+  !$acc loop
+  do i = 1, N
+    sum = sum + a(i)
+  end do
+
+  sum = sum + 1
+
+  !$acc loop
+  do i = 1, N
+    sum = sum + a(i)
+  end do
+
+  !$acc end kernels
+end program main
+
+! Check that the kernels region is split into a data region and an enclosed
+! parallel region.
+! { dg-final { scan-tree-dump-times "oacc_data_kernels" 1 "convert_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "oacc_parallel" 1 "convert_oacc_kernels" } }
+
+! Check that the original kernels region is removed.
+! { dg-final { scan-tree-dump-not "oacc_kernels" "convert_oacc_kernels" } }
index a70f1e737bd1f4e170e6891d8bdffe7867a429a6..b83ca2d8f0640ef66abaa609bbdbf53344fee58c 100644 (file)
@@ -1,5 +1,7 @@
 ! { dg-do compile } 
 ! { dg-additional-options "-fdump-tree-original" } 
+! { dg-additional-options "-fopenacc-kernels=split" }
+! { dg-additional-options "-fdump-tree-convert_oacc_kernels" }
 
 program test
   implicit none
@@ -33,3 +35,7 @@ end program test
 ! { dg-final { scan-tree-dump-times "map\\(alloc:t\\)" 1 "original" } } 
 
 ! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original" } } 
+
+! Verify that the 'if' clause gets duplicated.
+! { dg-final { scan-tree-dump-times "#pragma omp target oacc_data_kernels if\\(" 1 "convert_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel_kernels_gang_single .* if\\(" 1 "convert_oacc_kernels" } }
index 3a0b3805d24dbd50141d4145563861e4ae3768f3..f00c919945272c61e63302484d0a85ef85b714eb 100644 (file)
@@ -412,6 +412,7 @@ extern gimple_opt_pass *make_pass_lower_switch_O0 (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vector (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_convert_oacc_kernels (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);