From a773b7e7038cbc9e77d83f28adf1130c628be863 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Gerg=C3=B6=20Barany?= Date: Mon, 21 Jan 2019 05:28:20 -0800 Subject: [PATCH] Separate OpenACC kernels regions in data and parallel parts This is the first in a series of patches that completely rework the handling of the OpenACC "kernels" directive. In the future, kernels regions will be transformed into data regions containing a sequence of serial and parallel offloaded regions. This first patch sets up a new pass that is responsible for this transformation, and in a first step constructs the new data region containing a parallel region with the original kernels region's body. gcc/ * Makefile.in: Add... * omp-oacc-kernels.c: ... this new file for the kernels conversion pass. * flag-types.h (enum openacc_kernels): Add "split" style. Adjust all users. * doc/invoke.texi (-fopenacc-kernels): Update. * passes.def: Add pass_convert_oacc_kernels to pipeline. * tree-pass.h (make_pass_convert_oacc_kernels): Add declaration. gcc/testsuite/ * c-c++-common/goacc/kernels-conversion.c: New test. * gfortran.dg/goacc/kernels-conversion.f95: Likewise. * c-c++-common/goacc/if-clause-2.c: Update. * gfortran.dg/goacc/kernels-tree.f95: Likewise. (cherry picked from openacc-gcc-9-branch commit f769c55eb290873a16b3da32fa15c03ff0d5578e) --- gcc/ChangeLog.omp | 11 + gcc/Makefile.in | 2 + gcc/c-family/c.opt | 6 +- gcc/doc/invoke.texi | 13 +- gcc/flag-types.h | 1 + gcc/fortran/lang.opt | 3 +- gcc/omp-oacc-kernels.c | 245 ++++++++++++++++++ gcc/passes.def | 1 + gcc/testsuite/ChangeLog.omp | 7 + .../c-c++-common/goacc/if-clause-2.c | 7 + .../c-c++-common/goacc/kernels-conversion.c | 36 +++ .../gfortran.dg/goacc/kernels-conversion.f95 | 33 +++ .../gfortran.dg/goacc/kernels-tree.f95 | 6 + gcc/tree-pass.h | 1 + 14 files changed, 369 insertions(+), 3 deletions(-) create mode 100644 gcc/omp-oacc-kernels.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-conversion.c create mode 100644 gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95 diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index 519128444135..2599fbf60621 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,14 @@ +2019-01-21 Gergö Barany + + * Makefile.in: Add... + * omp-oacc-kernels.c: ... this new file for the kernels conversion + pass. + * flag-types.h (enum openacc_kernels): Add "split" style. Adjust + all users. + * doc/invoke.texi (-fopenacc-kernels): Update. + * passes.def: Add pass_convert_oacc_kernels to pipeline. + * tree-pass.h (make_pass_convert_oacc_kernels): Add declaration. + 2019-01-23 Thomas Schwinge * gimple.h (enum gf_mask): Add new target kinds diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 5f43d9de00ec..9e93eaaa1739 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1430,6 +1430,7 @@ OBJS = \ omp-general.o \ omp-grid.o \ omp-low.o \ + omp-oacc-kernels.o \ omp-simd-clone.o \ opt-problem.o \ optabs.o \ @@ -2558,6 +2559,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/omp-offload.c \ $(srcdir)/omp-expand.c \ $(srcdir)/omp-low.c \ + $(srcdir)/omp-oacc-kernels.c \ $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \ $(srcdir)/cgraphclones.c \ $(srcdir)/tree-phinodes.c \ diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 13ae4f855966..0c0c161fcf49 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1669,11 +1669,15 @@ C ObjC C++ ObjC++ LTO Joined Var(flag_openacc_dims) Specify default OpenACC compute dimensions. fopenacc-kernels= -C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS) Undocumented +C ObjC C++ ObjC++ RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS) +-fopenacc-kernels=[split|parloops] Configure OpenACC 'kernels' constructs handling. Enum Name(openacc_kernels) Type(enum openacc_kernels) +EnumValue +Enum(openacc_kernels) String(split) Value(OPENACC_KERNELS_SPLIT) + EnumValue Enum(openacc_kernels) String(parloops) Value(OPENACC_KERNELS_PARLOOPS) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 671ce193254f..1f6d2516c745 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -198,7 +198,7 @@ in the following sections. -aux-info @var{filename} -fallow-parameterless-variadic-functions @gol -fno-asm -fno-builtin -fno-builtin-@var{function} -fgimple@gol -fhosted -ffreestanding @gol --fopenacc -fopenacc-dim=@var{geom} @gol +-fopenacc -fopenacc-dim=@var{geom} -fopenacc-kernels=@var{style} @gol -fopenmp -fopenmp-simd @gol -fms-extensions -fplan9-extensions -fsso-struct=@var{endianness} @gol -fallow-single-precision -fcond-mismatch -flax-vector-conversions @gol @@ -2188,6 +2188,17 @@ to runtime, the environment variable @var{GOMP_OPENACC_DIM} can be set. It has the same format as the option value, except that '-' is not permitted. +@item -fopenacc-kernels=@var{style} +@opindex fopenacc-kernels +@cindex OpenACC accelerator programming +Configure OpenACC 'kernels' constructs handling. +With @option{-fopenacc-kernels=split}, OpenACC 'kernels' constructs +are split into a sequence of compute constructs, each then handled +individually. +With @option{-fopenacc-kernels=parloops}, the whole OpenACC +'kernels' constructs is handled by the @samp{parloops} pass. +This is the default. + @item -fopenmp @opindex fopenmp @cindex OpenMP parallel diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 24a80858ec65..ce326078cd37 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -358,6 +358,7 @@ enum cf_protection_level /* OpenACC 'kernels' constructs handling. */ enum openacc_kernels { + OPENACC_KERNELS_SPLIT, OPENACC_KERNELS_PARLOOPS }; #endif /* ! GCC_FLAG_TYPES_H */ diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index 227b99b5f2dd..e6f607da602a 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -651,7 +651,8 @@ Fortran LTO Joined Var(flag_openacc_dims) ; Documented in C fopenacc-kernels= -Fortran RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS) Undocumented +Fortran RejectNegative Joined Enum(openacc_kernels) Var(flag_openacc_kernels) Init(OPENACC_KERNELS_PARLOOPS) +; Documented in C fopenmp Fortran LTO diff --git a/gcc/omp-oacc-kernels.c b/gcc/omp-oacc-kernels.c new file mode 100644 index 000000000000..d18037744427 --- /dev/null +++ b/gcc/omp-oacc-kernels.c @@ -0,0 +1,245 @@ +/* Transformation pass for OpenACC kernels regions. Converts a kernels + region into a series of smaller parallel regions. There is a parallel + region for each parallelizable loop nest, as well as a "gang-single" + parallel region for each non-parallelizable piece of code. + + Contributed by Gergö Barany and + Thomas Schwinge + + Copyright (C) 2019 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "cgraph.h" +#include "fold-const.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" +#include "gomp-constants.h" + +/* This is a preprocessing pass to be run immediately before lower_omp. It + will convert OpenACC "kernels" regions into sequences of "parallel" + regions. + For now, the translation is as follows: + - The entire kernels region is turned into a data region with clauses + taken from the kernels region. New "create" clauses are added for all + variables declared at the top level in the kernels region. */ + +/* Transform KERNELS_REGION, which is an OpenACC kernels region, into a data + region containing the original kernels region. */ + +static gimple * +transform_kernels_region (gimple *kernels_region) +{ + gcc_checking_assert (gimple_omp_target_kind (kernels_region) + == GF_OMP_TARGET_KIND_OACC_KERNELS); + + /* Collect the kernels region's data clauses and create the new data + region with those clauses. */ + tree kernels_clauses = gimple_omp_target_clauses (kernels_region); + tree data_clauses = NULL; + for (tree c = kernels_clauses; c; c = OMP_CLAUSE_CHAIN (c)) + { + /* Certain map clauses are copied to the enclosing data region. Any + non-data clause remains on the kernels region. */ + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP) + { + tree decl = OMP_CLAUSE_DECL (c); + HOST_WIDE_INT kind = OMP_CLAUSE_MAP_KIND (c); + switch (kind) + { + default: + if (kind == GOMP_MAP_ALLOC && + integer_zerop (OMP_CLAUSE_SIZE (c))) + /* ??? This is an alloc clause for mapping a pointer whose + target is already mapped. We leave these on the inner + parallel regions because moving them to the outer data + region causes runtime errors. */ + break; + + /* For non-artificial variables, and for non-declaration + expressions like A[0:n], copy the clause to the data + region. */ + if ((DECL_P (decl) && !DECL_ARTIFICIAL (decl)) + || !DECL_P (decl)) + { + tree new_clause = build_omp_clause (OMP_CLAUSE_LOCATION (c), + OMP_CLAUSE_MAP); + OMP_CLAUSE_SET_MAP_KIND (new_clause, kind); + /* This must be unshared here to avoid "incorrect sharing + of tree nodes" errors from verify_gimple. */ + OMP_CLAUSE_DECL (new_clause) = unshare_expr (decl); + OMP_CLAUSE_SIZE (new_clause) = OMP_CLAUSE_SIZE (c); + OMP_CLAUSE_CHAIN (new_clause) = data_clauses; + data_clauses = new_clause; + + /* Now that this data is mapped, the inner data clause on + the kernels region can become a present clause. */ + OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FORCE_PRESENT); + } + break; + + case GOMP_MAP_POINTER: + case GOMP_MAP_TO_PSET: + case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_FIRSTPRIVATE_POINTER: + case GOMP_MAP_FIRSTPRIVATE_REFERENCE: + /* ??? Copying these map kinds leads to internal compiler + errors in later passes. */ + break; + } + } + else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF) + { + /* If there is an if clause, it must also be present on the + enclosing data region. Temporarily remove the if clause's + chain to avoid copying it. */ + tree saved_chain = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = NULL; + tree new_if_clause = unshare_expr (c); + OMP_CLAUSE_CHAIN (c) = saved_chain; + OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses; + data_clauses = new_if_clause; + } + } + /* Restore the original order of the clauses. */ + data_clauses = nreverse (data_clauses); + + gimple *data_region + = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_DATA_KERNELS, + data_clauses); + gimple_set_location (data_region, gimple_location (kernels_region)); + + /* For now, just construct a new parallel region inside the data region. */ + gimple *inner_region + = gimple_build_omp_target (NULL, GF_OMP_TARGET_KIND_OACC_PARALLEL, + kernels_clauses); + gimple_set_location (inner_region, gimple_location (kernels_region)); + gimple_omp_set_body (inner_region, gimple_omp_body (kernels_region)); + + gbind *bind = gimple_build_bind (NULL, NULL, NULL); + gimple_bind_add_stmt (bind, inner_region); + + /* Put the transformed pieces together. The entire body of the region is + wrapped in a try-finally statement that calls __builtin_GOACC_data_end + for cleanup. */ + tree data_end_fn = builtin_decl_explicit (BUILT_IN_GOACC_DATA_END); + gimple *call = gimple_build_call (data_end_fn, 0); + gimple_seq cleanup = NULL; + gimple_seq_add_stmt (&cleanup, call); + gimple *try_stmt = gimple_build_try (bind, cleanup, GIMPLE_TRY_FINALLY); + gimple_omp_set_body (data_region, try_stmt); + + return data_region; +} + +/* Helper function of convert_oacc_kernels for walking the tree, calling + transform_kernels_region on each kernels region found. */ + +static tree +scan_kernels (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, + struct walk_stmt_info *) +{ + gimple *stmt = gsi_stmt (*gsi_p); + *handled_ops_p = false; + + int kind; + switch (gimple_code (stmt)) + { + case GIMPLE_OMP_TARGET: + kind = gimple_omp_target_kind (stmt); + if (kind == GF_OMP_TARGET_KIND_OACC_KERNELS) + { + gimple *new_region = transform_kernels_region (stmt); + gsi_replace (gsi_p, new_region, false); + *handled_ops_p = true; + } + break; + + default: + break; + } + + return NULL; +} + +/* Find and transform OpenACC kernels regions in the current function. */ + +static unsigned int +convert_oacc_kernels (void) +{ + struct walk_stmt_info wi; + gimple_seq body = gimple_body (current_function_decl); + + memset (&wi, 0, sizeof (wi)); + walk_gimple_seq_mod (&body, scan_kernels, NULL, &wi); + + gimple_set_body (current_function_decl, body); + + return 0; +} + +namespace { + +const pass_data pass_data_convert_oacc_kernels = +{ + GIMPLE_PASS, /* type */ + "convert_oacc_kernels", /* name */ + OPTGROUP_OMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_gimple_any, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_convert_oacc_kernels : public gimple_opt_pass +{ +public: + pass_convert_oacc_kernels (gcc::context *ctxt) + : gimple_opt_pass (pass_data_convert_oacc_kernels, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return (flag_openacc + && flag_openacc_kernels == OPENACC_KERNELS_SPLIT); + } + virtual unsigned int execute (function *) + { + return convert_oacc_kernels (); + } + +}; // class pass_convert_oacc_kernels + +} // anon namespace + +gimple_opt_pass * +make_pass_convert_oacc_kernels (gcc::context *ctxt) +{ + return new pass_convert_oacc_kernels (ctxt); +} diff --git a/gcc/passes.def b/gcc/passes.def index bc147c4444db..f4c4b96d96d8 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_warn_unused_result); NEXT_PASS (pass_diagnose_omp_blocks); NEXT_PASS (pass_diagnose_tm_blocks); + NEXT_PASS (pass_convert_oacc_kernels); NEXT_PASS (pass_lower_omp); NEXT_PASS (pass_lower_cf); NEXT_PASS (pass_lower_tm); diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp index 68cafc080262..1bcc3e949d1f 100644 --- a/gcc/testsuite/ChangeLog.omp +++ b/gcc/testsuite/ChangeLog.omp @@ -1,3 +1,10 @@ +2019-01-21 Gergö Barany + + * c-c++-common/goacc/kernels-conversion.c: New test. + * gfortran.dg/goacc/kernels-conversion.f95: Likewise. + * c-c++-common/goacc/if-clause-2.c: Update. + * gfortran.dg/goacc/kernels-tree.f95: Likewise. + 2019-01-30 Thomas Schwinge * c-c++-common/goacc/kernels-1.c: Add diff --git a/gcc/testsuite/c-c++-common/goacc/if-clause-2.c b/gcc/testsuite/c-c++-common/goacc/if-clause-2.c index 5ab8459d7323..e17b5dd1107a 100644 --- a/gcc/testsuite/c-c++-common/goacc/if-clause-2.c +++ b/gcc/testsuite/c-c++-common/goacc/if-clause-2.c @@ -1,3 +1,6 @@ +/* { dg-additional-options "-fopenacc-kernels=split" } */ +/* { dg-additional-options "-fdump-tree-convert_oacc_kernels" } */ + void f (short c) { @@ -9,3 +12,7 @@ f (short c) ; #pragma acc update device(c) if(c) } + +/* Verify that the 'if' clause gets duplicated. + { dg-final { scan-tree-dump-times "#pragma omp target oacc_data_kernels if\\(" 1 "convert_oacc_kernels" } } + { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel_kernels_gang_single .* if\\(" 1 "convert_oacc_kernels" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-conversion.c b/gcc/testsuite/c-c++-common/goacc/kernels-conversion.c new file mode 100644 index 000000000000..c75db375f260 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-conversion.c @@ -0,0 +1,36 @@ +/* { dg-additional-options "-fopenacc-kernels=split" } */ +/* { dg-additional-options "-fdump-tree-convert_oacc_kernels" } */ + +#define N 1024 + +unsigned int a[N]; + +int +main (void) +{ + int i; + unsigned int sum = 1; + +#pragma acc kernels copyin(a[0:N]) copy(sum) + { + #pragma acc loop + for (i = 0; i < N; ++i) + sum += a[i]; + + sum++; + + #pragma acc loop + for (i = 0; i < N; ++i) + sum += a[i]; + } + + return 0; +} + +/* Check that the kernels region is split into a data region and an enclosed + parallel region. */ +/* { dg-final { scan-tree-dump-times "oacc_data_kernels" 1 "convert_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-times "oacc_parallel" 1 "convert_oacc_kernels" } } */ + +/* Check that the original kernels region is removed. */ +/* { dg-final { scan-tree-dump-not "oacc_kernels" "convert_oacc_kernels" } } */ diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95 new file mode 100644 index 000000000000..8c663302a6f8 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-conversion.f95 @@ -0,0 +1,33 @@ +! { dg-additional-options "-fopenacc-kernels=split" } +! { dg-additional-options "-fdump-tree-convert_oacc_kernels" } + +program main + implicit none + integer, parameter :: N = 1024 + integer, dimension (1:N) :: a + integer :: i, sum + + !$acc kernels copyin(a(1:N)) copy(sum) + + !$acc loop + do i = 1, N + sum = sum + a(i) + end do + + sum = sum + 1 + + !$acc loop + do i = 1, N + sum = sum + a(i) + end do + + !$acc end kernels +end program main + +! Check that the kernels region is split into a data region and an enclosed +! parallel region. +! { dg-final { scan-tree-dump-times "oacc_data_kernels" 1 "convert_oacc_kernels" } } +! { dg-final { scan-tree-dump-times "oacc_parallel" 1 "convert_oacc_kernels" } } + +! Check that the original kernels region is removed. +! { dg-final { scan-tree-dump-not "oacc_kernels" "convert_oacc_kernels" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 index a70f1e737bd1..b83ca2d8f064 100644 --- a/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-tree.f95 @@ -1,5 +1,7 @@ ! { dg-do compile } ! { dg-additional-options "-fdump-tree-original" } +! { dg-additional-options "-fopenacc-kernels=split" } +! { dg-additional-options "-fdump-tree-convert_oacc_kernels" } program test implicit none @@ -33,3 +35,7 @@ end program test ! { dg-final { scan-tree-dump-times "map\\(alloc:t\\)" 1 "original" } } ! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original" } } + +! Verify that the 'if' clause gets duplicated. +! { dg-final { scan-tree-dump-times "#pragma omp target oacc_data_kernels if\\(" 1 "convert_oacc_kernels" } } +! { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel_kernels_gang_single .* if\\(" 1 "convert_oacc_kernels" } } diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 3a0b3805d24d..f00c91994527 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -412,6 +412,7 @@ extern gimple_opt_pass *make_pass_lower_switch_O0 (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lower_vector (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lower_vector_ssa (gcc::context *ctxt); extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_convert_oacc_kernels (gcc::context *ctxt); extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt); extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt); -- 2.47.2