extern bool gcn_global_address_p (rtx);
extern tree gcn_goacc_adjust_propagation_record (tree record_type, bool sender,
const char *name);
-extern void gcn_goacc_adjust_gangprivate_decl (tree var);
+extern tree gcn_goacc_adjust_private_decl (tree var, int level);
extern void gcn_goacc_reduction (gcall *call);
extern bool gcn_hard_regno_rename_ok (unsigned int from_reg,
unsigned int to_reg);
return decl;
}
-void
-gcn_goacc_adjust_gangprivate_decl (tree var)
+tree
+gcn_goacc_adjust_private_decl (tree var, int level)
{
+ if (level != GOMP_DIM_GANG)
+ return var;
+
tree type = TREE_TYPE (var);
tree lds_type = build_qualified_type (type,
TYPE_QUALS_NO_ADDR_SPACE (type)
if (machfun)
machfun->use_flat_addressing = true;
+
+ return var;
}
/* }}} */
#undef TARGET_GOACC_ADJUST_PROPAGATION_RECORD
#define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \
gcn_goacc_adjust_propagation_record
-#undef TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
-#define TARGET_GOACC_ADJUST_GANGPRIVATE_DECL gcn_goacc_adjust_gangprivate_decl
+#undef TARGET_GOACC_ADJUST_PRIVATE_DECL
+#define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl
#undef TARGET_GOACC_FORK_JOIN
#define TARGET_GOACC_FORK_JOIN gcn_fork_join
#undef TARGET_GOACC_REDUCTION
#include "fold-const.h"
#include "intl.h"
#include "opts.h"
+#include "tree-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
static unsigned vector_red_partition;
static GTY(()) rtx vector_red_sym;
+/* Shared memory block for gang-private variables. */
+static unsigned gang_private_shared_size;
+static unsigned gang_private_shared_align;
+static GTY(()) rtx gang_private_shared_sym;
+static hash_map<tree_decl_hash, unsigned int> gang_private_shared_hmap;
+
/* Global lock variable, needed for 128bit worker & gang reductions. */
static GTY(()) tree global_lock_var;
vector_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
vector_red_partition = 0;
+ gang_private_shared_sym = gen_rtx_SYMBOL_REF (Pmode, "__gang_private_shared");
+ SET_SYMBOL_DATA_AREA (gang_private_shared_sym, DATA_AREA_SHARED);
+ gang_private_shared_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
+
diagnose_openacc_conflict (TARGET_GOMP, "-mgomp");
diagnose_openacc_conflict (TARGET_SOFT_STACK, "-msoft-stack");
diagnose_openacc_conflict (TARGET_UNIFORM_SIMT, "-muniform-simt");
write_shared_buffer (asm_out_file, vector_red_sym,
vector_red_align, vector_red_size);
+ if (gang_private_shared_size)
+ write_shared_buffer (asm_out_file, gang_private_shared_sym,
+ gang_private_shared_align, gang_private_shared_size);
+
if (need_softstack_decl)
{
write_var_marker (asm_out_file, false, true, "__nvptx_stacks");
return false;
}
+/* Implement TARGET_GOACC_ADJUST_PRIVATE_DECL. */
+
+static tree
+nvptx_goacc_adjust_private_decl (tree decl, int level)
+{
+ if (level != GOMP_DIM_GANG)
+ return decl;
+
+ /* Set "oacc gang-private" attribute for gang-private variable
+ declarations. */
+ if (!lookup_attribute ("oacc gang-private", DECL_ATTRIBUTES (decl)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Setting 'oacc gang-private' attribute for decl:");
+ print_generic_decl (dump_file, decl, TDF_SLIM);
+ fputc ('\n', dump_file);
+ }
+ tree id = get_identifier ("oacc gang-private");
+ DECL_ATTRIBUTES (decl) = tree_cons (id, NULL, DECL_ATTRIBUTES (decl));
+ }
+
+ return decl;
+}
+
+/* Implement TARGET_GOACC_EXPAND_VAR_DECL. */
+
+static rtx
+nvptx_goacc_expand_var_decl (tree var)
+{
+ /* Place "oacc gang-private" variables in shared memory. */
+ if (VAR_P (var)
+ && lookup_attribute ("oacc gang-private", DECL_ATTRIBUTES (var)))
+ {
+ unsigned int offset, *poffset;
+ poffset = gang_private_shared_hmap.get (var);
+ if (poffset)
+ offset = *poffset;
+ else
+ {
+ unsigned HOST_WIDE_INT align = DECL_ALIGN (var);
+ gang_private_shared_size
+ = (gang_private_shared_size + align - 1) & ~(align - 1);
+ if (gang_private_shared_align < align)
+ gang_private_shared_align = align;
+
+ offset = gang_private_shared_size;
+ bool existed = gang_private_shared_hmap.put (var, offset);
+ gcc_checking_assert (!existed);
+ gang_private_shared_size += tree_to_uhwi (DECL_SIZE_UNIT (var));
+ }
+ rtx addr = plus_constant (Pmode, gang_private_shared_sym, offset);
+ return gen_rtx_MEM (TYPE_MODE (TREE_TYPE (var)), addr);
+ }
+
+ return NULL_RTX;
+}
+
static GTY(()) tree nvptx_previous_fndecl;
static void
if (!fndecl || fndecl == nvptx_previous_fndecl)
return;
+ gang_private_shared_hmap.empty ();
nvptx_previous_fndecl = fndecl;
vector_red_partition = 0;
oacc_bcast_partition = 0;
#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+#undef TARGET_GOACC_ADJUST_PRIVATE_DECL
+#define TARGET_GOACC_ADJUST_PRIVATE_DECL nvptx_goacc_adjust_private_decl
+
+#undef TARGET_GOACC_EXPAND_VAR_DECL
+#define TARGET_GOACC_EXPAND_VAR_DECL nvptx_goacc_expand_var_decl
+
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function
constant of type @var{type}.
@end deftypefn
+@deftypefn {Target Hook} tree TARGET_GOACC_ADJUST_PRIVATE_DECL (tree @var{var}, int @var{level})
+This hook, if defined, is used by accelerator target back-ends to adjust
+OpenACC variable declarations that should be made private to the given
+parallelism level (i.e. @code{GOMP_DIM_GANG}, @code{GOMP_DIM_WORKER} or
+@code{GOMP_DIM_VECTOR}). A typical use for this hook is to force variable
+declarations at the @code{gang} level to reside in GPU shared memory.
+
+You may also use the @code{TARGET_GOACC_EXPAND_VAR_DECL} hook if the
+adjusted variable declaration needs to be expanded to RTL in a non-standard
+way.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GOACC_EXPAND_VAR_DECL (tree @var{var})
+This hook, if defined, is used by accelerator target back-ends to expand
+specially handled kinds of @code{VAR_DECL} expressions. A particular use is
+to place variables with specific attributes inside special accelarator
+memories. A return value of @code{NULL} indicates that the target does not
+handle this @code{VAR_DECL}, and normal RTL expanding is resumed.
+
+Only define this hook if your accelerator target needs to expand certain
+@code{VAR_DECL} nodes in a way that differs from the default. You can also adjust
+private variables at OpenACC device-lowering time using the
+@code{TARGET_GOACC_ADJUST_PRIVATE_DECL} target hook.
+@end deftypefn
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
@hook TARGET_PREFERRED_ELSE_VALUE
+@hook TARGET_GOACC_ADJUST_PRIVATE_DECL
+
+@hook TARGET_GOACC_EXPAND_VAR_DECL
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
exp = SSA_NAME_VAR (ssa_name);
goto expand_decl_rtl;
- case PARM_DECL:
case VAR_DECL:
+ /* Allow accel compiler to handle variables that require special
+ treatment, e.g. if they have been modified in some way earlier in
+ compilation by the adjust_private_decl OpenACC hook. */
+ if (flag_openacc && targetm.goacc.expand_var_decl)
+ {
+ temp = targetm.goacc.expand_var_decl (exp);
+ if (temp)
+ return temp;
+ }
+ /* ... fall through ... */
+
+ case PARM_DECL:
/* If a static var's type was incomplete when the decl was written,
but the type is complete now, lay out the decl now. */
if (DECL_SIZE (exp) == 0
else
gcc_unreachable ();
break;
+ case IFN_UNIQUE_OACC_PRIVATE:
+ break;
}
if (pattern)
or leaving partitioned execution.
DEP_VAR = UNIQUE ({HEAD,TAIL}_MARK, REMAINING_MARKS, ...PRIMARY_FLAGS)
- The PRIMARY_FLAGS only occur on the first HEAD_MARK of a sequence. */
+ The PRIMARY_FLAGS only occur on the first HEAD_MARK of a sequence.
+
+ PRIVATE captures variables to be made private at the surrounding parallelism
+ level. */
#define IFN_UNIQUE_CODES \
DEF(UNSPEC), \
DEF(OACC_FORK), DEF(OACC_JOIN), \
- DEF(OACC_HEAD_MARK), DEF(OACC_TAIL_MARK)
+ DEF(OACC_HEAD_MARK), DEF(OACC_TAIL_MARK), \
+ DEF(OACC_PRIVATE)
enum ifn_unique_kind {
#define DEF(X) IFN_UNIQUE_##X
/* Only used for omp target contexts. True if an OpenMP construct other
than teams is strictly nested in it. */
bool nonteams_nested_p;
+
+ /* Candidates for adjusting OpenACC privatization level. */
+ vec<tree> oacc_privatization_candidates;
};
static splay_tree all_contexts;
static void
lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
- gcall *fork, gcall *join, gimple_seq *fork_seq,
- gimple_seq *join_seq, omp_context *ctx)
+ gcall *fork, gcall *private_marker, gcall *join,
+ gimple_seq *fork_seq, gimple_seq *join_seq,
+ omp_context *ctx)
{
gimple_seq before_fork = NULL;
gimple_seq after_fork = NULL;
/* Now stitch things together. */
gimple_seq_add_seq (fork_seq, before_fork);
+ if (private_marker)
+ gimple_seq_add_stmt (fork_seq, private_marker);
if (fork)
gimple_seq_add_stmt (fork_seq, fork);
gimple_seq_add_seq (fork_seq, after_fork);
HEAD and TAIL. */
static void
-lower_oacc_head_tail (location_t loc, tree clauses,
+lower_oacc_head_tail (location_t loc, tree clauses, gcall *private_marker,
gimple_seq *head, gimple_seq *tail, omp_context *ctx)
{
bool inner = false;
gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
+
+ if (private_marker)
+ {
+ gimple_set_location (private_marker, loc);
+ gimple_call_set_lhs (private_marker, ddvar);
+ gimple_call_set_arg (private_marker, 1, ddvar);
+ }
+
tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
&join_seq);
lower_oacc_reductions (loc, clauses, place, inner,
- fork, join, &fork_seq, &join_seq, ctx);
+ fork, (count == 1) ? private_marker : NULL,
+ join, &fork_seq, &join_seq, ctx);
/* Append this level to head. */
gimple_seq_add_seq (head, fork_seq);
}
}
+/* Scan CLAUSES for candidates for adjusting OpenACC privatization level in
+ CTX. */
+
+static void
+oacc_privatization_scan_clause_chain (omp_context *ctx, tree clauses)
+{
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE)
+ {
+ tree decl = OMP_CLAUSE_DECL (c);
+ if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
+ ctx->oacc_privatization_candidates.safe_push (decl);
+ }
+}
+
+/* Scan DECLS for candidates for adjusting OpenACC privatization level in
+ CTX. */
+
+static void
+oacc_privatization_scan_decl_chain (omp_context *ctx, tree decls)
+{
+ for (tree decl = decls; decl; decl = DECL_CHAIN (decl))
+ if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
+ ctx->oacc_privatization_candidates.safe_push (decl);
+}
+
/* Callback for walk_gimple_seq. Find #pragma omp scan statement. */
static tree
*dlist = new_dlist;
}
+/* Build an internal UNIQUE function with type IFN_UNIQUE_OACC_PRIVATE listing
+ the addresses of variables to be made private at the surrounding
+ parallelism level. Such functions appear in the gimple code stream in two
+ forms, e.g. for a partitioned loop:
+
+ .data_dep.6 = .UNIQUE (OACC_HEAD_MARK, .data_dep.6, 1, 68);
+ .data_dep.6 = .UNIQUE (OACC_PRIVATE, .data_dep.6, -1, &w);
+ .data_dep.6 = .UNIQUE (OACC_FORK, .data_dep.6, -1);
+ .data_dep.6 = .UNIQUE (OACC_HEAD_MARK, .data_dep.6);
+
+ or alternatively, OACC_PRIVATE can appear at the top level of a parallel,
+ not as part of a HEAD_MARK sequence:
+
+ .UNIQUE (OACC_PRIVATE, 0, 0, &w);
+
+ For such stand-alone appearances, the 3rd argument is always 0, denoting
+ gang partitioning. */
+
+static gcall *
+lower_oacc_private_marker (omp_context *ctx)
+{
+ if (ctx->oacc_privatization_candidates.length () == 0)
+ return NULL;
+
+ auto_vec<tree, 5> args;
+
+ args.quick_push (build_int_cst (integer_type_node, IFN_UNIQUE_OACC_PRIVATE));
+ args.quick_push (integer_zero_node);
+ args.quick_push (integer_minus_one_node);
+
+ int i;
+ tree decl;
+ FOR_EACH_VEC_ELT (ctx->oacc_privatization_candidates, i, decl)
+ {
+ for (omp_context *thisctx = ctx; thisctx; thisctx = thisctx->outer)
+ {
+ tree inner_decl = maybe_lookup_decl (decl, thisctx);
+ if (inner_decl)
+ {
+ decl = inner_decl;
+ break;
+ }
+ }
+ gcc_checking_assert (decl);
+
+ tree addr = build_fold_addr_expr (decl);
+ args.safe_push (addr);
+ }
+
+ return gimple_build_call_internal_vec (IFN_UNIQUE, args);
+}
+
/* Lower code for an OMP loop directive. */
static void
push_gimplify_context ();
+ oacc_privatization_scan_clause_chain (ctx, gimple_omp_for_clauses (stmt));
+
lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
block = make_node (BLOCK);
gbind *inner_bind
= as_a <gbind *> (gimple_seq_first_stmt (omp_for_body));
tree vars = gimple_bind_vars (inner_bind);
+ if (is_gimple_omp_oacc (ctx->stmt))
+ oacc_privatization_scan_decl_chain (ctx, vars);
gimple_bind_append_vars (new_stmt, vars);
/* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
keep them on the inner_bind and it's block. */
lower_omp (gimple_omp_body_ptr (stmt), ctx);
+ gcall *private_marker = NULL;
+ if (is_gimple_omp_oacc (ctx->stmt)
+ && !gimple_seq_empty_p (omp_for_body))
+ private_marker = lower_oacc_private_marker (ctx);
+
/* Lower the header expressions. At this point, we can assume that
the header is of the form:
if (is_gimple_omp_oacc (ctx->stmt)
&& !ctx_in_oacc_kernels_region (ctx))
lower_oacc_head_tail (gimple_location (stmt),
- gimple_omp_for_clauses (stmt),
+ gimple_omp_for_clauses (stmt), private_marker,
&oacc_head, &oacc_tail, ctx);
/* Add OpenACC partitioning and reduction markers just before the loop. */
them as a dummy GANG loop. */
tree level = build_int_cst (integer_type_node, GOMP_DIM_GANG);
+ gcall *private_marker = lower_oacc_private_marker (ctx);
+
+ if (private_marker)
+ gimple_call_set_arg (private_marker, 2, level);
+
lower_oacc_reductions (gimple_location (ctx->stmt), clauses, level,
- false, NULL, NULL, &fork_seq, &join_seq, ctx);
+ false, NULL, private_marker, NULL, &fork_seq,
+ &join_seq, ctx);
}
gimple_seq_add_seq (&new_body, fork_seq);
ctx);
break;
case GIMPLE_BIND:
+ if (ctx && is_gimple_omp_oacc (ctx->stmt))
+ {
+ tree vars = gimple_bind_vars (as_a <gbind *> (stmt));
+ oacc_privatization_scan_decl_chain (ctx, vars);
+ }
lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
maybe_remove_omp_member_access_dummy_vars (as_a <gbind *> (stmt));
break;
#include "attribs.h"
#include "cfgloop.h"
#include "context.h"
+#include "convert.h"
/* Describe the OpenACC looping structure of a function. The entire
function is held in a 'NULL' loop. */
= ((enum ifn_unique_kind)
TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
- if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
+ if (k == IFN_UNIQUE_OACC_FORK
+ || k == IFN_UNIQUE_OACC_JOIN
+ || k == IFN_UNIQUE_OACC_PRIVATE)
*gimple_call_arg_ptr (stmt, 2) = replacement;
else if (k == kind && stmt != from)
break;
gsi_replace_with_seq (&gsi, seq, true);
}
+struct var_decl_rewrite_info
+{
+ gimple *stmt;
+ hash_map<tree, tree> *adjusted_vars;
+ bool avoid_pointer_conversion;
+ bool modified;
+};
+
+/* Helper function for execute_oacc_device_lower. Rewrite VAR_DECLs (by
+ themselves or wrapped in various other nodes) according to ADJUSTED_VARS in
+ the var_decl_rewrite_info pointed to via DATA. Used as part of coercing
+ gang-private variables in OpenACC offload regions to reside in GPU shared
+ memory. */
+
+static tree
+oacc_rewrite_var_decl (tree *tp, int *walk_subtrees, void *data)
+{
+ walk_stmt_info *wi = (walk_stmt_info *) data;
+ var_decl_rewrite_info *info = (var_decl_rewrite_info *) wi->info;
+
+ if (TREE_CODE (*tp) == ADDR_EXPR)
+ {
+ tree arg = TREE_OPERAND (*tp, 0);
+ tree *new_arg = info->adjusted_vars->get (arg);
+
+ if (new_arg)
+ {
+ if (info->avoid_pointer_conversion)
+ {
+ *tp = build_fold_addr_expr (*new_arg);
+ info->modified = true;
+ *walk_subtrees = 0;
+ }
+ else
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
+ tree repl = build_fold_addr_expr (*new_arg);
+ gimple *stmt1
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
+ tree conv = convert_to_pointer (TREE_TYPE (*tp),
+ gimple_assign_lhs (stmt1));
+ gimple *stmt2
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);
+ gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
+ gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
+ *tp = gimple_assign_lhs (stmt2);
+ info->modified = true;
+ *walk_subtrees = 0;
+ }
+ }
+ }
+ else if (TREE_CODE (*tp) == COMPONENT_REF || TREE_CODE (*tp) == ARRAY_REF)
+ {
+ tree *base = &TREE_OPERAND (*tp, 0);
+
+ while (TREE_CODE (*base) == COMPONENT_REF
+ || TREE_CODE (*base) == ARRAY_REF)
+ base = &TREE_OPERAND (*base, 0);
+
+ if (TREE_CODE (*base) != VAR_DECL)
+ return NULL;
+
+ tree *new_decl = info->adjusted_vars->get (*base);
+ if (!new_decl)
+ return NULL;
+
+ int base_quals = TYPE_QUALS (TREE_TYPE (*new_decl));
+ tree field = TREE_OPERAND (*tp, 1);
+
+ /* Adjust the type of the field. */
+ int field_quals = TYPE_QUALS (TREE_TYPE (field));
+ if (TREE_CODE (field) == FIELD_DECL && field_quals != base_quals)
+ {
+ tree *field_type = &TREE_TYPE (field);
+ while (TREE_CODE (*field_type) == ARRAY_TYPE)
+ field_type = &TREE_TYPE (*field_type);
+ field_quals |= base_quals;
+ *field_type = build_qualified_type (*field_type, field_quals);
+ }
+
+ /* Adjust the type of the component ref itself. */
+ tree comp_type = TREE_TYPE (*tp);
+ int comp_quals = TYPE_QUALS (comp_type);
+ if (TREE_CODE (*tp) == COMPONENT_REF && comp_quals != base_quals)
+ {
+ comp_quals |= base_quals;
+ TREE_TYPE (*tp)
+ = build_qualified_type (comp_type, comp_quals);
+ }
+
+ *base = *new_decl;
+ info->modified = true;
+ }
+ else if (TREE_CODE (*tp) == VAR_DECL)
+ {
+ tree *new_decl = info->adjusted_vars->get (*tp);
+ if (new_decl)
+ {
+ *tp = *new_decl;
+ info->modified = true;
+ }
+ }
+
+ return NULL_TREE;
+}
+
+/* Return TRUE if CALL is a call to a builtin atomic/sync operation. */
+
+static bool
+is_sync_builtin_call (gcall *call)
+{
+ tree callee = gimple_call_fndecl (call);
+
+ if (callee != NULL_TREE
+ && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+ switch (DECL_FUNCTION_CODE (callee))
+ {
+#undef DEF_SYNC_BUILTIN
+#define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
+#include "sync-builtins.def"
+#undef DEF_SYNC_BUILTIN
+ return true;
+
+ default:
+ ;
+ }
+
+ return false;
+}
+
/* Main entry point for oacc transformations which run on the device
compiler after LTO, so we know what the target device is at this
point (including the host fallback). */
dominance information to update SSA. */
calculate_dominance_info (CDI_DOMINATORS);
+ hash_map<tree, tree> adjusted_vars;
+
/* Now lower internal loop functions to target-specific code
sequences. */
basic_block bb;
case IFN_UNIQUE_OACC_TAIL_MARK:
remove = true;
break;
+
+ case IFN_UNIQUE_OACC_PRIVATE:
+ {
+ HOST_WIDE_INT level
+ = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
+ if (level == -1)
+ break;
+ for (unsigned i = 3;
+ i < gimple_call_num_args (call);
+ i++)
+ {
+ tree arg = gimple_call_arg (call, i);
+ gcc_checking_assert (TREE_CODE (arg) == ADDR_EXPR);
+ tree decl = TREE_OPERAND (arg, 0);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ static char const *const axes[] =
+ /* Must be kept in sync with GOMP_DIM
+ enumeration. */
+ { "gang", "worker", "vector" };
+ fprintf (dump_file, "Decl UID %u has %s "
+ "partitioning:", DECL_UID (decl),
+ axes[level]);
+ print_generic_decl (dump_file, decl, TDF_SLIM);
+ fputc ('\n', dump_file);
+ }
+ if (targetm.goacc.adjust_private_decl)
+ {
+ tree oldtype = TREE_TYPE (decl);
+ tree newdecl
+ = targetm.goacc.adjust_private_decl (decl, level);
+ if (TREE_TYPE (newdecl) != oldtype
+ || newdecl != decl)
+ adjusted_vars.put (decl, newdecl);
+ }
+ }
+ remove = true;
+ }
+ break;
}
break;
}
gsi_next (&gsi);
}
+ /* Make adjustments to gang-private local variables if required by the
+ target, e.g. forcing them into a particular address space. Afterwards,
+ ADDR_EXPR nodes which have adjusted variables as their argument need to
+ be modified in one of two ways:
+
+ 1. They can be recreated, making a pointer to the variable in the new
+ address space, or
+
+ 2. The address of the variable in the new address space can be taken,
+ converted to the default (original) address space, and the result of
+ that conversion subsituted in place of the original ADDR_EXPR node.
+
+ Which of these is done depends on the gimple statement being processed.
+ At present atomic operations and inline asms use (1), and everything else
+ uses (2). At least on AMD GCN, there are atomic operations that work
+ directly in the LDS address space.
+
+ COMPONENT_REFS, ARRAY_REFS and plain VAR_DECLs are also rewritten to use
+ the new decl, adjusting types of appropriate tree nodes as necessary. */
+
+ if (targetm.goacc.adjust_private_decl)
+ {
+ FOR_ALL_BB_FN (bb, cfun)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ walk_stmt_info wi;
+ var_decl_rewrite_info info;
+
+ info.avoid_pointer_conversion
+ = (is_gimple_call (stmt)
+ && is_sync_builtin_call (as_a <gcall *> (stmt)))
+ || gimple_code (stmt) == GIMPLE_ASM;
+ info.stmt = stmt;
+ info.modified = false;
+ info.adjusted_vars = &adjusted_vars;
+
+ memset (&wi, 0, sizeof (wi));
+ wi.info = &info;
+
+ walk_gimple_op (stmt, oacc_rewrite_var_decl, &wi);
+
+ if (info.modified)
+ update_stmt (stmt);
+ }
+ }
+
free_oacc_loop (loops);
return 0;
void, (gcall *call),
default_goacc_reduction)
+DEFHOOK
+(adjust_private_decl,
+"This hook, if defined, is used by accelerator target back-ends to adjust\n\
+OpenACC variable declarations that should be made private to the given\n\
+parallelism level (i.e. @code{GOMP_DIM_GANG}, @code{GOMP_DIM_WORKER} or\n\
+@code{GOMP_DIM_VECTOR}). A typical use for this hook is to force variable\n\
+declarations at the @code{gang} level to reside in GPU shared memory.\n\
+\n\
+You may also use the @code{TARGET_GOACC_EXPAND_VAR_DECL} hook if the\n\
+adjusted variable declaration needs to be expanded to RTL in a non-standard\n\
+way.",
+tree, (tree var, int level),
+NULL)
+
+DEFHOOK
+(expand_var_decl,
+"This hook, if defined, is used by accelerator target back-ends to expand\n\
+specially handled kinds of @code{VAR_DECL} expressions. A particular use is\n\
+to place variables with specific attributes inside special accelarator\n\
+memories. A return value of @code{NULL} indicates that the target does not\n\
+handle this @code{VAR_DECL}, and normal RTL expanding is resumed.\n\
+\n\
+Only define this hook if your accelerator target needs to expand certain\n\
+@code{VAR_DECL} nodes in a way that differs from the default. You can also adjust\n\
+private variables at OpenACC device-lowering time using the\n\
+@code{TARGET_GOACC_ADJUST_PRIVATE_DECL} target hook.",
+rtx, (tree var),
+NULL)
+
HOOK_VECTOR_END (goacc)
/* Functions relating to vectorization. */
--- /dev/null
+#include <assert.h>
+
+int main (void)
+{
+ int ret;
+
+ #pragma acc parallel num_gangs(1) num_workers(32) copyout(ret)
+ {
+ int w = 0;
+
+ #pragma acc loop worker
+ for (int i = 0; i < 32; i++)
+ {
+ #pragma acc atomic update
+ w++;
+ }
+
+ ret = (w == 32);
+ }
+ assert (ret);
+
+ #pragma acc parallel num_gangs(1) vector_length(32) copyout(ret)
+ {
+ int v = 0;
+
+ #pragma acc loop vector
+ for (int i = 0; i < 32; i++)
+ {
+ #pragma acc atomic update
+ v++;
+ }
+
+ ret = (v == 32);
+ }
+ assert (ret);
+
+ return 0;
+}
--- /dev/null
+! Test for "oacc gang-private" attribute on gang-private variables
+
+! { dg-do run }
+! { dg-additional-options "-fdump-tree-oaccdevlow-details -w" }
+
+program main
+ integer :: w, arr(0:31)
+
+ !$acc parallel num_gangs(32) num_workers(32) copyout(arr)
+ !$acc loop gang private(w)
+! { dg-final { scan-tree-dump-times "Decl UID \[0-9\]+ has gang partitioning: integer\\(kind=4\\) w;" 1 "oaccdevlow" } } */
+ do j = 0, 31
+ w = 0
+ !$acc loop seq
+ do i = 0, 31
+ !$acc atomic update
+ w = w + 1
+ !$acc end atomic
+ end do
+ arr(j) = w
+ end do
+ !$acc end parallel
+
+ if (any (arr .ne. 32)) stop 1
+end program main
--- /dev/null
+! Test for worker-private variables
+
+! { dg-do run }
+! { dg-additional-options "-fdump-tree-oaccdevlow-details" }
+
+program main
+ integer :: w, arr(0:31)
+
+ !$acc parallel num_gangs(32) num_workers(32) copyout(arr)
+ !$acc loop gang worker private(w)
+! { dg-final { scan-tree-dump-times "Decl UID \[0-9\]+ has worker partitioning: integer\\(kind=4\\) w;" 1 "oaccdevlow" } } */
+ do j = 0, 31
+ w = 0
+ !$acc loop seq
+ do i = 0, 31
+ !$acc atomic update
+ w = w + 1
+ ! nvptx offloading: PR83812 "operation not supported on global/shared address space".
+ ! { dg-output "(\n|\r\n|\r)libgomp: cuStreamSynchronize error: operation not supported on global/shared address space(\n|\r\n|\r)$" { target openacc_nvidia_accel_selected } }
+ ! Scan for what we expect in the "XFAILed" case (without actually XFAILing).
+ ! { dg-shouldfail "XFAILed" { openacc_nvidia_accel_selected } }
+ ! ... instead of 'dg-xfail-run-if' so that 'dg-output' is evaluated at all.
+ ! { dg-final { if { [dg-process-target { xfail openacc_nvidia_accel_selected }] == "F" } { xfail "[testname-for-summary] really is XFAILed" } } }
+ ! ... so that we still get an XFAIL visible in the log.
+ !$acc end atomic
+ end do
+ arr(j) = w
+ end do
+ !$acc end parallel
+
+ if (any (arr .ne. 32)) stop 1
+end program main