handle_omp_declare_target_attribute, NULL },
{ "omp declare target implicit", 0, 0, true, false, false, false,
handle_omp_declare_target_attribute, NULL },
+ { "omp declare target indirect", 0, 0, true, false, false, false,
+ handle_omp_declare_target_attribute, NULL },
{ "omp declare target host", 0, 0, true, false, false, false,
handle_omp_declare_target_attribute, NULL },
{ "omp declare target nohost", 0, 0, true, false, false, false,
PRAGMA_OMP_CLAUSE_IF,
PRAGMA_OMP_CLAUSE_IN_REDUCTION,
PRAGMA_OMP_CLAUSE_INBRANCH,
+ PRAGMA_OMP_CLAUSE_INDIRECT,
PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR,
PRAGMA_OMP_CLAUSE_LASTPRIVATE,
PRAGMA_OMP_CLAUSE_LINEAR,
attributes
= tree_cons (get_identifier ("omp declare target nohost"),
NULL_TREE, attributes);
+
+ int indirect
+ = current_omp_declare_target_attribute->last ().indirect;
+ if (indirect && !lookup_attribute ("omp declare target indirect",
+ attributes))
+ attributes
+ = tree_cons (get_identifier ("omp declare target indirect"),
+ NULL_TREE, attributes);
}
}
struct GTY(()) c_omp_declare_target_attr {
bool attr_syntax;
int device_type;
+ /* Nonzero if an 'indirect' clause on the enclosing construct evaluated
+ to true; declarations in its scope then receive the
+ "omp declare target indirect" attribute.  */
+ int indirect;
};
struct GTY(()) c_omp_begin_assumes_data {
result = PRAGMA_OMP_CLAUSE_IN_REDUCTION;
else if (!strcmp ("inbranch", p))
result = PRAGMA_OMP_CLAUSE_INBRANCH;
+ else if (!strcmp ("indirect", p))
+ result = PRAGMA_OMP_CLAUSE_INDIRECT;
else if (!strcmp ("independent", p))
result = PRAGMA_OACC_CLAUSE_INDEPENDENT;
else if (!strcmp ("is_device_ptr", p))
return list;
}
+/* OpenMP 5.1:
+ indirect [( expression )]
+
+ Parse an 'indirect' clause.  The optional parenthesized argument must
+ fold to an integer constant, interpreted as a logical value; omitting
+ it is equivalent to 'indirect (1)'.  Returns the new clause chained
+ onto LIST, or LIST unchanged on a parse/diagnostic error.  */
+
+static tree
+c_parser_omp_clause_indirect (c_parser *parser, tree list)
+{
+ location_t location = c_parser_peek_token (parser)->location;
+ tree t;
+
+ if (c_parser_peek_token (parser)->type == CPP_OPEN_PAREN)
+ {
+ matching_parens parens;
+ if (!parens.require_open (parser))
+ return list;
+
+ location_t loc = c_parser_peek_token (parser)->location;
+ c_expr expr = c_parser_expr_no_commas (parser, NULL);
+ expr = convert_lvalue_to_rvalue (loc, expr, true, true);
+ /* Convert to a truth value and fold; only a compile-time integer
+ constant is acceptable here.  */
+ t = c_objc_common_truthvalue_conversion (loc, expr.value);
+ t = c_fully_fold (t, false, NULL);
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (t))
+ || TREE_CODE (t) != INTEGER_CST)
+ {
+ c_parser_error (parser, "expected constant logical expression");
+ return list;
+ }
+ parens.skip_until_found_close (parser);
+ }
+ else
+ /* 'indirect' without an argument means 'indirect (1)'.  */
+ t = integer_one_node;
+
+ /* At most one 'indirect' clause may appear on a directive.  */
+ check_no_duplicate_clause (list, OMP_CLAUSE_INDIRECT, "indirect");
+
+ tree c = build_omp_clause (location, OMP_CLAUSE_INDIRECT);
+ OMP_CLAUSE_INDIRECT_EXPR (c) = t;
+ OMP_CLAUSE_CHAIN (c) = list;
+
+ return c;
+}
+
/* OpenACC, OpenMP 2.5:
if ( expression )
true, clauses);
c_name = "in_reduction";
break;
+ case PRAGMA_OMP_CLAUSE_INDIRECT:
+ clauses = c_parser_omp_clause_indirect (parser, clauses);
+ c_name = "indirect";
+ break;
case PRAGMA_OMP_CLAUSE_LASTPRIVATE:
clauses = c_parser_omp_clause_lastprivate (parser, clauses);
c_name = "lastprivate";
( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_TO) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ENTER) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_LINK) \
- | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE))
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INDIRECT))
static void
c_parser_omp_declare_target (c_parser *parser)
{
tree clauses = NULL_TREE;
int device_type = 0;
- bool only_device_type = true;
+ bool indirect = false;
+ bool only_device_type_or_indirect = true;
if (c_parser_next_token_is (parser, CPP_NAME)
|| (c_parser_next_token_is (parser, CPP_COMMA)
&& c_parser_peek_2nd_token (parser)->type == CPP_NAME))
{
bool attr_syntax = parser->in_omp_attribute_pragma != NULL;
c_parser_skip_to_pragma_eol (parser);
- c_omp_declare_target_attr attr = { attr_syntax, -1 };
+ c_omp_declare_target_attr attr = { attr_syntax, -1, 0 };
vec_safe_push (current_omp_declare_target_attribute, attr);
return;
}
- for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE)
- device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c);
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE)
+ device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c);
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT)
+ indirect |= !integer_zerop (OMP_CLAUSE_INDIRECT_EXPR (c));
+ }
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ {
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE
+ || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT)
continue;
tree t = OMP_CLAUSE_DECL (c), id;
tree at1 = lookup_attribute ("omp declare target", DECL_ATTRIBUTES (t));
tree at2 = lookup_attribute ("omp declare target link",
DECL_ATTRIBUTES (t));
- only_device_type = false;
+ only_device_type_or_indirect = false;
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINK)
{
id = get_identifier ("omp declare target link");
= tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (t));
}
}
+ if (indirect)
+ {
+ tree at4 = lookup_attribute ("omp declare target indirect",
+ DECL_ATTRIBUTES (t));
+ if (at4 == NULL_TREE)
+ {
+ id = get_identifier ("omp declare target indirect");
+ DECL_ATTRIBUTES (t)
+ = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (t));
+ }
+ }
}
- if (device_type && only_device_type)
+ if ((device_type || indirect) && only_device_type_or_indirect)
error_at (OMP_CLAUSE_LOCATION (clauses),
- "directive with only %<device_type%> clause");
+ "directive with only %<device_type%> or %<indirect%> clauses");
+ if (indirect && device_type && device_type != OMP_CLAUSE_DEVICE_TYPE_ANY)
+ error_at (OMP_CLAUSE_LOCATION (clauses),
+ "%<device_type%> clause must specify 'any' when used with "
+ "an %<indirect%> clause");
}
/* OpenMP 5.1
#pragma omp begin declare target clauses[optseq] new-line */
#define OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK \
- (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE)
+ ( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INDIRECT))
static void
c_parser_omp_begin (c_parser *parser)
OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK,
"#pragma omp begin declare target");
int device_type = 0;
+ int indirect = 0;
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE)
- device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c);
- c_omp_declare_target_attr attr = { attr_syntax, device_type };
+ {
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE)
+ device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c);
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT)
+ indirect |= !integer_zerop (OMP_CLAUSE_INDIRECT_EXPR (c));
+ }
+ c_omp_declare_target_attr attr = { attr_syntax, device_type,
+ indirect };
vec_safe_push (current_omp_declare_target_attribute, attr);
}
else
case OMP_CLAUSE_IF_PRESENT:
case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE_NOHOST:
+ case OMP_CLAUSE_INDIRECT:
pc = &OMP_CLAUSE_CHAIN (c);
continue;
static void
process_asm (FILE *in, FILE *out, FILE *cfile)
{
- int fn_count = 0, var_count = 0, dims_count = 0, regcount_count = 0;
+ int fn_count = 0, var_count = 0, ind_fn_count = 0;
+ int dims_count = 0, regcount_count = 0;
struct obstack fns_os, dims_os, regcounts_os;
obstack_init (&fns_os);
obstack_init (&dims_os);
{ IN_CODE,
IN_METADATA,
IN_VARS,
- IN_FUNCS
+ IN_FUNCS,
+ IN_IND_FUNCS,
} state = IN_CODE;
while (fgets (buf, sizeof (buf), in))
{
}
break;
}
+ case IN_IND_FUNCS:
+ {
+ char *funcname;
+ if (sscanf (buf, "\t.8byte\t%ms\n", &funcname))
+ {
+ fputs (buf, out);
+ ind_fn_count++;
+ continue;
+ }
+ break;
+ }
}
char dummy;
".offload_func_table:\n",
out);
}
+ else if (sscanf (buf, " .section .gnu.offload_ind_funcs%c", &dummy) > 0)
+ {
+ state = IN_IND_FUNCS;
+ fputs (buf, out);
+ fputs ("\t.global .offload_ind_func_table\n"
+ "\t.type .offload_ind_func_table, @object\n"
+ ".offload_ind_func_table:\n",
+ out);
+ }
else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0)
{
state = IN_METADATA;
fprintf (cfile, "#include <stdbool.h>\n\n");
fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
+ fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count);
/* Dump out function idents. */
fprintf (cfile, "static const struct hsa_kernel_description {\n"
" const struct gcn_image *gcn_image;\n"
" unsigned kernel_count;\n"
" const struct hsa_kernel_description *kernel_infos;\n"
+ " unsigned ind_func_count;\n"
" unsigned global_variable_count;\n"
"} gcn_data = {\n"
" %d,\n"
" &gcn_image,\n"
" sizeof (gcn_kernels) / sizeof (gcn_kernels[0]),\n"
" gcn_kernels,\n"
+ " gcn_num_ind_funcs,\n"
" gcn_num_vars\n"
"};\n\n", omp_requires);
};
static id_map *func_ids, **funcs_tail = &func_ids;
+static id_map *ind_func_ids, **ind_funcs_tail = &ind_func_ids;
static id_map *var_ids, **vars_tail = &var_ids;
/* Files to unlink. */
output_fn_ptr = true;
record_id (input + i + 9, &funcs_tail);
}
+ else if (startswith (input + i, "IND_FUNC_MAP "))
+ {
+ output_fn_ptr = true;
+ record_id (input + i + 13, &ind_funcs_tail);
+ }
else
abort ();
/* Skip to next line. */
fprintf (out, "};\\n\";\n\n");
}
+ if (ind_func_ids)
+ {
+ const char needle[] = "// BEGIN GLOBAL FUNCTION DECL: ";
+
+ fprintf (out, "static const char ptx_code_%u[] =\n", obj_count++);
+ fprintf (out, "\t\".version ");
+ for (size_t i = 0; version[i] != '\0' && version[i] != '\n'; i++)
+ fputc (version[i], out);
+ fprintf (out, "\"\n\t\".target sm_");
+ for (size_t i = 0; sm_ver[i] != '\0' && sm_ver[i] != '\n'; i++)
+ fputc (sm_ver[i], out);
+ fprintf (out, "\"\n\t\".file 2 \\\"<dummy>\\\"\"\n");
+
+ /* WORKAROUND - see PR 108098
+ It seems as if the older CUDA JIT compiler optimizes the function
+ pointers in offload_func_table to NULL, which can be prevented by
+ adding a dummy procedure.  CUDA 11.1 appears to work fine without the
+ workaround, while CUDA 10.2 and some older versions need it.
+ Assuming CUDA 11.0 fixes it, emitting the dummy could be restricted
+ to 'if (sm_ver2[0] < 8 && version2[0] < 7)', as sm_80 and PTX ISA 7.0
+ are new in CUDA 11.0; for 11.1 it would be sm_86 and PTX ISA 7.1.  */
+ fprintf (out, "\n\t\".func __dummy$func2 ( );\"\n");
+ fprintf (out, "\t\".func __dummy$func2 ( )\"\n");
+ fprintf (out, "\t\"{\"\n");
+ fprintf (out, "\t\"}\"\n");
+
+ size_t fidx = 0;
+ for (id = ind_func_ids; id; id = id->next)
+ {
+ fprintf (out, "\t\".extern ");
+ const char *p = input + file_idx[fidx];
+ while (true)
+ {
+ p = strstr (p, needle);
+ if (!p)
+ {
+ fidx++;
+ if (fidx >= file_cnt)
+ break;
+ p = input + file_idx[fidx];
+ continue;
+ }
+ p += strlen (needle);
+ if (!startswith (p, id->ptx_name))
+ continue;
+ p += strlen (id->ptx_name);
+ if (*p != '\n')
+ continue;
+ p++;
+ /* Skip over any directives. */
+ while (!startswith (p, ".func"))
+ while (*p++ != ' ');
+ for (; *p != '\0' && *p != '\n'; p++)
+ fputc (*p, out);
+ break;
+ }
+ fprintf (out, "\"\n");
+ if (fidx == file_cnt)
+ fatal_error (input_location,
+ "Cannot find function declaration for %qs",
+ id->ptx_name);
+ }
+
+ fprintf (out, "\t\".visible .global .align 8 .u64 "
+ "$offload_ind_func_table[] = {");
+ for (comma = "", id = ind_func_ids; id; comma = ",", id = id->next)
+ fprintf (out, "%s\"\n\t\t\"%s", comma, id->ptx_name);
+ fprintf (out, "};\\n\";\n\n");
+ }
+
/* Dump out array of pointers to ptx object strings. */
fprintf (out, "static const struct ptx_obj {\n"
" const char *code;\n"
id->dim ? id->dim : "");
fprintf (out, "\n};\n\n");
+ /* Dump out indirect function idents. */
+ fprintf (out, "static const char *const ind_func_mappings[] = {");
+ for (comma = "", id = ind_func_ids; id; comma = ",", id = id->next)
+ fprintf (out, "%s\n\t\"%s\"", comma, id->ptx_name);
+ fprintf (out, "\n};\n\n");
+
fprintf (out,
"static const struct nvptx_data {\n"
" uintptr_t omp_requires_mask;\n"
" unsigned var_num;\n"
" const struct nvptx_fn *fn_names;\n"
" unsigned fn_num;\n"
+ " unsigned ind_fn_num;\n"
"} nvptx_data = {\n"
" %d, ptx_objs, sizeof (ptx_objs) / sizeof (ptx_objs[0]),\n"
" var_mappings,"
" sizeof (var_mappings) / sizeof (var_mappings[0]),\n"
" func_mappings,"
- " sizeof (func_mappings) / sizeof (func_mappings[0])\n"
+ " sizeof (func_mappings) / sizeof (func_mappings[0]),\n"
+ " sizeof (ind_func_mappings) / sizeof (ind_func_mappings[0])\n"
"};\n\n", omp_requires);
fprintf (out, "#ifdef __cplusplus\n"
/* OpenMP offloading does not set this attribute. */
tree dims = attr ? TREE_VALUE (attr) : NULL_TREE;
- fprintf (asm_out_file, "//:FUNC_MAP \"%s\"",
+ fprintf (asm_out_file, "//:");
+ if (lookup_attribute ("omp declare target indirect",
+ DECL_ATTRIBUTES (decl)))
+ fprintf (asm_out_file, "IND_");
+ fprintf (asm_out_file, "FUNC_MAP \"%s\"",
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
for (; dims; dims = TREE_CHAIN (dims))
struct GTY(()) cp_omp_declare_target_attr {
bool attr_syntax;
int device_type;
+ /* True if an 'indirect' clause on the enclosing construct evaluated to
+ true; declarations in its scope then receive the
+ "omp declare target indirect" attribute.  */
+ bool indirect;
};
struct GTY(()) cp_omp_begin_assumes_data {
attributes
= tree_cons (get_identifier ("omp declare target nohost"),
NULL_TREE, attributes);
+ if (last.indirect
+ && !lookup_attribute ("omp declare target indirect",
+ attributes))
+ attributes
+ = tree_cons (get_identifier ("omp declare target indirect"),
+ NULL_TREE, attributes);
}
}
}
result = PRAGMA_OMP_CLAUSE_IN_REDUCTION;
else if (!strcmp ("inbranch", p))
result = PRAGMA_OMP_CLAUSE_INBRANCH;
+ else if (!strcmp ("indirect", p))
+ result = PRAGMA_OMP_CLAUSE_INDIRECT;
else if (!strcmp ("independent", p))
result = PRAGMA_OACC_CLAUSE_INDEPENDENT;
else if (!strcmp ("is_device_ptr", p))
return c;
}
+/* OpenMP 5.1:
+ indirect [( expression )]
+
+ Parse an 'indirect' clause.  The optional parenthesized argument must
+ be a constant expression, interpreted as a logical value; omitting it
+ is equivalent to 'indirect (1)'.  Returns the new clause chained onto
+ LIST; on error the clause may carry error_mark_node as its
+ expression.  */
+
+static tree
+cp_parser_omp_clause_indirect (cp_parser *parser, tree list,
+ location_t location)
+{
+ tree t;
+
+ if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN))
+ {
+ matching_parens parens;
+ if (!parens.require_open (parser))
+ return list;
+
+ bool non_constant_p;
+ t = cp_parser_constant_expression (parser, true, &non_constant_p);
+
+ /* Only compile-time constant expressions are acceptable here.  */
+ if (t != error_mark_node && non_constant_p)
+ error_at (location, "expected constant logical expression");
+
+ /* Recover by skipping to the closing parenthesis.  */
+ if (t == error_mark_node
+ || !parens.require_close (parser))
+ cp_parser_skip_to_closing_parenthesis (parser, /*recovering=*/true,
+ /*or_comma=*/false,
+ /*consume_paren=*/true);
+ }
+ else
+ /* 'indirect' without an argument means 'indirect (1)'.  */
+ t = integer_one_node;
+
+ /* At most one 'indirect' clause may appear on a directive.  */
+ check_no_duplicate_clause (list, OMP_CLAUSE_INDIRECT, "indirect", location);
+
+ tree c = build_omp_clause (location, OMP_CLAUSE_INDIRECT);
+ OMP_CLAUSE_INDIRECT_EXPR (c) = t;
+ OMP_CLAUSE_CHAIN (c) = list;
+
+ return c;
+}
+
/* OpenMP 2.5:
if ( expression )
true, clauses);
c_name = "in_reduction";
break;
+ case PRAGMA_OMP_CLAUSE_INDIRECT:
+ clauses = cp_parser_omp_clause_indirect (parser, clauses,
+ token->location);
+ c_name = "indirect";
+ break;
case PRAGMA_OMP_CLAUSE_LASTPRIVATE:
clauses = cp_parser_omp_clause_lastprivate (parser, clauses);
c_name = "lastprivate";
on #pragma omp declare target. Return false if errors were reported. */
static bool
-handle_omp_declare_target_clause (tree c, tree t, int device_type)
+handle_omp_declare_target_clause (tree c, tree t, int device_type,
+ bool indirect)
{
tree at1 = lookup_attribute ("omp declare target", DECL_ATTRIBUTES (t));
tree at2 = lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t));
DECL_ATTRIBUTES (t) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (t));
}
}
+ if (indirect)
+ {
+ tree at4 = lookup_attribute ("omp declare target indirect",
+ DECL_ATTRIBUTES (t));
+ if (at4 == NULL_TREE)
+ {
+ id = get_identifier ("omp declare target indirect");
+ DECL_ATTRIBUTES (t)
+ = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (t));
+ }
+ }
return true;
}
( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_TO) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ENTER) \
| (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_LINK) \
- | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE))
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INDIRECT))
static void
cp_parser_omp_declare_target (cp_parser *parser, cp_token *pragma_tok)
{
tree clauses = NULL_TREE;
int device_type = 0;
- bool only_device_type = true;
+ bool indirect = false;
+ bool only_device_type_or_indirect = true;
if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)
|| (cp_lexer_next_token_is (parser->lexer, CPP_COMMA)
&& cp_lexer_nth_token_is (parser->lexer, 2, CPP_NAME)))
else
{
cp_omp_declare_target_attr a
- = { parser->lexer->in_omp_attribute_pragma, -1 };
+ = { parser->lexer->in_omp_attribute_pragma, -1, false };
vec_safe_push (scope_chain->omp_declare_target_attribute, a);
cp_parser_require_pragma_eol (parser, pragma_tok);
return;
}
- for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE)
- device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c);
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
{
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE)
+ device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c);
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT)
+ indirect |= !integer_zerop (OMP_CLAUSE_INDIRECT_EXPR (c));
+ }
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ {
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE
+ || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT)
continue;
tree t = OMP_CLAUSE_DECL (c);
- only_device_type = false;
- if (!handle_omp_declare_target_clause (c, t, device_type))
+ only_device_type_or_indirect = false;
+ if (!handle_omp_declare_target_clause (c, t, device_type, indirect))
continue;
if (VAR_OR_FUNCTION_DECL_P (t)
&& DECL_LOCAL_DECL_P (t)
&& DECL_LOCAL_DECL_ALIAS (t)
&& DECL_LOCAL_DECL_ALIAS (t) != error_mark_node)
handle_omp_declare_target_clause (c, DECL_LOCAL_DECL_ALIAS (t),
- device_type);
+ device_type, indirect);
}
- if (device_type && only_device_type)
+ if ((device_type || indirect) && only_device_type_or_indirect)
+ error_at (OMP_CLAUSE_LOCATION (clauses),
+ "directive with only %<device_type%> or %<indirect%> clauses");
+ if (indirect && device_type && device_type != OMP_CLAUSE_DEVICE_TYPE_ANY)
error_at (OMP_CLAUSE_LOCATION (clauses),
- "directive with only %<device_type%> clause");
+ "%<device_type%> clause must specify 'any' when used with "
+ "an %<indirect%> clause");
}
/* OpenMP 5.1
# pragma omp begin declare target clauses[optseq] new-line */
#define OMP_BEGIN_DECLARE_TARGET_CLAUSE_MASK \
- (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE)
+ ( (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEVICE_TYPE) \
+ | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_INDIRECT))
static void
cp_parser_omp_begin (cp_parser *parser, cp_token *pragma_tok)
"#pragma omp begin declare target",
pragma_tok);
int device_type = 0;
+ bool indirect = 0;
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
- if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE)
- device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c);
+ {
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEVICE_TYPE)
+ device_type |= OMP_CLAUSE_DEVICE_TYPE_KIND (c);
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_INDIRECT)
+ indirect |= !integer_zerop (OMP_CLAUSE_INDIRECT_EXPR (c));
+ }
cp_omp_declare_target_attr a
- = { in_omp_attribute_pragma, device_type };
+ = { in_omp_attribute_pragma, device_type, indirect };
vec_safe_push (scope_chain->omp_declare_target_attribute, a);
}
else
case OMP_CLAUSE_IF_PRESENT:
case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE_NOHOST:
+ case OMP_CLAUSE_INDIRECT:
break;
case OMP_CLAUSE_MERGEABLE:
LTO_symtab_edge,
LTO_symtab_indirect_edge,
LTO_symtab_variable,
+ LTO_symtab_indirect_function,
LTO_symtab_last_tag
};
(*offload_vars)[i]);
}
+ for (unsigned i = 0; i < vec_safe_length (offload_ind_funcs); i++)
+ {
+ symtab_node *node = symtab_node::get ((*offload_ind_funcs)[i]);
+ if (!node)
+ continue;
+ node->force_output = true;
+ streamer_write_enum (ob->main_stream, LTO_symtab_tags,
+ LTO_symtab_last_tag, LTO_symtab_indirect_function);
+ lto_output_fn_decl_ref (ob->decl_state, ob->main_stream,
+ (*offload_ind_funcs)[i]);
+ }
+
if (output_requires)
{
HOST_WIDE_INT val = ((HOST_WIDE_INT) omp_requires_mask
{
vec_free (offload_funcs);
vec_free (offload_vars);
+ vec_free (offload_ind_funcs);
}
}
varpool_node::get (var_decl)->force_output = 1;
tmp_decl = var_decl;
}
+ else if (tag == LTO_symtab_indirect_function)
+ {
+ tree fn_decl
+ = lto_input_fn_decl_ref (ib, file_data);
+ vec_safe_push (offload_ind_funcs, fn_decl);
+
+ /* Prevent IPA from removing fn_decl as unreachable, since there
+ may be no refs from the parent function to child_fn in offload
+ LTO mode. */
+ if (do_force_output)
+ cgraph_node::get (fn_decl)->mark_force_output ();
+ tmp_decl = fn_decl;
+ }
else if (tag == LTO_symtab_edge)
{
static bool error_emitted = false;
#define OFFLOAD_VAR_TABLE_SECTION_NAME ".gnu.offload_vars"
#define OFFLOAD_FUNC_TABLE_SECTION_NAME ".gnu.offload_funcs"
+#define OFFLOAD_IND_FUNC_TABLE_SECTION_NAME ".gnu.offload_ind_funcs"
#endif /* GCC_LTO_SECTION_NAMES_H */
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA,
"GOMP_target_enter_exit_data",
BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_UINT_PTR, ATTR_NOTHROW_LIST)
+DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR,
+ "GOMP_target_map_indirect_ptr",
+ BT_FN_PTR_PTR, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS4, "GOMP_teams4",
BT_FN_BOOL_UINT_UINT_UINT_BOOL, ATTR_NOTHROW_LIST)
DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TEAMS_REG, "GOMP_teams_reg",
};
/* Holds offload tables with decls. */
-vec<tree, va_gc> *offload_funcs, *offload_vars;
+vec<tree, va_gc> *offload_funcs, *offload_vars, *offload_ind_funcs;
/* Return level at which oacc routine may spawn a partitioned loop, or
-1 if it is not a routine (i.e. is an offload fn). */
if (DECL_SAVED_TREE (node->decl))
{
struct cgraph_node *cgn;
+ if (lookup_attribute ("omp declare target indirect",
+ DECL_ATTRIBUTES (node->decl)))
+ vec_safe_push (offload_ind_funcs, node->decl);
if (omp_declare_target_fn_p (node->decl))
worklist.safe_push (node->decl);
else if (DECL_STRUCT_FUNCTION (node->decl)
{
unsigned num_funcs = vec_safe_length (offload_funcs);
unsigned num_vars = vec_safe_length (offload_vars);
+ unsigned num_ind_funcs = vec_safe_length (offload_ind_funcs);
- if (num_funcs == 0 && num_vars == 0)
+ if (num_funcs == 0 && num_vars == 0 && num_ind_funcs == 0)
return;
if (targetm_common.have_named_sections)
{
- vec<constructor_elt, va_gc> *v_f, *v_v;
+ vec<constructor_elt, va_gc> *v_f, *v_v, *v_if;
vec_alloc (v_f, num_funcs);
vec_alloc (v_v, num_vars * 2);
+ vec_alloc (v_if, num_ind_funcs);
add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
add_decls_addresses_to_decl_constructor (offload_vars, v_v);
+ add_decls_addresses_to_decl_constructor (offload_ind_funcs, v_if);
tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
vec_safe_length (v_v));
tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
num_funcs);
+ tree ind_funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
+ num_ind_funcs);
+
SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
+ SET_TYPE_ALIGN (ind_funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
tree ctor_v = build_constructor (vars_decl_type, v_v);
tree ctor_f = build_constructor (funcs_decl_type, v_f);
- TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
- TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
+ tree ctor_if = build_constructor (ind_funcs_decl_type, v_if);
+ TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = TREE_CONSTANT (ctor_if) = 1;
+ TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = TREE_STATIC (ctor_if) = 1;
tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
get_identifier (".offload_func_table"),
funcs_decl_type);
tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
get_identifier (".offload_var_table"),
vars_decl_type);
- TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
+ tree ind_funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
+ get_identifier (".offload_ind_func_table"),
+ ind_funcs_decl_type);
+ TREE_STATIC (funcs_decl) = TREE_STATIC (ind_funcs_decl) = 1;
+ TREE_STATIC (vars_decl) = 1;
/* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
otherwise a joint table in a binary will contain padding between
tables from multiple object files. */
- DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
+ DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (ind_funcs_decl) = 1;
+ DECL_USER_ALIGN (vars_decl) = 1;
SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
+ SET_DECL_ALIGN (ind_funcs_decl, TYPE_ALIGN (ind_funcs_decl_type));
DECL_INITIAL (funcs_decl) = ctor_f;
DECL_INITIAL (vars_decl) = ctor_v;
+ DECL_INITIAL (ind_funcs_decl) = ctor_if;
set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
-
+ set_decl_section_name (ind_funcs_decl,
+ OFFLOAD_IND_FUNC_TABLE_SECTION_NAME);
varpool_node::finalize_decl (vars_decl);
varpool_node::finalize_decl (funcs_decl);
+ varpool_node::finalize_decl (ind_funcs_decl);
}
else
{
#endif
targetm.record_offload_symbol (it);
}
+ for (unsigned i = 0; i < num_ind_funcs; i++)
+ {
+ tree it = (*offload_ind_funcs)[i];
+ /* See also add_decls_addresses_to_decl_constructor
+ and output_offload_tables in lto-cgraph.cc. */
+ if (!in_lto_p && !symtab_node::get (it))
+ continue;
+ targetm.record_offload_symbol (it);
+ }
}
}
gimple_stmt_iterator gsi;
bool calls_declare_variant_alt
= cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
+#ifdef ACCEL_COMPILER
+ bool omp_redirect_indirect_calls = vec_safe_length (offload_ind_funcs) > 0;
+ tree map_ptr_fn
+ = builtin_decl_explicit (BUILT_IN_GOMP_TARGET_MAP_INDIRECT_PTR);
+#endif
FOR_EACH_BB_FN (bb, cfun)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
update_stmt (stmt);
}
}
+#ifdef ACCEL_COMPILER
+ if (omp_redirect_indirect_calls
+ && gimple_call_fndecl (stmt) == NULL_TREE)
+ {
+ gcall *orig_call = dyn_cast <gcall *> (stmt);
+ tree call_fn = gimple_call_fn (stmt);
+ tree fn_ty = TREE_TYPE (call_fn);
+
+ if (TREE_CODE (call_fn) == OBJ_TYPE_REF)
+ {
+ tree obj_ref = create_tmp_reg (TREE_TYPE (call_fn),
+ ".ind_fn_objref");
+ gimple *gassign = gimple_build_assign (obj_ref, call_fn);
+ gsi_insert_before (&gsi, gassign, GSI_SAME_STMT);
+ call_fn = obj_ref;
+ }
+ tree mapped_fn = create_tmp_reg (fn_ty, ".ind_fn");
+ gimple *gcall =
+ gimple_build_call (map_ptr_fn, 1, call_fn);
+ gimple_set_location (gcall, gimple_location (stmt));
+ gimple_call_set_lhs (gcall, mapped_fn);
+ gsi_insert_before (&gsi, gcall, GSI_SAME_STMT);
+
+ gimple_call_set_fn (orig_call, mapped_fn);
+ update_stmt (orig_call);
+ }
+#endif
continue;
}
tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
/* opt_pass methods: */
bool gate (function *fun) final override
{
+#ifdef ACCEL_COMPILER
+ bool offload_ind_funcs_p = vec_safe_length (offload_ind_funcs) > 0;
+#else
+ bool offload_ind_funcs_p = false;
+#endif
return (!(fun->curr_properties & PROP_gimple_lomp_dev)
|| (flag_openmp
- && cgraph_node::get (fun->decl)->calls_declare_variant_alt));
+ && (cgraph_node::get (fun->decl)->calls_declare_variant_alt
+ || offload_ind_funcs_p)));
}
unsigned int execute (function *) final override
{
extern GTY(()) vec<tree, va_gc> *offload_funcs;
extern GTY(()) vec<tree, va_gc> *offload_vars;
+extern GTY(()) vec<tree, va_gc> *offload_ind_funcs;
extern void omp_finish_file (void);
extern void omp_discover_implicit_declare_target (void);
/* { dg-do compile } */
/* { dg-options "-fopenmp" } */
-#pragma omp declare target device_type (any) /* { dg-error "directive with only 'device_type' clause" } */
+#pragma omp declare target device_type (any) /* { dg-error "directive with only 'device_type' or 'indirect' clauses" } */
void f1 (void) {}
#pragma omp declare target device_type (host) to (f1) device_type (nohost) /* { dg-error "too many 'device_type' clauses" } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-fopenmp" } */
+
+/* Parsing and diagnostic tests for the OpenMP 'indirect' clause on
+ 'declare target' and 'begin declare target' directives.  */
+
+extern int a, b;
+#define X 1
+#define Y 0
+
+#pragma omp begin declare target indirect
+void fn1 (void) { }
+#pragma omp end declare target
+
+#pragma omp begin declare target indirect (1)
+void fn2 (void) { }
+#pragma omp end declare target
+
+#pragma omp begin declare target indirect (0)
+void fn3 (void) { }
+#pragma omp end declare target
+
+void fn4 (void) { }
+#pragma omp declare target indirect to (fn4)
+
+void fn5 (void) { }
+#pragma omp declare target indirect (1) to (fn5)
+
+void fn6 (void) { }
+#pragma omp declare target indirect (0) to (fn6)
+
+void fn7 (void) { }
+#pragma omp declare target indirect (-1) to (fn7)
+
+/* Compile-time non-constant expressions are not allowed. */
+void fn8 (void) { }
+#pragma omp declare target indirect (a + b) to (fn8) /* { dg-error "expected constant logical expression" } */
+
+/* Compile-time constant expressions are permissible. */
+void fn9 (void) { }
+#pragma omp declare target indirect (X*Y) to (fn9)
+
+/* 'omp declare target'...'omp end declare target' form cannot take clauses. */
+#pragma omp declare target indirect /* { dg-error "directive with only 'device_type' or 'indirect' clauses" }*/
+void fn10 (void) { }
+#pragma omp end declare target /* { dg-error "'#pragma omp end declare target' without corresponding '#pragma omp declare target' or '#pragma omp begin declare target'" } */
+
+void fn11 (void) { }
+#pragma omp declare target indirect (1) indirect (0) to (fn11) /* { dg-error "too many .indirect. clauses" } */
+
+void fn12 (void) { }
+#pragma omp declare target indirect ("abs") to (fn12)
+
+void fn13 (void) { }
+#pragma omp declare target indirect (5.5) enter (fn13)
+
+void fn14 (void) { }
+#pragma omp declare target indirect (1) device_type (host) enter (fn14) /* { dg-error "'device_type' clause must specify 'any' when used with an 'indirect' clause" } */
+
+void fn15 (void) { }
+#pragma omp declare target indirect (0) device_type (nohost) enter (fn15)
+
+/* Indirect on a variable should have no effect. */
+int x;
+#pragma omp declare target indirect to(x)
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-fopenmp -fdump-tree-gimple" } */
+
+/* Verify that a true 'indirect' clause results in the
+ "omp declare target indirect" attribute in the gimple dump, including
+ for nested 'begin declare target' regions.  */
+
+#pragma omp begin declare target indirect
+void fn1 (void) { }
+#pragma omp end declare target
+/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target, omp declare target block, omp declare target indirect\\\)\\\)\\\nvoid fn1" "gimple" } } */
+
+#pragma omp begin declare target indirect (0)
+void fn2 (void) { }
+#pragma omp end declare target
+/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target, omp declare target block\\\)\\\)\\\nvoid fn2" "gimple" } } */
+
+void fn3 (void) { }
+#pragma omp declare target indirect to (fn3)
+/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target indirect, omp declare target\\\)\\\)\\\nvoid fn3" "gimple" } } */
+
+void fn4 (void) { }
+#pragma omp declare target indirect (0) to (fn4)
+/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target\\\)\\\)\\\nvoid fn4" "gimple" } } */
+
+#pragma omp begin declare target indirect(1)
+ int foo(void) { return 5; }
+ #pragma omp begin declare target indirect(0)
+ int bar(void) { return 8; }
+ int baz(void) { return 11; }
+ #pragma omp declare target indirect enter(baz)
+ #pragma omp end declare target
+#pragma omp end declare target
+/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target, omp declare target block, omp declare target indirect\\\)\\\)\\\nint foo" "gimple" } } */
+/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target, omp declare target block\\\)\\\)\\\nint bar" "gimple" } } */
+/* { dg-final { scan-tree-dump "__attribute__\\\(\\\(omp declare target indirect, omp declare target, omp declare target block\\\)\\\)\\\nint baz" "gimple" } } */
[[omp::decl (declare target (v8))]] static int v9; // { dg-error "expected end of line before '\\\(' token" }
[[omp::decl (declare target enter (v8))]] static int v10; // { dg-error "expected an OpenMP clause before '\\\(' token" }
[[omp::decl (declare target, link (v9))]] static int v11; // { dg-error "expected an OpenMP clause before '\\\(' token" }
- [[omp::decl (declare target device_type (any))]] static int v12; // { dg-error "directive with only 'device_type' clause" }
+ [[omp::decl (declare target device_type (any))]] static int v12; // { dg-error "directive with only 'device_type' or 'indirect' clauses" }
}
int i;
--- /dev/null
+// { dg-skip-if "c++98 does not support attributes" { c++98_only } }
+
+[[omp::decl (declare target, indirect(1))]] // { dg-error "directive with only 'device_type' or 'indirect' clauses" }
+int f (void) { return 5; }
+
+[[omp::decl (declare target indirect)]] // { dg-error "directive with only 'device_type' or 'indirect' clauses" }
+int g (void) { return 8; }
+
+[[omp::directive (begin declare target, indirect)]];
+int h (void) { return 11; }
+[[omp::directive (end declare target)]];
+
+int i (void) { return 8; }
+[[omp::directive (declare target to(i), indirect (1))]];
+
+int j (void) { return 11; }
+[[omp::directive (declare target indirect enter (j))]];
[[omp::decl (declare target (v8))]] static int v9; /* { dg-error "expected end of line before '\\\(' token" } */
[[omp::decl (declare target enter (v8))]] static int v10; /* { dg-error "expected an OpenMP clause before '\\\(' token" } */
[[omp::decl (declare target, link (v9))]] static int v11; /* { dg-error "expected an OpenMP clause before '\\\(' token" } */
- [[omp::decl (declare target device_type (any))]] static int v12; /* { dg-error "directive with only 'device_type' clause" } */
+ [[omp::decl (declare target device_type (any))]] static int v12; /* { dg-error "directive with only 'device_type' or 'indirect' clauses" } */
}
int i;
/* OpenMP clause: doacross ({source,sink}:vec). */
OMP_CLAUSE_DOACROSS,
+ /* OpenMP clause: indirect [(constant-integer-expression)]. */
+ OMP_CLAUSE_INDIRECT,
+
/* Internal structure to hold OpenACC cache directive's variable-list.
#pragma acc cache (variable-list). */
OMP_CLAUSE__CACHE_,
2, /* OMP_CLAUSE_MAP */
1, /* OMP_CLAUSE_HAS_DEVICE_ADDR */
1, /* OMP_CLAUSE_DOACROSS */
+ 1, /* OMP_CLAUSE_INDIRECT */
2, /* OMP_CLAUSE__CACHE_ */
2, /* OMP_CLAUSE_GANG */
1, /* OMP_CLAUSE_ASYNC */
"map",
"has_device_addr",
"doacross",
+ "indirect",
"_cache_",
"gang",
"async",
#define OMP_CLAUSE_DEVICE_TYPE_KIND(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE_TYPE)->omp_clause.subcode.device_type_kind)
+#define OMP_CLAUSE_INDIRECT_EXPR(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_INDIRECT), 0)
+
+
/* True if there is a device clause with a device-modifier 'ancestor'. */
#define OMP_CLAUSE_DEVICE_ANCESTOR(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DEVICE)->base.public_flag)
/* Versions of libgomp and device-specific plugins. GOMP_VERSION
should be incremented whenever an ABI-incompatible change is introduced
to the plugin interface defined in libgomp/libgomp.h. */
-#define GOMP_VERSION 2
+#define GOMP_VERSION 3
#define GOMP_VERSION_NVIDIA_PTX 1
#define GOMP_VERSION_GCN 3
#define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
#define GOMP_VERSION_DEV(PACK) ((PACK) & 0xffff)
+#define GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS(VER) (GOMP_VERSION_LIB(VER) >= 3)
+
#define GOMP_DIM_GANG 0
#define GOMP_DIM_WORKER 1
#define GOMP_DIM_VECTOR 2
#if defined(HAVE_GAS_HIDDEN) && ENABLE_OFFLOADING == 1
#define OFFLOAD_FUNC_TABLE_SECTION_NAME ".gnu.offload_funcs"
+#define OFFLOAD_IND_FUNC_TABLE_SECTION_NAME ".gnu.offload_ind_funcs"
#define OFFLOAD_VAR_TABLE_SECTION_NAME ".gnu.offload_vars"
#ifdef CRT_BEGIN
const void *const __offload_var_table[0]
__attribute__ ((__used__, visibility ("hidden"),
section (OFFLOAD_VAR_TABLE_SECTION_NAME))) = { };
+const void *const __offload_ind_func_table[0]
+ __attribute__ ((__used__, visibility ("hidden"),
+ section (OFFLOAD_IND_FUNC_TABLE_SECTION_NAME))) = { };
#elif defined CRT_END
const void *const __offload_vars_end[0]
__attribute__ ((__used__, visibility ("hidden"),
section (OFFLOAD_VAR_TABLE_SECTION_NAME))) = { };
+const void *const __offload_ind_funcs_end[0]
+ __attribute__ ((__used__, visibility ("hidden"),
+ section (OFFLOAD_IND_FUNC_TABLE_SECTION_NAME))) = { };
#elif defined CRT_TABLE
extern const void *const __offload_func_table[];
extern const void *const __offload_var_table[];
+extern const void *const __offload_ind_func_table[];
extern const void *const __offload_funcs_end[];
extern const void *const __offload_vars_end[];
+extern const void *const __offload_ind_funcs_end[];
const void *const __OFFLOAD_TABLE__[]
__attribute__ ((__visibility__ ("hidden"))) =
{
&__offload_func_table, &__offload_funcs_end,
- &__offload_var_table, &__offload_vars_end
+ &__offload_var_table, &__offload_vars_end,
+ &__offload_ind_func_table, &__offload_ind_funcs_end,
};
#else /* ! CRT_BEGIN && ! CRT_END && ! CRT_TABLE */
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c
+ oacc-target.c target-indirect.c
include $(top_srcdir)/plugin/Makefrag.am
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
- oacc-target.lo $(am__objects_1)
+ oacc-target.lo target-indirect.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c $(am__append_3)
+ oacc-target.c target-indirect.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@
--- /dev/null
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+ Contributed by Siemens.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <assert.h>
+#include "libgomp.h"
+
+#define splay_tree_prefix indirect
+#define splay_tree_c
+#include "splay-tree.h"
+
+volatile void **GOMP_INDIRECT_ADDR_MAP = NULL;
+
+/* Use a splay tree to lookup the target address instead of using a
+ linear search. */
+#define USE_SPLAY_TREE_LOOKUP
+
+#ifdef USE_SPLAY_TREE_LOOKUP
+
+static struct indirect_splay_tree_s indirect_map;
+static indirect_splay_tree_node indirect_array = NULL;
+
+/* Build the splay tree used for host->target address lookups. */
+
+/* Build the splay tree used for host->target address lookups of indirect
+   function pointers.  Safe to call repeatedly: the tree is built at most
+   once, under LOCK.  No-op when the image provides no indirect map.  */
+
+void
+build_indirect_map (void)
+{
+  size_t num_ind_funcs = 0;
+  volatile void **map_entry;
+  static int lock = 0;	/* == gomp_mutex_t lock; gomp_mutex_init (&lock); */
+
+  if (!GOMP_INDIRECT_ADDR_MAP)
+    return;
+
+  gomp_mutex_lock (&lock);
+
+  if (!indirect_array)
+    {
+      /* Count the number of entries in the NULL-terminated address map;
+	 entries are <host_addr, target_addr> pairs.  */
+      for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
+	   map_entry += 2, num_ind_funcs++);
+
+      /* Build splay tree for address lookup.  */
+      indirect_array = gomp_malloc (num_ind_funcs * sizeof (*indirect_array));
+      indirect_splay_tree_node array = indirect_array;
+      map_entry = GOMP_INDIRECT_ADDR_MAP;
+
+      /* size_t index: num_ind_funcs is size_t, so an int index would be a
+	 signed/unsigned comparison and could truncate.  */
+      for (size_t i = 0; i < num_ind_funcs; i++, array++)
+	{
+	  indirect_splay_tree_key k = &array->key;
+	  k->host_addr = (uint64_t) *map_entry++;
+	  k->target_addr = (uint64_t) *map_entry++;
+	  array->left = NULL;
+	  array->right = NULL;
+	  indirect_splay_tree_insert (&indirect_map, array);
+	}
+    }
+
+  gomp_mutex_unlock (&lock);
+}
+
+/* Map a host-side function pointer PTR to its device-side equivalent via the
+   splay tree built by build_indirect_map.  NULL and pointers not present in
+   the map are returned unchanged.  */
+
+void *
+GOMP_target_map_indirect_ptr (void *ptr)
+{
+  /* NULL pointers always resolve to NULL. */
+  if (!ptr)
+    return ptr;
+
+  /* build_indirect_map must have populated the tree before any lookup.  */
+  assert (indirect_array);
+
+  struct indirect_splay_tree_key_s k;
+  indirect_splay_tree_key node = NULL;
+
+  k.host_addr = (uint64_t) ptr;
+  node = indirect_splay_tree_lookup (&indirect_map, &k);
+
+  return node ? (void *) node->target_addr : ptr;
+}
+
+#else
+
+/* Linear-search variant: no lookup structure to build, so this is a no-op.  */
+
+void
+build_indirect_map (void)
+{
+}
+
+/* Map a host-side function pointer PTR to its device-side equivalent by a
+   linear scan of the NULL-terminated <host, target> pair array.  NULL and
+   unknown pointers are returned unchanged.  */
+
+void *
+GOMP_target_map_indirect_ptr (void *ptr)
+{
+  /* NULL pointers always resolve to NULL. */
+  if (!ptr)
+    return ptr;
+
+  assert (GOMP_INDIRECT_ADDR_MAP);
+
+  /* Entries are <host_addr, target_addr> pairs; the list ends at a NULL
+     host address.  */
+  for (volatile void **map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
+       map_entry += 2)
+    if (*map_entry == ptr)
+      return (void *) *(map_entry + 1);
+
+  return ptr;
+}
+
+#endif
#include <string.h>
static void gomp_thread_start (struct gomp_thread_pool *);
+extern void build_indirect_map (void);
/* This externally visible function handles target region entry. It
sets up a per-team thread pool and transfers control by returning to
{
int threadid = __builtin_gcn_dim_pos (1);
+ /* Initialize indirect function support. */
+ build_indirect_map ();
+
if (threadid == 0)
{
int numthreads = __builtin_gcn_dim_size (1);
--- /dev/null
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+ Contributed by Siemens.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Stub for builds where indirect-pointer mapping never happens (presumably
+   the host fallback — TODO confirm which target this file is compiled for).
+   The compiler only emits calls to this function in offloaded regions, so
+   reaching it here is a bug; hence no return value is produced.  */
+
+void *
+GOMP_target_map_indirect_ptr (void *ptr)
+{
+  /* Calls to this function should not be generated for host code. */
+  __builtin_unreachable ();
+}
int __gomp_team_num __attribute__((shared,nocommon));
static void gomp_thread_start (struct gomp_thread_pool *);
+extern void build_indirect_map (void);
/* This externally visible function handles target region entry. It
int tid, ntids;
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
+
+ /* Initialize indirect function support. */
+ build_indirect_map ();
+
if (tid == 0)
{
gomp_global_icv.nthreads_var = ntids;
must be stringified). */
#define GOMP_ADDITIONAL_ICVS __gomp_additional_icvs
+#define GOMP_INDIRECT_ADDR_MAP __gomp_indirect_addr_map
+
/* Miscellaneous functions. */
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__ ((malloc));
extern bool GOMP_OFFLOAD_fini_device (int);
extern unsigned GOMP_OFFLOAD_version (void);
extern int GOMP_OFFLOAD_load_image (int, unsigned, const void *,
- struct addr_pair **, uint64_t **);
+ struct addr_pair **, uint64_t **,
+ uint64_t *);
extern bool GOMP_OFFLOAD_unload_image (int, unsigned, const void *);
extern void *GOMP_OFFLOAD_alloc (int, size_t);
extern bool GOMP_OFFLOAD_free (int, void *);
#define splay_tree_prefix reverse
#include "splay-tree.h"
+/* Indirect target function splay-tree handling. */
+
+struct indirect_splay_tree_key_s {
+ uint64_t host_addr, target_addr;
+};
+
+typedef struct indirect_splay_tree_node_s *indirect_splay_tree_node;
+typedef struct indirect_splay_tree_s *indirect_splay_tree;
+typedef struct indirect_splay_tree_key_s *indirect_splay_tree_key;
+
+/* Three-way comparison of indirect splay-tree keys, ordering nodes by host
+   address only (the target address is the payload, not part of the key).  */
+
+static inline int
+indirect_splay_compare (indirect_splay_tree_key x, indirect_splay_tree_key y)
+{
+  if (x->host_addr < y->host_addr)
+    return -1;
+  if (x->host_addr > y->host_addr)
+    return 1;
+  return 0;
+}
+
+#define splay_tree_prefix indirect
+#include "splay-tree.h"
+
struct target_mem_desc {
/* Reference count. */
uintptr_t refcount;
GOMP_5.1.1 {
global:
GOMP_taskwait_depend_nowait;
+ GOMP_target_map_indirect_ptr;
} GOMP_5.1;
OACC_2.0 {
@item Iterators in @code{target update} motion clauses and @code{map}
clauses @tab N @tab
@item Indirect calls to the device version of a procedure or function in
- @code{target} regions @tab N @tab
+ @code{target} regions @tab P @tab Only C and C++
@item @code{interop} directive @tab N @tab
@item @code{omp_interop_t} object support in runtime routines @tab N @tab
@item @code{nowait} clause in @code{taskwait} directive @tab Y @tab
@item For Fortran, diagnose placing declarative before/between @code{USE},
@code{IMPORT}, and @code{IMPLICIT} as invalid @tab N @tab
@item Optional comma between directive and clause in the @code{#pragma} form @tab Y @tab
-@item @code{indirect} clause in @code{declare target} @tab N @tab
+@item @code{indirect} clause in @code{declare target} @tab P @tab Only C and C++
@item @code{device_type(nohost)}/@code{device_type(host)} for variables @tab N @tab
@item @code{present} modifier to the @code{map}, @code{to} and @code{from}
clauses @tab Y @tab
@item @code{all} as @emph{implicit-behavior} for @code{defaultmap} @tab Y @tab
@item @emph{interop_types} in any position of the modifier list for the @code{init} clause
of the @code{interop} construct @tab N @tab
+@item Invoke virtual member functions of C++ objects created on the host device
+ on other devices @tab N @tab
@end multitable
void **);
extern void GOMP_teams (unsigned int, unsigned int);
extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
+extern void *GOMP_target_map_indirect_ptr (void *);
/* teams.c */
unsigned v __attribute__ ((unused)),
const void *t __attribute__ ((unused)),
struct addr_pair **r __attribute__ ((unused)),
- uint64_t **f __attribute__ ((unused)))
+ uint64_t **f __attribute__ ((unused)),
+ uint64_t *i __attribute__ ((unused)))
{
return 0;
}
} *gcn_image;
const unsigned kernel_count;
struct hsa_kernel_description *kernel_infos;
+ const unsigned ind_func_count;
const unsigned global_variable_count;
};
int
GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct addr_pair **target_table,
- uint64_t **rev_fn_table)
+ uint64_t **rev_fn_table,
+ uint64_t *host_ind_fn_table)
{
if (GOMP_VERSION_DEV (version) != GOMP_VERSION_GCN)
{
struct module_info *module;
struct kernel_info *kernel;
int kernel_count = image_desc->kernel_count;
+ unsigned ind_func_count = GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version)
+ ? image_desc->ind_func_count : 0;
unsigned var_count = image_desc->global_variable_count;
/* Currently, "others" is a struct of ICVS. */
int other_count = 1;
return -1;
GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
+ GCN_DEBUG ("Encountered %d indirect functions in an image\n", ind_func_count);
GCN_DEBUG ("Encountered %u global variables in an image\n", var_count);
GCN_DEBUG ("Expect %d other variables in an image\n", other_count);
pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2)
}
}
+ if (ind_func_count > 0)
+ {
+ hsa_status_t status;
+
+ /* Read indirect function table from image. */
+ hsa_executable_symbol_t ind_funcs_symbol;
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ ".offload_ind_func_table",
+ agent->id,
+ 0, &ind_funcs_symbol);
+
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not find .offload_ind_func_table symbol in the "
+ "code object", status);
+
+ uint64_t ind_funcs_table_addr;
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (ind_funcs_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+ &ind_funcs_table_addr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable from its symbol", status);
+
+ uint64_t ind_funcs_table[ind_func_count];
+ GOMP_OFFLOAD_dev2host (agent->device_id, ind_funcs_table,
+ (void*) ind_funcs_table_addr,
+ sizeof (ind_funcs_table));
+
+ /* Build host->target address map for indirect functions. */
+ uint64_t ind_fn_map[ind_func_count * 2 + 1];
+ for (unsigned i = 0; i < ind_func_count; i++)
+ {
+ ind_fn_map[i * 2] = host_ind_fn_table[i];
+ ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
+ GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
+ i, host_ind_fn_table[i], ind_funcs_table[i]);
+ }
+ ind_fn_map[ind_func_count * 2] = 0;
+
+ /* Write the map onto the target. */
+ void *map_target_addr
+ = GOMP_OFFLOAD_alloc (agent->device_id, sizeof (ind_fn_map));
+ GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
+
+ GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
+ (void*) ind_fn_map,
+ sizeof (ind_fn_map));
+
+ /* Write address of the map onto the target. */
+ hsa_executable_symbol_t symbol;
+
+ status
+ = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ XSTRING (GOMP_INDIRECT_ADDR_MAP),
+ agent->id, 0, &symbol);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not find GOMP_INDIRECT_ADDR_MAP in code object",
+ status);
+
+ uint64_t varptr;
+ uint32_t varsize;
+
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+ &varptr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable from its symbol", status);
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
+ &varsize);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable size from its symbol",
+ status);
+
+ GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_MAP at %lx with size %d\n",
+ varptr, varsize);
+
+ GOMP_OFFLOAD_host2dev (agent->device_id, (void *) varptr,
+ &map_target_addr,
+ sizeof (map_target_addr));
+ }
+
GCN_DEBUG ("Looking for variable %s\n", XSTRING (GOMP_ADDITIONAL_ICVS));
hsa_status_t status;
const struct targ_fn_launch *fn_descs;
unsigned fn_num;
+
+ unsigned ind_fn_num;
} nvptx_tdata_t;
/* Descriptor of a loaded function. */
int
GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct addr_pair **target_table,
- uint64_t **rev_fn_table)
+ uint64_t **rev_fn_table,
+ uint64_t *host_ind_fn_table)
{
CUmodule module;
const char *const *var_names;
const struct targ_fn_launch *fn_descs;
- unsigned int fn_entries, var_entries, other_entries, i, j;
+ unsigned int fn_entries, var_entries, ind_fn_entries, other_entries, i, j;
struct targ_fn_descriptor *targ_fns;
struct addr_pair *targ_tbl;
const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
var_names = img_header->var_names;
fn_entries = img_header->fn_num;
fn_descs = img_header->fn_descs;
+ ind_fn_entries = GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version)
+ ? img_header->ind_fn_num : 0;
/* Currently, other_entries contains only the struct of ICVs. */
other_entries = 1;
targ_tbl->end = targ_tbl->start + bytes;
}
+ if (ind_fn_entries > 0)
+ {
+ CUdeviceptr var;
+ size_t bytes;
+
+ /* Read indirect function table from image. */
+ CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &var, &bytes, module,
+ "$offload_ind_func_table");
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));
+ assert (bytes == sizeof (uint64_t) * ind_fn_entries);
+
+ uint64_t ind_fn_table[ind_fn_entries];
+ r = CUDA_CALL_NOCHECK (cuMemcpyDtoH, ind_fn_table, var, bytes);
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r));
+
+ /* Build host->target address map for indirect functions. */
+ uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
+ for (unsigned k = 0; k < ind_fn_entries; k++)
+ {
+ ind_fn_map[k * 2] = host_ind_fn_table[k];
+ ind_fn_map[k * 2 + 1] = ind_fn_table[k];
+ GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
+ k, host_ind_fn_table[k], ind_fn_table[k]);
+ }
+ ind_fn_map[ind_fn_entries * 2] = 0;
+
+ /* Write the map onto the target. */
+ void *map_target_addr
+ = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
+ GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
+
+ GOMP_OFFLOAD_host2dev (ord, map_target_addr,
+ (void*) ind_fn_map,
+ sizeof (ind_fn_map));
+
+ /* Write address of the map onto the target. */
+ CUdeviceptr varptr;
+ size_t varsize;
+ r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
+ module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("Indirect map variable not found in image: %s",
+ cuda_error (r));
+
+ GOMP_PLUGIN_debug (0,
+ "Indirect map variable found at %llx with size %ld\n",
+ varptr, varsize);
+
+ GOMP_OFFLOAD_host2dev (ord, (void *) varptr, &map_target_addr,
+ sizeof (map_target_addr));
+ }
+
CUdeviceptr varptr;
size_t varsize;
CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
void **host_funcs_end = ((void ***) host_table)[1];
void **host_var_table = ((void ***) host_table)[2];
void **host_vars_end = ((void ***) host_table)[3];
+ void **host_ind_func_table = NULL;
+ void **host_ind_funcs_end = NULL;
- /* The func table contains only addresses, the var table contains addresses
- and corresponding sizes. */
+ if (GOMP_VERSION_SUPPORTS_INDIRECT_FUNCS (version))
+ {
+ host_ind_func_table = ((void ***) host_table)[4];
+ host_ind_funcs_end = ((void ***) host_table)[5];
+ }
+
+ /* The func and ind_func tables contain only addresses, the var table
+ contains addresses and corresponding sizes. */
int num_funcs = host_funcs_end - host_func_table;
int num_vars = (host_vars_end - host_var_table) / 2;
+ int num_ind_funcs = (host_ind_funcs_end - host_ind_func_table);
/* Load image to device and get target addresses for the image. */
struct addr_pair *target_table = NULL;
num_target_entries
= devicep->load_image_func (devicep->target_id, version,
target_data, &target_table,
- rev_lookup ? &rev_target_fn_table : NULL);
+ rev_lookup ? &rev_target_fn_table : NULL,
+ num_ind_funcs
+ ? (uint64_t *) host_ind_func_table : NULL);
if (num_target_entries != num_funcs + num_vars
/* "+1" due to the additional ICV struct. */
--- /dev/null
+// { dg-do run }
+
+#pragma omp begin declare target indirect
+class C
+{
+public:
+ int y;
+ int f (int x) { return x + y; }
+};
+#pragma omp end declare target
+
+int main (void)
+{
+  C c;
+  c.y = 27;
+  int x;
+  /* Pointer-to-member-function taken on the host; inside the target region
+     it must be remapped to the device version of C::f via the indirect
+     function table.  */
+  int (C::*fn_ptr) (int) = &C::f;
+
+#pragma omp target map (to: c, fn_ptr) map (from: x)
+  x = (c.*fn_ptr) (42);
+
+  /* f(42) == 42 + y == 69; return 0 on success.  */
+  return x != 27 + 42;
+}
--- /dev/null
+/* { dg-do run } */
+
+#pragma omp begin declare target indirect
+int foo(void) { return 5; }
+int bar(void) { return 8; }
+int baz(void) { return 11; }
+#pragma omp end declare target
+
+int main (void)
+{
+  int x;
+  /* Host addresses of the three indirect functions; each must resolve to
+     its device counterpart when called inside the target region.  */
+  int (*foo_ptr) (void) = &foo;
+  int (*bar_ptr) (void) = &bar;
+  int (*baz_ptr) (void) = &baz;
+  int expected = foo () + bar () + baz ();
+
+#pragma omp target map (to: foo_ptr, bar_ptr, baz_ptr) map (from: x)
+  x = (*foo_ptr) () + (*bar_ptr) () + (*baz_ptr) ();
+
+  /* Return 0 iff the device-side calls produced the host-computed sum.  */
+  return x - expected;
+}
--- /dev/null
+/* { dg-do run } */
+
+#define N 256
+
+#pragma omp begin declare target indirect
+int foo(void) { return 5; }
+int bar(void) { return 8; }
+int baz(void) { return 11; }
+#pragma omp end declare target
+
+int main (void)
+{
+  int i, x = 0, expected = 0;
+  int (*fn_ptr[N])(void);
+
+  /* Populate the array with a repeating foo/bar/baz pattern.  The breaks
+     are essential: without them every case falls through and all entries
+     end up pointing at baz, so the test would exercise only one indirect
+     function.  */
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 3)
+	{
+	case 0: fn_ptr[i] = &foo; break;
+	case 1: fn_ptr[i] = &bar; break;
+	case 2: fn_ptr[i] = &baz; break;
+	}
+      expected += (*fn_ptr[i]) ();
+    }
+
+#pragma omp target teams distribute parallel for reduction(+: x) \
+	    map (to: fn_ptr) map (tofrom: x)
+  for (int i = 0; i < N; i++)
+    x += (*fn_ptr[i]) ();
+
+  /* Return 0 iff the device-side sum matches the host-side sum.  */
+  return x - expected;
+}