git.ipfire.org Git - thirdparty/gcc.git/commitdiff
OpenMP: Support reverse offload (middle end part)
author Tobias Burnus <tobias@codesourcery.com>
Tue, 30 Aug 2022 19:44:10 +0000 (21:44 +0200)
committer Tobias Burnus <tobias@codesourcery.com>
Tue, 30 Aug 2022 19:47:59 +0000 (21:47 +0200)
gcc/ChangeLog:

* internal-fn.cc (expand_GOMP_TARGET_REV): New.
* internal-fn.def (GOMP_TARGET_REV): New.
* lto-cgraph.cc (lto_output_node, verify_node_partition): Mark
'omp target device_ancestor_host' as in_other_partition and don't
error if absent.
* omp-low.cc (create_omp_child_function): Mark as 'noclone'.
* omp-expand.cc (expand_omp_target): For reverse offload, remove
sorry, use device = GOMP_DEVICE_HOST_FALLBACK and create
empty-body nohost function.
* omp-offload.cc (execute_omp_device_lower): Handle
IFN_GOMP_TARGET_REV.
(pass_omp_target_link::execute): For ACCEL_COMPILER, don't
nullify fn argument for reverse offload.

libgomp/ChangeLog:

* libgomp.texi (OpenMP 5.0): Mark 'ancestor' as implemented but
refer to 'requires'.
* testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c: New test.
* testsuite/libgomp.c-c++-common/reverse-offload-1.c: New test.
* testsuite/libgomp.fortran/reverse-offload-1-aux.f90: New test.
* testsuite/libgomp.fortran/reverse-offload-1.f90: New test.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/reverse-offload-1.c: Remove dg-sorry.
* c-c++-common/gomp/target-device-ancestor-4.c: Likewise.
* gfortran.dg/gomp/target-device-ancestor-4.f90: Likewise.
* gfortran.dg/gomp/target-device-ancestor-5.f90: Likewise.
* c-c++-common/goacc/classify-kernels-parloops.c: Add 'noclone' to
scan-tree-dump-times.
* c-c++-common/goacc/classify-kernels-unparallelized-parloops.c:
Likewise.
* c-c++-common/goacc/classify-kernels-unparallelized.c: Likewise.
* c-c++-common/goacc/classify-kernels.c: Likewise.
* c-c++-common/goacc/classify-parallel.c: Likewise.
* c-c++-common/goacc/classify-serial.c: Likewise.
* c-c++-common/goacc/kernels-counter-vars-function-scope.c: Likewise.
* c-c++-common/goacc/kernels-loop-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-3.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
* c-c++-common/goacc/kernels-loop-data.c: Likewise.
* c-c++-common/goacc/kernels-loop-g.c: Likewise.
* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
* c-c++-common/goacc/kernels-loop-n.c: Likewise.
* c-c++-common/goacc/kernels-loop-nest.c: Likewise.
* c-c++-common/goacc/kernels-loop.c: Likewise.
* c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Likewise.
* gfortran.dg/goacc/classify-kernels-parloops.f95: Likewise.
* gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95:
Likewise.
* gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
* gfortran.dg/goacc/classify-kernels.f95: Likewise.
* gfortran.dg/goacc/classify-parallel.f95: Likewise.
* gfortran.dg/goacc/classify-serial.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
* gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
* gfortran.dg/goacc/kernels-loop.f95: Likewise.
* gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: Likewise.

(cherry picked from commit d6621a2f3176dd6a593d4f5fa7f85db0234b40d2)

53 files changed:
gcc/ChangeLog.omp
gcc/internal-fn.cc
gcc/internal-fn.def
gcc/lto-cgraph.cc
gcc/omp-expand.cc
gcc/omp-low.cc
gcc/omp-offload.cc
gcc/testsuite/ChangeLog.omp
gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c
gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c
gcc/testsuite/c-c++-common/goacc/classify-kernels.c
gcc/testsuite/c-c++-common/goacc/classify-parallel.c
gcc/testsuite/c-c++-common/goacc/classify-serial.c
gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c
gcc/testsuite/c-c++-common/goacc/kernels-loop.c
gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c
gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c
gcc/testsuite/c-c++-common/gomp/reverse-offload-1.c
gcc/testsuite/c-c++-common/gomp/target-device-ancestor-4.c
gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95
gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95
gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
gcc/testsuite/gfortran.dg/goacc/classify-parallel.f95
gcc/testsuite/gfortran.dg/goacc/classify-serial.f95
gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95
gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95
gcc/testsuite/gfortran.dg/goacc/kernels-loop-n.f95
gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95
gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95
gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-4.f90
gcc/testsuite/gfortran.dg/gomp/target-device-ancestor-5.f90
libgomp/ChangeLog.omp
libgomp/libgomp.texi
libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c [new file with mode: 0644]
libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c [new file with mode: 0644]
libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90 [new file with mode: 0644]
libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90 [new file with mode: 0644]

index a665cc19ef0c62b6c893982bb434ff511df52dd8..ff9d34b5c7ced432a62c540a42aeff33853ac3b9 100644 (file)
@@ -1,3 +1,22 @@
+2022-08-30  Tobias Burnus  <tobias@codesourcery.com>
+
+       Backport from mainline:
+       2022-08-26  Tobias Burnus  <tobias@codesourcery.com>
+
+       * internal-fn.cc (expand_GOMP_TARGET_REV): New.
+       * internal-fn.def (GOMP_TARGET_REV): New.
+       * lto-cgraph.cc (lto_output_node, verify_node_partition): Mark
+       'omp target device_ancestor_host' as in_other_partition and don't
+       error if absent.
+       * omp-low.cc (create_omp_child_function): Mark as 'noclone'.
+       * omp-expand.cc (expand_omp_target): For reverse offload, remove
+       sorry, use device = GOMP_DEVICE_HOST_FALLBACK and create
+       empty-body nohost function.
+       * omp-offload.cc (execute_omp_device_lower): Handle
+       IFN_GOMP_TARGET_REV.
+       (pass_omp_target_link::execute): For ACCEL_COMPILER, don't
+       nullify fn argument for reverse offload
+
 2022-08-22  Tobias Burnus  <tobias@codesourcery.com>
 
        Backport from mainline:
index 3de40f4d90555e93bd7f478b3319339aa44d9096..a2227270d8eece5c0cad955db821ee16c746253a 100644 (file)
@@ -287,6 +287,14 @@ expand_GOMP_SIMT_VF (internal_fn, gcall *)
   gcc_unreachable ();
 }
 
+/* This should get expanded in omp_device_lower pass.  */
+
+static void
+expand_GOMP_TARGET_REV (internal_fn, gcall *)
+{
+  gcc_unreachable ();
+}
+
 /* Lane index of the first SIMT lane that supplies a non-zero argument.
    This is a SIMT counterpart to GOMP_SIMD_LAST_LANE, used to represent the
    lane that executed the last iteration for handling OpenMP lastprivate.  */
index d2d550d358606022b1cb44fa842f06e0be507bc3..f45c2af044c8f154b0c675109bd1dfcbfe56240d 100644 (file)
@@ -313,6 +313,7 @@ DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
 DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
 DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
 
+DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
index 39af9c1bd072c444f09778b647b8c5e82b498b17..95d2774fc45d5d3d389a2c0635c7645cfef615d7 100644 (file)
@@ -430,6 +430,14 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
         after reading back.  */
       in_other_partition = 1;
     }
+  else if (__builtin_expect (
+            lto_stream_offload_p
+            && lookup_attribute ("omp target device_ancestor_host",
+                                 DECL_ATTRIBUTES (node->decl)), 0))
+    /* This symbol is only used as argument to IFN_GOMP_TARGET_REV; this IFN
+       is ignored on ACCEL_COMPILER.  Thus, mark it as in_other_partition to silence
+       verify_node_partition diagnostic.  */
+    in_other_partition = 1;
 
   clone_of = node->clone_of;
   while (clone_of
@@ -1140,10 +1148,15 @@ verify_node_partition (symtab_node *node)
   if (node->in_other_partition)
     {
       if (TREE_CODE (node->decl) == FUNCTION_DECL)
-       error_at (DECL_SOURCE_LOCATION (node->decl),
-                 "function %qs has been referenced in offloaded code but"
-                 " hasn%'t been marked to be included in the offloaded code",
-                 node->name ());
+       {
+         if (lookup_attribute ("omp target device_ancestor_host",
+                               DECL_ATTRIBUTES (node->decl)) != NULL)
+           return;
+         error_at (DECL_SOURCE_LOCATION (node->decl),
+                   "function %qs has been referenced in offloaded code but"
+                   " hasn%'t been marked to be included in the offloaded code",
+                   node->name ());
+       }
       else if (VAR_P (node->decl))
        error_at (DECL_SOURCE_LOCATION (node->decl),
                  "variable %qs has been referenced in offloaded code but"
index 1e85ac285e237a808de86b28b21a0e8b0548c61b..9f887c2d0e162ef74c3831fb726a1503c97e4b14 100644 (file)
@@ -9734,7 +9734,7 @@ expand_omp_target (struct omp_region *region)
 {
   basic_block entry_bb, exit_bb, new_bb;
   struct function *child_cfun;
-  tree child_fn, block, t;
+  tree child_fn, child_fn2, block, t, c;
   gimple_stmt_iterator gsi;
   gomp_target *entry_stmt;
   gimple *stmt;
@@ -9776,10 +9776,16 @@ expand_omp_target (struct omp_region *region)
       gcc_unreachable ();
     }
 
-  child_fn = NULL_TREE;
+  tree clauses = gimple_omp_target_clauses (entry_stmt);
+
+  bool is_ancestor = false;
+  child_fn = child_fn2 = NULL_TREE;
   child_cfun = NULL;
   if (offloaded)
     {
+      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
+      if (ENABLE_OFFLOADING && c)
+       is_ancestor = OMP_CLAUSE_DEVICE_ANCESTOR (c);
       child_fn = gimple_omp_target_child_fn (entry_stmt);
       child_cfun = DECL_STRUCT_FUNCTION (child_fn);
     }
@@ -9979,7 +9985,8 @@ expand_omp_target (struct omp_region *region)
        {
          if (in_lto_p)
            DECL_PRESERVE_P (child_fn) = 1;
-         vec_safe_push (offload_funcs, child_fn);
+         if (!is_ancestor)
+           vec_safe_push (offload_funcs, child_fn);
        }
 
       bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
@@ -10018,11 +10025,88 @@ expand_omp_target (struct omp_region *region)
        }
 
       adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
+
+      /* Handle the case that an inner ancestor:1 target is called by an outer
+        target region. */
+      if (!is_ancestor)
+       cgraph_node::get (child_fn)->calls_declare_variant_alt
+         |= cgraph_node::get (cfun->decl)->calls_declare_variant_alt;
+      else  /* Duplicate function to create empty nonhost variant. */
+       {
+         /* Enable pass_omp_device_lower pass.  */
+         cgraph_node::get (cfun->decl)->calls_declare_variant_alt = 1;
+         cgraph_node *fn2_node;
+         child_fn2 = build_decl (DECL_SOURCE_LOCATION (child_fn),
+                                 FUNCTION_DECL,
+                                 clone_function_name (child_fn, "nohost"),
+                                 TREE_TYPE (child_fn));
+         if (in_lto_p)
+           DECL_PRESERVE_P (child_fn2) = 1;
+         TREE_STATIC (child_fn2) = 1;
+         DECL_ARTIFICIAL (child_fn2) = 1;
+         DECL_IGNORED_P (child_fn2) = 0;
+         TREE_PUBLIC (child_fn2) = 0;
+         DECL_UNINLINABLE (child_fn2) = 1;
+         DECL_EXTERNAL (child_fn2) = 0;
+         DECL_CONTEXT (child_fn2) = NULL_TREE;
+         DECL_INITIAL (child_fn2) = make_node (BLOCK);
+         BLOCK_SUPERCONTEXT (DECL_INITIAL (child_fn2)) = child_fn2;
+         DECL_ATTRIBUTES (child_fn)
+           = remove_attribute ("omp target entrypoint",
+                               DECL_ATTRIBUTES (child_fn));
+         DECL_ATTRIBUTES (child_fn2)
+           = tree_cons (get_identifier ("omp target device_ancestor_nohost"),
+                        NULL_TREE, copy_list (DECL_ATTRIBUTES (child_fn)));
+         DECL_ATTRIBUTES (child_fn)
+           = tree_cons (get_identifier ("omp target device_ancestor_host"),
+                        NULL_TREE, DECL_ATTRIBUTES (child_fn));
+         DECL_FUNCTION_SPECIFIC_OPTIMIZATION (child_fn2)
+           = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (current_function_decl);
+         DECL_FUNCTION_SPECIFIC_TARGET (child_fn2)
+           = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
+         DECL_FUNCTION_VERSIONED (child_fn2)
+           = DECL_FUNCTION_VERSIONED (current_function_decl);
+
+         fn2_node = cgraph_node::get_create (child_fn2);
+         fn2_node->offloadable = 1;
+         fn2_node->force_output = 1;
+         node->offloadable = 0;
+
+         t = build_decl (DECL_SOURCE_LOCATION (child_fn),
+                         RESULT_DECL, NULL_TREE, void_type_node);
+         DECL_ARTIFICIAL (t) = 1;
+         DECL_IGNORED_P (t) = 1;
+         DECL_CONTEXT (t) = child_fn2;
+         DECL_RESULT (child_fn2) = t;
+         DECL_SAVED_TREE (child_fn2) = build1 (RETURN_EXPR,
+                                               void_type_node, NULL);
+         tree tmp = DECL_ARGUMENTS (child_fn);
+         t = build_decl (DECL_SOURCE_LOCATION (child_fn), PARM_DECL,
+                         DECL_NAME (tmp), TREE_TYPE (tmp));
+         DECL_ARTIFICIAL (t) = 1;
+         DECL_NAMELESS (t) = 1;
+         DECL_ARG_TYPE (t) = ptr_type_node;
+         DECL_CONTEXT (t) = current_function_decl;
+         TREE_USED (t) = 1;
+         TREE_READONLY (t) = 1;
+         DECL_ARGUMENTS (child_fn2) = t;
+         gcc_assert (TREE_CHAIN (tmp) == NULL_TREE);
+
+         gimplify_function_tree (child_fn2);
+         cgraph_node::add_new_function (child_fn2, true);
+
+         vec_safe_push (offload_funcs, child_fn2);
+         if (dump_file && !gimple_in_ssa_p (cfun))
+           {
+             dump_function_header (dump_file, child_fn2, dump_flags);
+             dump_function_to_file (child_fn2, dump_file, dump_flags);
+           }
+       }
     }
 
   /* Emit a library call to launch the offloading region, or do data
      transfers.  */
-  tree t1, t2, t3, t4, depend, c, clauses;
+  tree t1, t2, t3, t4, depend;
   enum built_in_function start_ix;
   unsigned int flags_i = 0;
 
@@ -10073,8 +10157,6 @@ expand_omp_target (struct omp_region *region)
       gcc_unreachable ();
     }
 
-  clauses = gimple_omp_target_clauses (entry_stmt);
-
   tree device = NULL_TREE;
   location_t device_loc = UNKNOWN_LOCATION;
   tree goacc_flags = NULL_TREE;
@@ -10106,7 +10188,8 @@ expand_omp_target (struct omp_region *region)
            need_device_adjustment = true;
          device_loc = OMP_CLAUSE_LOCATION (c);
          if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
-           sorry_at (device_loc, "%<ancestor%> not yet supported");
+           device = build_int_cst (integer_type_node,
+                                   GOMP_DEVICE_HOST_FALLBACK);
        }
       else
        {
@@ -10294,7 +10377,7 @@ expand_omp_target (struct omp_region *region)
   else
     args.quick_push (device);
   if (offloaded)
-    args.quick_push (build_fold_addr_expr (child_fn));
+    args.quick_push (build_fold_addr_expr (child_fn2 ? child_fn2 : child_fn));
   args.quick_push (t1);
   args.quick_push (t2);
   args.quick_push (t3);
@@ -10436,6 +10519,13 @@ expand_omp_target (struct omp_region *region)
       for (int i = 3; i < TREE_VEC_LENGTH (t); i++)
        args.safe_push (TREE_VEC_ELT (t, i));
     }
+  if (child_fn2)
+    {
+      g = gimple_build_call_internal (IFN_GOMP_TARGET_REV, 1,
+                                     build_fold_addr_expr (child_fn));
+      gimple_set_location (g, gimple_location (entry_stmt));
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+    }
 
   g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
   gimple_set_location (g, gimple_location (entry_stmt));
index 0ec33c73bca060db462e3b41156b7a736d250d7b..ea51ff9e6973632099c5ddd9e120d6a54df35481 100644 (file)
@@ -2410,6 +2410,11 @@ create_omp_child_function (omp_context *ctx, bool task_copy)
          else
            target_attr = NULL;
        }
+      if (target_attr
+         && is_gimple_omp_offloaded (ctx->stmt)
+         && lookup_attribute ("noclone", DECL_ATTRIBUTES (decl)) == NULL_TREE)
+       DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("noclone"),
+                                          NULL_TREE, DECL_ATTRIBUTES (decl));
       if (target_attr)
        DECL_ATTRIBUTES (decl)
          = tree_cons (get_identifier (target_attr),
index ac236482db5005fdaf582ace48e003e14861a517..979bc0badb2bc56bae11006d6e93cfc53b494272 100644 (file)
@@ -3305,6 +3305,47 @@ execute_omp_device_lower ()
        tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
        switch (gimple_call_internal_fn (stmt))
          {
+         case IFN_GOMP_TARGET_REV:
+           {
+#ifndef ACCEL_COMPILER
+             gimple_stmt_iterator gsi2 = gsi;
+             gsi_next (&gsi2);
+             gcc_assert (!gsi_end_p (gsi2));
+             gcc_assert (gimple_call_builtin_p (gsi_stmt (gsi2),
+                                                BUILT_IN_GOMP_TARGET));
+             tree old_decl
+               = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi2), 1), 0);
+             tree new_decl = gimple_call_arg (gsi_stmt (gsi), 0);
+             gimple_call_set_arg (gsi_stmt (gsi2), 1, new_decl);
+             update_stmt (gsi_stmt (gsi2));
+             new_decl = TREE_OPERAND (new_decl, 0);
+             unsigned i;
+             unsigned num_funcs = vec_safe_length (offload_funcs);
+             for (i = 0; i < num_funcs; i++)
+               {
+                 if ((*offload_funcs)[i] == old_decl)
+                   {
+                     (*offload_funcs)[i] = new_decl;
+                     break;
+                   }
+                 else if ((*offload_funcs)[i] == new_decl)
+                   break;  /* This can happen due to inlining.  */
+               }
+             gcc_assert (i < num_funcs);
+#else
+             tree old_decl = TREE_OPERAND (gimple_call_arg (gsi_stmt (gsi), 0),
+                                           0);
+#endif
+             /* FIXME: Find a way to actually prevent outputting the empty-body
+                old_decl as debug symbol + function in the assembly file.  */
+             cgraph_node *node = cgraph_node::get (old_decl);
+             node->address_taken = false;
+             node->need_lto_streaming = false;
+             node->offloadable = false;
+
+             unlink_stmt_vdef (stmt);
+           }
+           break;
          case IFN_GOMP_USE_SIMT:
            rhs = vf == 1 ? integer_zero_node : integer_one_node;
            break;
@@ -3481,6 +3522,15 @@ pass_omp_target_link::execute (function *fun)
        {
          if (gimple_call_builtin_p (gsi_stmt (gsi), BUILT_IN_GOMP_TARGET))
            {
+             tree dev = gimple_call_arg (gsi_stmt (gsi), 0);
+             tree fn = gimple_call_arg (gsi_stmt (gsi), 1);
+             if (POINTER_TYPE_P (TREE_TYPE (fn)))
+               fn = TREE_OPERAND (fn, 0);
+             if (TREE_CODE (dev) == INTEGER_CST
+                 && wi::to_wide (dev) == GOMP_DEVICE_HOST_FALLBACK
+                 && lookup_attribute ("omp target device_ancestor_nohost",
+                                      DECL_ATTRIBUTES (fn)) != NULL_TREE)
+               continue;  /* ancestor:1  */
              /* Nullify the second argument of __builtin_GOMP_target_ext.  */
              gimple_call_set_arg (gsi_stmt (gsi), 1, null_pointer_node);
              update_stmt (gsi_stmt (gsi));
index 12300cb52be0874a07b28339d300fd6da756b089..76600f3c77d235163d81c7d846bcb8498d45d159 100644 (file)
@@ -1,3 +1,52 @@
+2022-08-30  Tobias Burnus  <tobias@codesourcery.com>
+
+       Backport from mainline:
+       2022-08-26  Tobias Burnus  <tobias@codesourcery.com>
+
+       * c-c++-common/gomp/reverse-offload-1.c: Remove dg-sorry.
+       * c-c++-common/gomp/target-device-ancestor-4.c: Likewise.
+       * gfortran.dg/gomp/target-device-ancestor-4.f90: Likewise.
+       * gfortran.dg/gomp/target-device-ancestor-5.f90: Likewise.
+       * c-c++-common/goacc/classify-kernels-parloops.c: Add 'noclone' to
+       scan-tree-dump-times.
+       * c-c++-common/goacc/classify-kernels-unparallelized-parloops.c:
+       Likewise.
+       * c-c++-common/goacc/classify-kernels-unparallelized.c: Likewise.
+       * c-c++-common/goacc/classify-kernels.c: Likewise.
+       * c-c++-common/goacc/classify-parallel.c: Likewise.
+       * c-c++-common/goacc/classify-serial.c: Likewise.
+       * c-c++-common/goacc/kernels-counter-vars-function-scope.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-2.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-3.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-data.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-g.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-mod-not-zero.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-n.c: Likewise.
+       * c-c++-common/goacc/kernels-loop-nest.c: Likewise.
+       * c-c++-common/goacc/kernels-loop.c: Likewise.
+       * c-c++-common/goacc/kernels-one-counter-var.c: Likewise.
+       * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Likewise.
+       * gfortran.dg/goacc/classify-kernels-parloops.f95: Likewise.
+       * gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95:
+       Likewise.
+       * gfortran.dg/goacc/classify-kernels-unparallelized.f95: Likewise.
+       * gfortran.dg/goacc/classify-kernels.f95: Likewise.
+       * gfortran.dg/goacc/classify-parallel.f95: Likewise.
+       * gfortran.dg/goacc/classify-serial.f95: Likewise.
+       * gfortran.dg/goacc/kernels-loop-2.f95: Likewise.
+       * gfortran.dg/goacc/kernels-loop-data-2.f95: Likewise.
+       * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: Likewise.
+       * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: Likewise.
+       * gfortran.dg/goacc/kernels-loop-data-update.f95: Likewise.
+       * gfortran.dg/goacc/kernels-loop-data.f95: Likewise.
+       * gfortran.dg/goacc/kernels-loop-n.f95: Likewise.
+       * gfortran.dg/goacc/kernels-loop.f95: Likewise.
+       * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: Likewise.
+
 2022-08-30  Tobias Burnus  <tobias@codesourcery.com>
 
        * gfortran.dg/gomp/depend-6.f90: Update expected tree dumps.
index 5f470eb86bc00b31b64585b0ac9c47741f1fdd10..fc2b6375002ccd975ac9e143b903bdff4f0029d4 100644 (file)
@@ -27,16 +27,16 @@ void KERNELS ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
 
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can be parallelized.
    { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
    { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } } */
index a2cc59476978c0287b9ce9fc5c24005b2ff5fa12..3792a7c091945980fb84412870fbd6dc93de199c 100644 (file)
@@ -33,16 +33,16 @@ void KERNELS ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
 
 /* Check that exactly one OpenACC kernels construct is analyzed, and that it
    can't be parallelized.
    { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
    { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
    { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops1" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops1" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } } */
index 6e51447b401e5055127b122e52dc7c170eab85f5..b1266d45c83b487ba9cd7eaf713451768f700987 100644 (file)
@@ -38,4 +38,4 @@ void KERNELS ()
    always be 1 x 1 x 1 for non-offloading compilation).
    { dg-final { scan-tree-dump-times "(?n)Function is parallel_kernels_graphite OpenACC kernels offload" 1 "oaccloops1" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel_kernels_graphite, omp target entrypoint\\)\\)" 1 "oaccloops1" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel_kernels_graphite, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } } */
index d00d2b036f2aa0ae78b49eb4506738868051e05a..14e392b03415b8178aa4c726320280ba091ef1ac 100644 (file)
@@ -24,10 +24,10 @@ void PARALLEL ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
    { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccloops1" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint\\)\\)" 1 "oaccloops1" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } } */
index eedad55d49bef6c0c9cb5cc3b66607e9357d9959..f892009de355c81094cd84e51783cbe0c9469c66 100644 (file)
@@ -27,10 +27,10 @@ void SERIAL ()
 }
 
 /* Check the offloaded function's attributes.
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint\\)\\)" 1 "ompexp" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } } */
 
 /* Check the offloaded function's classification and compute dimensions (will
    always be 1 x 1 x 1 for non-offloading compilation).
    { dg-final { scan-tree-dump-times "(?n)Function is OpenACC serial offload" 1 "oaccloops1" } }
    { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
-   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint\\)\\)" 1 "oaccloops1" } } */
+   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } } */
index e14ea2c4c72660aea3d801b7782e3df706276743..142ebc07ffa05c040591f280098fdc439e12b56d 100644 (file)
@@ -48,7 +48,7 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index eb95dddedd56f7ace4defd47777c85e29e54508b..7909ca461cce6cc69e5e294879001e064d1d7e5e 100644 (file)
@@ -62,7 +62,7 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index ae812583cf97550e7890dfacdffaf86cd22fae7f..3e9db6f51453dec96ed2a48cdb52872eb3204143 100644 (file)
@@ -42,7 +42,7 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index b9e0458eab1a3fc0aa0d54fcb72601f18756e8e4..37197abb5c8f71cfe70f826998be51a0523afcfe 100644 (file)
@@ -62,7 +62,7 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index 9a88e8e0d393bb7829a6e3f527c3398078d76a0a..1bf9635d512c93572b3a66069baf28c760ca82f7 100644 (file)
@@ -60,7 +60,7 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index 0a018820ca17bfd82db9f44533d0d6b01cf5e8d4..b0e2c4835c7a46e6370c53f6c19a01c3e7f31da7 100644 (file)
@@ -57,7 +57,7 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index 4821cb9675ed1147bd37fe8b4a512bdb90b6124e..38cb55830eaf64f6f42d50022239b93c870f7b68 100644 (file)
@@ -58,7 +58,7 @@ main (void)
 /* Check that only two loops are analyzed, and that both can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 2 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index d650bfa97181ae5422d8a2bdcb468c4667e7dc72..27eb18aa94ad9abbc29a5ac479d40d3ecc1f9e9f 100644 (file)
@@ -56,7 +56,7 @@ main (void)
 /* Check that only three loops are analyzed, and that all can be
    parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index ca0bc2e59a4ed051350883f30a6d294161918170..ca3ca5cdd97f1be2cf80e81c281862481a6d9a3c 100644 (file)
@@ -11,7 +11,7 @@
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index bcc2555837343eb5d7fbf95cd7ddf26a48eba386..b38484a007fcc5687f63b196f212647626590b3b 100644 (file)
@@ -46,7 +46,7 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index b9ffc11bb1c281fb641f08eea0d3d69d23b383d7..aaa3318ac87a12b710e24323d148682aaa8f8041 100644 (file)
@@ -49,7 +49,7 @@ foo (COUNTERTYPE n)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index a7cec7fab9ede4116b8254ac94a81550fd2f6364..a97e8a9520c80b6aa7353c16f0a896b60c12e83e 100644 (file)
@@ -33,7 +33,7 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index 954902edbd993bd27f8907b82737cba51a47a70e..771b88db06bf6add4f379ee24d2de6bb79e806b8 100644 (file)
@@ -49,7 +49,7 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index e2f27af76ed5c5f7b57dc9a54c5336167a69a1fe..34d3dd056640251cf202b507240e1d68a37ea844 100644 (file)
@@ -47,7 +47,7 @@ main (void)
 
 /* Check that only one loop is analyzed, and that it can be parallelized.  */
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
 
 /* Check that the loop has been split off into a function.  */
index 305974b88cedaccd9086073821e5fc68178043a6..abbfaa34f703cfdbdf1b82b9c1f20e49f19bf5c2 100644 (file)
@@ -60,7 +60,7 @@ main (void)
 // FIXME: OpenACC kernels stopped working with the firstprivate subarray
 // changes.
 /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 2 "parloops1" { xfail *-*-* } } } */
 /* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } } */
 
 /* Check that the loop has been split off into a function.  */
index 3452156f9484ea0e99d12aa2fc7e24af3a1d095f..9a3fa5230f8b956e6a5bba676d8e4951b2f3c0d9 100644 (file)
@@ -43,7 +43,7 @@ tg_fn (int *x, int *y)
   x2 = x2 + 2 + called_in_target1 ();
   y2 = y2 + 7;
 
-  #pragma omp target device(ancestor : 1) map(tofrom: x2)  /* { dg-message "sorry, unimplemented: 'ancestor' not yet supported" } */
+  #pragma omp target device(ancestor : 1) map(tofrom: x2)
     check_offload(&x2, &y2);
 
   if (x2 != 2+2+3+42 || y2 != 3 + 7)
index 37cd1a0f1d3e7fa285b8f7af5eb84b0a0fa58958..87ac7548c23f4568a7994e98535fd9bf57485c01 100644 (file)
@@ -9,7 +9,7 @@
 void
 foo (void)
 {
-  #pragma omp target device (ancestor: 1) /* { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" { xfail *-*-* } } */
+  #pragma omp target device (ancestor: 1)
   ;
 
 }
index 96814a1697d3c99bd57b85f2ea810515d5d9d4df..5dd763faffe2e99ef5fd80b1d7f4a289c9c531d6 100644 (file)
@@ -29,16 +29,16 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
 
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
 ! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops" } }
index 3da7f2c711de9619fbacef1bb6a2a0636ff4153d..691d33913ab36b933f23caed16ff7b671bdff431 100644 (file)
@@ -30,16 +30,16 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
 
 ! Check that exactly one OpenACC kernels construct is analyzed, and that it
 ! can't be parallelized.
 ! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
 ! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops1" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } }
index 297ef48b9e5005ca24ee8adc3f3bc82f90bfdcc7..a132b47f27c596162e707c54d733216150b20d70 100644 (file)
@@ -33,10 +33,10 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel_kernels_graphite, omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel_kernels_graphite, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
 ! { dg-final { scan-tree-dump-not "^assigned OpenACC.*?loop parallelism$" "oaccloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel_kernels_graphite, omp target entrypoint\\)\\)" 1 "oaccloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel_kernels_graphite, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } }
index 411bc45390ffd06d7ce39903ec938410c96a3f41..97486ba61d8d1a49910b8ad94bc6a878373124d9 100644 (file)
@@ -33,4 +33,4 @@ end program main
 ! always be 1 x 1 x 1 for non-offloading compilation).
 ! { dg-final { scan-tree-dump-times "(?n)Function is parallel_kernels_graphite OpenACC kernels offload" 1 "oaccloops1" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel_kernels_graphite, omp target entrypoint\\)\\)" 1 "oaccloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel_kernels_graphite, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } }
index c18ab5a2e57be5fb132fdd9ceb58f1dc935912a2..dc35ac7a090de6d07454e2bf5eb7482ff5d3cf6d 100644 (file)
@@ -26,10 +26,10 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
 ! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccloops1" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint\\)\\)" 1 "oaccloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } }
index 233b94acdb1cfd72097a38a2805d0a27e02224ed..425a41768b8faa663627050e211012b793c774de 100644 (file)
@@ -26,10 +26,10 @@ program main
 end program main
 
 ! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint, noclone\\)\\)" 1 "ompexp" } }
 
 ! Check the offloaded function's classification and compute dimensions (will
 ! always be 1 x 1 x 1 for non-offloading compilation).
 ! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC serial offload" 1 "oaccloops1" } }
 ! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint\\)\\)" 1 "oaccloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint, noclone\\)\\)" 1 "oaccloops1" } }
index f3ab71da5f806ffaf56e8063e7c9767609ed16e1..206879540dec25e4a4e28124a177292b888ce639 100644 (file)
@@ -37,7 +37,7 @@ end program main
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "loop has no data-dependences" 6 "graphite" } } ! Two CFG loops per OpenACC loop
 ! { dg-final { scan-tree-dump-not "loop has data-dependences" "graphite" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc parallel_kernels_graphite, omp target entrypoint\\)\\)" 3 "graphite" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc parallel_kernels_graphite, omp target entrypoint, noclone\\)\\)" 3 "graphite" } }
 
 ! Check that the loop has been split off into a function.
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } }
index 348a2e186d87b7e128c0adc4b3c74ba1c9e2f15f..edb570e3cb47fc4bfc5433cb997324c2363f95f0 100644 (file)
@@ -42,7 +42,7 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc parallel_kernels_graphite, omp target entrypoint\\)\\)" 3 "graphite" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc parallel_kernels_graphite, omp target entrypoint, noclone\\)\\)" 3 "graphite" } }
 ! { dg-final { scan-tree-dump-times "loop has no data-dependences" 6 "graphite" } } ! Two CFG loops per OpenACC loop
 
 ! Check that the loop has been split off into a function.
index 461279d75726e37044a9ca1c426b34c34a09ba2a..d748b58b965341ceac99fc1a78923b433e7142a3 100644 (file)
@@ -43,7 +43,7 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
index cfb3803501b8e6bb54bedabe54ecc43027aa8529..2afbf6497b8455c3d36df849e7e40d9c1d4206c6 100644 (file)
@@ -41,7 +41,7 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
index a9ec77b4db60e24398da307bf66faf4e9b7e47e6..869bc2f5ea6d648b08eaa956a0aae9bff119f365 100644 (file)
@@ -41,7 +41,7 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 2 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
index 53a082b782e6204dccdc59cd96980e62d4ef40dc..c123699d27aef74f386559b8bb01207f3b5b9bf8 100644 (file)
@@ -41,7 +41,7 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 3 "parloops1" } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
index 0d1021e30235e83f4ddeee56d70f5961196336a4..78cfa7d79e80546229eec62afc7957c5a984106f 100644 (file)
@@ -36,7 +36,7 @@ end module test
 ! Check that only one loop is analyzed, and that it can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
 ! TODO, PR70545.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" { xfail *-*-* } } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 1 "parloops1" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
 
 ! Check that the loop has been split off into a function.
index 7ac3831294f7276b716441d9dea72ac09e4619c9..48e17d6ebe3970a46f0fe45741e4583307861dff 100644 (file)
@@ -31,7 +31,7 @@ program main
 end program main
 
 ! Check that only one loop is analyzed, and that it can be parallelized.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc parallel_kernels_graphite, omp target entrypoint\\)\\)" 1 "graphite" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc parallel_kernels_graphite, omp target entrypoint, noclone\\)\\)" 1 "graphite" } }
 ! { dg-final { scan-tree-dump-times "loop has no data-dependences" 2 "graphite" } } ! Two CFG loops per OpenACC loop
 
 ! Check that the loop has been split off into a function.
index 7e9589de799ce7bb97ac76ae6e757d206470f605..552bb618cbc96d8f8404ecf44bd68129ae29a12e 100644 (file)
@@ -42,7 +42,7 @@ end program main
 
 ! Check that only three loops are analyzed, and that all can be parallelized.
 ! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint, noclone\\)\\)" 2 "parloops1" { xfail *-*-* } } }
 ! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } }
 
 ! Check that the loop has been split off into a function.
index 56aff24df50f817a8a769ac7921caf0756c5ac29..d73adf2c5a7253305deb7ce21385b3bae711ec47 100644 (file)
@@ -6,7 +6,7 @@
 
 !$omp requires reverse_offload
 
-!$omp target device (ancestor : 1)  ! { dg-message "" "sorry, unimplemented: 'ancestor' not yet supported" }
+!$omp target device (ancestor : 1)
 !$omp end target
 
 end
index ca8d4b282a0d9842a3be3560d10be815c5c11a42..9596d61f6fae11e9aa8d701c75baba33bc6a94b1 100644 (file)
@@ -17,7 +17,7 @@ contains
     block
       block
         block
-          !$omp target device(ancestor:1)  ! { dg-message "sorry, unimplemented: 'ancestor' not yet supported" }
+          !$omp target device(ancestor:1)
           !$omp end target
         end block
       end block
index baa861ccff428bb2500da8ef3dca6501bf8e6538..316e5ee4bbe405db1403e83bc53d6170d6d8a7e9 100644 (file)
@@ -1,3 +1,15 @@
+2022-08-30  Tobias Burnus  <tobias@codesourcery.com>
+
+       Backport from mainline:
+       2022-08-26  Tobias Burnus  <tobias@codesourcery.com>
+
+       * libgomp.texi (OpenMP 5.0): Mark 'ancestor' as implemented but
+       refer to 'requires'.
+       * testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c: New test.
+       * testsuite/libgomp.c-c++-common/reverse-offload-1.c: New test.
+       * testsuite/libgomp.fortran/reverse-offload-1-aux.f90: New test.
+       * testsuite/libgomp.fortran/reverse-offload-1.f90: New test.
+
 2022-08-17  Tobias Burnus  <tobias@codesourcery.com>
 
        Backport from mainline:
index 527d457d8a92303218c5c8569814899a2010e584..77987619cf8552e6649bfb141b5f751e2b064ce8 100644 (file)
@@ -225,7 +225,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{allocate} clause @tab P @tab Initial support
 @item @code{use_device_addr} clause on @code{target data} @tab Y @tab
 @item @code{ancestor} modifier on @code{device} clause
-      @tab P @tab Reverse offload unsupported
+      @tab Y @tab See comment for @code{requires}
 @item Implicit declare target directive @tab Y @tab
 @item Discontiguous array section with @code{target update} construct
       @tab N @tab
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1-aux.c
new file mode 100644 (file)
index 0000000..b3a331d
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do compile { target skip-all-targets } }  */
+
+/* Declare the following function in a separate translation unit
+   to ensure it won't have a device version.  */
+
+int
+add_3 (int x)
+{
+  return x + 3;
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-1.c
new file mode 100644 (file)
index 0000000..976e129
--- /dev/null
@@ -0,0 +1,83 @@
+/* { dg-do run }  */
+/* { dg-additional-sources reverse-offload-1-aux.c } */
+
+/* Check that reverse offload works; in particular:
+   - no code is generated on the device side (i.e. no
+     implicit declare target of called functions and no
+     code gen for the target-region body)
+     -> would otherwise fail due to 'add_3' symbol
+   - Plus the usual (compiles, runs, produces correct result)
+
+   Note: Running also the non-reverse-offload target regions
+   on the host (host fallback) is valid and will pass.  */
+
+#pragma omp requires reverse_offload
+
+extern int add_3 (int);
+
+static int global_var = 5;
+
+void
+check_offload (int *x, int *y)
+{
+  *x = add_3 (*x);
+  *y = add_3 (*y);
+}
+
+#pragma omp declare target
+void
+tg_fn (int *x, int *y)
+{
+  int x2 = *x, y2 = *y;
+  if (x2 != 2 || y2 != 3)
+    __builtin_abort ();
+  x2 = x2 + 2;
+  y2 = y2 + 7;
+
+  #pragma omp target device(ancestor : 1) map(tofrom: x2)
+    check_offload(&x2, &y2);
+
+  if (x2 != 2+2+3 || y2 != 3 + 7)
+    __builtin_abort ();
+  *x = x2, *y = y2;
+}
+#pragma omp end declare target
+
+void
+my_func (int *x, int *y)
+{
+  if (global_var != 5)
+    __builtin_abort ();
+  global_var = 242;
+  *x = 2*add_3(*x);
+  *y = 3*add_3(*y);
+}
+
+int
+main ()
+{
+  #pragma omp target
+  {
+     int x = 2, y = 3;
+     tg_fn (&x, &y);
+  }
+
+  #pragma omp target
+  {
+     int x = -2, y = -1;
+     #pragma omp target device ( ancestor:1 ) firstprivate(y) map(tofrom:x)
+     {
+       if (x != -2 || y != -1)
+         __builtin_abort ();
+       my_func (&x, &y);
+       if (x != 2*(3-2) || y != 3*(3-1))
+         __builtin_abort ();
+     }
+     if (x != 2*(3-2) || y != -1)
+       __builtin_abort ();
+  }
+
+  if (global_var != 242)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-1-aux.f90
new file mode 100644 (file)
index 0000000..1807f06
--- /dev/null
@@ -0,0 +1,12 @@
+! { dg-do compile { target skip-all-targets } }
+
+! Declare the following function in a separate translation unit
+! to ensure it won't have a device version.
+
+
+integer function add_3 (x)
+  implicit none
+  integer, value :: x
+
+  add_3 = x + 3
+end function
diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-1.f90
new file mode 100644 (file)
index 0000000..7cfb8b6
--- /dev/null
@@ -0,0 +1,88 @@
+! { dg-do run }
+! { dg-additional-sources reverse-offload-1-aux.f90 }
+
+! Check that reverse offload works in particular:
+! - no code is generated on the device side (i.e. no
+!   implicit declare target of called functions and no
+!   code gen for the target-region body)
+!   -> would otherwise fail due to 'add_3' symbol
+! - Plus the usual (compiles, runs, produces correct result)
+
+! Note: Running also the non-reverse-offload target regions
+! on the host (host fallback) is valid and will pass.
+
+! Shared declarations: an explicit interface for the external function
+! add_3 (its argument is passed by value) and the module variable
+! global_var, which is initialized to 5.
+module m
+  interface
+    integer function add_3 (x)
+      implicit none
+      integer, value :: x
+    end function
+  end interface
+  integer :: global_var = 5
+end module m
+
+! Procedures used by the first target region of the main program.
+module m2
+  use m
+  !$omp requires reverse_offload
+  implicit none (type, external)
+contains
+  ! Replace X and Y by add_3 of their current values.
+  subroutine check_offload (x, y)
+    integer :: x
+    integer :: y
+    x = add_3(x)
+    y = add_3(y)
+  end subroutine check_offload
+
+  ! 'declare target' routine: expects X == 2 and Y == 3, adds 2 and 7 and
+  ! runs check_offload in a reverse-offload region that maps only X.
+  ! Afterwards X must be 2+2+3 and Y must still be 3+7.
+  subroutine m2_tg_fn(x, y)
+    integer :: x, y
+    !$omp declare target
+    if (.not. (x == 2 .and. y == 3)) stop 1
+    x = x + 2
+    y = y + 7
+    !$omp target device(ancestor : 1) map(tofrom: x)
+      call check_offload(x, y)
+    !$omp end target
+    if (.not. (x == 2+2+3 .and. y == 3 + 7)) stop 2
+  end subroutine m2_tg_fn
+end module m2
+
+! Driver: runs two target regions that each execute a reverse-offload
+! region (the first one through m2_tg_fn) and stops with a distinct code
+! on the first unexpected value.
+program main
+  use m
+  !$omp requires reverse_offload
+  implicit none (type, external)
+
+  integer :: prog_var = 99
+
+  ! First region: the reverse offload happens inside m2_tg_fn.
+  !$omp target
+  block
+    use m2
+    integer :: x, y
+    x = 2
+    y = 3
+    call m2_tg_fn (x, y)
+  end block
+
+  ! Second region: X is mapped tofrom and keeps my_func's update, whereas
+  ! Y is firstprivate in the inner region and is unchanged after it.
+  !$omp target
+  block
+    use m2
+    integer :: x, y
+    x = -2
+    y = -1
+    !$omp target device ( ancestor:1 ) firstprivate(y) map(tofrom:x)
+      if (.not. (x == -2 .and. y == -1)) stop 3
+      call my_func (x, y)
+      if (.not. (x == 2*(3-2) .and. y == 3*(3-1))) stop 5
+    !$omp end target
+    if (.not. (x == 2*(3-2) .and. y == -1)) stop 6
+  end block
+
+  ! my_func must have run and updated both variables.
+  if (.not. (prog_var == 41 .and. global_var == 242)) stop 7
+
+contains
+
+  ! Checks that prog_var and global_var still hold their initial values,
+  ! overwrites both, and stores 2*add_3(X) in X and 3*add_3(Y) in Y.
+  subroutine my_func(x, y)
+    integer :: x
+    integer :: y
+    if (.not. (prog_var == 99)) stop 8
+    if (.not. (global_var == 5)) stop 9
+    prog_var = 41
+    global_var = 242
+    x = 2*add_3(x)
+    y = 3*add_3(y)
+  end subroutine my_func
+end program main