git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Add changes to profiling interface from OG8 branch
author Kwok Cheung Yeung <kcy@codesourcery.com>
Fri, 21 Jun 2019 17:40:38 +0000 (10:40 -0700)
committer Thomas Schwinge <thomas@codesourcery.com>
Tue, 3 Mar 2020 11:50:41 +0000 (12:50 +0100)
This bundles up the parts of the profiling code from the OG8 branch that were
not included in the upstream patch.

libgomp/
* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Update.

libgomp/
* oacc-init.c (get_property_any): Add profiling code.

libgomp/
* Makefile.am (libgomp_la_SOURCES): Add
oacc-profiling-acc_register_library.c.
* Makefile.in: Regenerate.
* libgomp.texi: Remove paragraph about acc_register_library.
* oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for
profiling.
* oacc-profiling-acc_register_library.c: New file.
* oacc-profiling.c (goacc_profiling_initialize): Call
acc_register_library.  Avoid duplicate registration.
(acc_register_library): Remove.
* config/nvptx/oacc-profiling-acc_register_library.c:
New empty file.
* config/nvptx/oacc-profiling.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove
call to acc_register_library.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise.

(cherry picked from openacc-gcc-9-branch commit
b1321d52402d217793a27592e2959b07319df75b)

16 files changed:
libgomp/ChangeLog.omp
libgomp/Makefile.am
libgomp/Makefile.in
libgomp/config/nvptx/oacc-profiling-acc_register_library.c [new file with mode: 0644]
libgomp/config/nvptx/oacc-profiling.c [new file with mode: 0644]
libgomp/libgomp.texi
libgomp/oacc-init.c
libgomp/oacc-parallel.c
libgomp/oacc-profiling-acc_register_library.c [new file with mode: 0644]
libgomp/oacc-profiling.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c
libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c

index 66f2a80850b4f0f4591d60bd6710f8e863db2e38..caf5f6e2fb1b4ebfd1582cec59c8d2cb8a85c8ac 100644 (file)
@@ -1,3 +1,35 @@
+2019-01-23  Thomas Schwinge <thomas@codesourcery.com>
+
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Update.
+
+2018-12-20  Maciej W. Rozycki  <macro@codesourcery.com>
+
+       * oacc-init.c (get_property_any): Add profiling code.
+
+2017-02-28  Thomas Schwinge  <thomas@codesourcery.com>
+
+       * Makefile.am (libgomp_la_SOURCES): Add
+       oacc-profiling-acc_register_library.c.
+       * Makefile.in: Regenerate.
+       * libgomp.texi: Remove paragraph about acc_register_library.
+       * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for
+       profiling.
+       * oacc-profiling-acc_register_library.c: New file.
+       * oacc-profiling.c (goacc_profiling_initialize): Call
+       acc_register_library.  Avoid duplicate registration.
+       (acc_register_library): Remove.
+       * config/nvptx/oacc-profiling-acc_register_library.c:
+       New empty file.
+       * config/nvptx/oacc-profiling.c: Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove
+       call to acc_register_library.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c:
+       Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise.
+
 2019-05-17  Thomas Schwinge  <thomas@codesourcery.com>
 
        * acc_prof.h: New file.
index 50918707dd78df09f9a927d22652e7423b16b60e..00848cdc792e86fc435a04da8550deb00331c82a 100644 (file)
@@ -72,7 +72,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
        proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
        splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
        oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
-       affinity-fmt.c teams.c oacc-profiling.c
+       affinity-fmt.c teams.c oacc-profiling.c \
+       oacc-profiling-acc_register_library.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
index 23efc844bde1fa1e5ec9d3a4bae55b5ac79a9b8a..63f919cb7ca383af3fc467dc22dad883e78174e7 100644 (file)
@@ -16,7 +16,7 @@
 
 # Plugins for offload execution, Makefile.am fragment.
 #
-# Copyright (C) 2014-2018 Free Software Foundation, Inc.
+# Copyright (C) 2014-2019 Free Software Foundation, Inc.
 #
 # Contributed by Mentor Embedded.
 #
@@ -216,7 +216,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
        target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
        oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
        oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
-       teams.lo oacc-profiling.lo $(am__objects_1)
+       teams.lo oacc-profiling.lo \
+       oacc-profiling-acc_register_library.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -524,7 +525,7 @@ search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir) \
 
 fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude
 libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
-libgomp_la_LIBADD = $(LIBFFI)
+@USE_LIBFFI_TRUE@libgomp_la_LIBADD = $(LIBFFI)
 AM_CPPFLAGS = $(addprefix -I, $(search_path)) $(LIBFFIINCS)
 AM_CFLAGS = $(XCFLAGS)
 AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
@@ -553,7 +554,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
        affinity.c target.c splay-tree.c libgomp-plugin.c \
        oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
        oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
-       affinity-fmt.c teams.c oacc-profiling.c $(am__append_3)
+       affinity-fmt.c teams.c oacc-profiling.c \
+       oacc-profiling-acc_register_library.c $(am__append_3)
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -755,6 +757,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling-acc_register_library.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
diff --git a/libgomp/config/nvptx/oacc-profiling-acc_register_library.c b/libgomp/config/nvptx/oacc-profiling-acc_register_library.c
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/libgomp/config/nvptx/oacc-profiling.c b/libgomp/config/nvptx/oacc-profiling.c
new file mode 100644 (file)
index 0000000..e69de29
index 2e052725aaea026a7ac83a4569d655bbafd211a3..5c9cf9e001d7293988b5e872ae7e109900178d03 100644 (file)
@@ -3254,14 +3254,6 @@ every event that has been registered.
 We're not yet accounting for the fact that @cite{OpenACC events may
 occur during event processing}.
 
-We're not yet implementing initialization via a
-@code{acc_register_library} function that is either statically linked
-in, or dynamically via @env{LD_PRELOAD}.
-Initialization via @code{acc_register_library} functions dynamically
-loaded via the @env{ACC_PROFLIB} environment variable does work, as
-does directly calling @code{acc_prof_register},
-@code{acc_prof_unregister}, @code{acc_prof_lookup}.
-
 As currently there are no inquiry functions defined, calls to
 @code{acc_prof_lookup} will always return @code{NULL}.
 
index f7ea58fe769d1ff2aee6a3862c8ce9061b13458d..766f59f39635cae90eef737cd41851bb53e83314 100644 (file)
@@ -791,14 +791,30 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
   if (d == acc_device_current && (!thr || !thr->dev))
     return (union gomp_device_property_value) { .val = 0 };
 
+  acc_prof_info prof_info;
+  acc_api_info api_info;
+  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
   if (d == acc_device_current)
     {
+      if (profiling_p)
+       {
+         prof_info.device_type = acc_device_type (thr->dev->type);
+         prof_info.device_number = thr->dev->target_id;
+       }
+
       dev = thr->dev;
     }
   else
     {
       int num_devices;
 
+      if (profiling_p)
+       {
+         prof_info.device_type = d;
+         prof_info.device_number = ord;
+       }
+
       gomp_mutex_lock (&acc_device_lock);
 
       dev = resolve_device (d, false);
@@ -822,6 +838,12 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
 
   propval = dev->get_property_func (dev->target_id, prop);
 
+  if (profiling_p)
+    {
+      thr->prof_info = NULL;
+      thr->api_info = NULL;
+    }
+
   return propval;
 }
 
index b447d2ee15a5264d931d9ebf9c06db064d0fa9ac..a374f91f0d73fed054087b79f6d2be39011d5174 100644 (file)
@@ -275,6 +275,8 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *),
       goacc_call_host_fn (fn, mapnum, hostaddrs, params);
       goto out_prof;
     }
+  else if (profiling_p)
+    api_info.device_api = acc_device_api_cuda;
 
   /* Default: let the runtime choose.  */
   for (i = 0; i != GOMP_DIM_MAX; i++)
diff --git a/libgomp/oacc-profiling-acc_register_library.c b/libgomp/oacc-profiling-acc_register_library.c
new file mode 100644 (file)
index 0000000..f6b482b
--- /dev/null
@@ -0,0 +1,39 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+   Contributed by Mentor Embedded.
+
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file provides a stub acc_register_library function.  It's in a
+   separate file so that this function can easily be overridden when linking
+   statically.  */
+
+#include "libgomp.h"
+#include "acc_prof.h"
+
+void
+acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
+                     acc_prof_lookup_func lookup)
+{
+  gomp_debug (0, "dummy %s\n", __FUNCTION__);
+}
index eff28865089524eed052131ced495dba87367d9c..d54f4b8658f8c9ed8446d908bf69388fe9dc50cc 100644 (file)
@@ -104,7 +104,12 @@ goacc_profiling_initialize (void)
   for (int i = 0; i < acc_ev_last; ++i)
     goacc_prof_callbacks_enabled[i] = true;
 
-
+  /* We are to invoke an external acc_register_library routine, defaulting to
+     our stub oacc-profiling-acc_register_library.c:acc_register_library
+     implementation.  */
+  gomp_debug (0, "%s: calling acc_register_library\n", __FUNCTION__);
+  //TODO.
+  acc_register_library (acc_prof_register, acc_prof_unregister, NULL);
 #ifdef PLUGIN_SUPPORT
   char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
   while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
@@ -141,10 +146,20 @@ goacc_profiling_initialize (void)
                = dlsym (dl_handle, "acc_register_library");
              if (a_r_l == NULL)
                goto dl_fail;
-             gomp_debug (0, "  %s: calling %s:acc_register_library\n",
-                         __FUNCTION__, acc_proflib);
-             a_r_l (acc_prof_register, acc_prof_unregister,
-                    acc_prof_lookup);
+             /* Avoid duplicate registration, for example if the same shared
+                library is specified in LD_PRELOAD and ACC_PROFLIB -- which
+                TAU 2.26 does when using "tau_exec -openacc".  */
+             if (a_r_l != acc_register_library)
+               {
+                 gomp_debug (0, "  %s: calling %s:acc_register_library\n",
+                             __FUNCTION__, acc_proflib);
+                 //TODO.
+                 a_r_l (acc_prof_register, acc_prof_unregister, NULL);
+               }
+             else
+               gomp_debug (0, "  %s: skipping duplicate"
+                           " %s:acc_register_library\n",
+                           __FUNCTION__, acc_proflib);
            }
          else
            {
@@ -487,13 +502,6 @@ acc_prof_lookup (const char *name)
   return NULL;
 }
 
-void
-acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
-                     acc_prof_lookup_func lookup)
-{
-  gomp_fatal ("TODO");
-}
-
 /* Prepare to dispatch events?  */
 
 bool
index d929bfd80a4fb380b9489e79e68c328a4349c859..a9a8c74150c9150e13d3bfe90d42b12700e94c44 100644 (file)
@@ -114,8 +114,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
   STATE_OP (state, = 0);
   reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
   reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
index b356feb8108ca7add09963ee373adb4f690f6456..6a44e8ffb6a19a2b1b56de2ecc41a8a8d19061ae 100644 (file)
@@ -253,8 +253,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
   STATE_OP (state, = 0);
   reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
   reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
index 7cfc364e4113fd11a255ed349eda389a8e9026ce..269b4398478d8d9560d15524a158eee3996a61bf 100644 (file)
@@ -41,6 +41,7 @@ static int state = -1;
 static acc_device_t acc_device_type;
 static int acc_device_num;
 static int num_gangs, num_workers, vector_length;
+static int async;
 
 
 static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
@@ -58,7 +59,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
   assert (prof_info->device_type == acc_device_type);
   assert (prof_info->device_number == acc_device_num);
   assert (prof_info->thread_id == -1);
-  assert (prof_info->async == acc_async_sync);
+  assert (prof_info->async == async);
   assert (prof_info->async_queue == prof_info->async);
   assert (prof_info->src_file == NULL);
   assert (prof_info->func_name == NULL);
@@ -144,8 +145,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
   STATE_OP (state, = 0);
   reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
   assert (state == 0);
@@ -154,8 +153,10 @@ int main()
   acc_device_num = acc_get_device_num (acc_device_type);
   assert (state == 0);
 
-  /* Parallelism dimensions: compiler/runtime decides.  */
   STATE_OP (state, = 0);
+  /* Implicit async.  */
+  async = acc_async_noval;
+  /* Parallelism dimensions: compiler/runtime decides.  */
   num_gangs = num_workers = vector_length = 0;
   {
 #define N 100
@@ -175,8 +176,10 @@ int main()
 #undef N
   }
 
-  /* Parallelism dimensions: literal.  */
   STATE_OP (state, = 0);
+  /* Explicit async: without argument.  */
+  async = acc_async_noval;
+  /* Parallelism dimensions: literal.  */
   num_gangs = 30;
   num_workers = 3;
   vector_length = 5;
@@ -184,6 +187,7 @@ int main()
 #define N 100
     int x[N];
 #pragma acc kernels \
+  async \
   num_gangs (30) num_workers (3) vector_length (5)
     /* { dg-prune-output "using vector_length \\(32\\), ignoring 5" } */
     {
@@ -200,8 +204,10 @@ int main()
 #undef N
   }
 
-  /* Parallelism dimensions: variable.  */
   STATE_OP (state, = 0);
+  /* Explicit async: variable.  */
+  async = 123;
+  /* Parallelism dimensions: variable.  */
   num_gangs = 22;
   num_workers = 5;
   vector_length = 7;
@@ -209,6 +215,7 @@ int main()
 #define N 100
     int x[N];
 #pragma acc kernels \
+  async (async) \
   num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
     /* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
     {
index ac6eb48cbbef860e996fd94c277099effa52f2a3..116b9b538a6fa349d7680a0a0bcb4b221c663651 100644 (file)
@@ -667,8 +667,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
   STATE_OP (state, = 0);
   reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
   reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
index 5b58c51d4c42bbd3a6eb522b05ff47813977a55d..a723ad97b93301d275d24ed3bb96b41ded9e88e4 100644 (file)
@@ -143,8 +143,6 @@ typedef struct E
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
   A A1;
   DEBUG_printf ("s=%zd, vb=%zd\n", sizeof A1, VALID_BYTES_A);
   assert (VALID_BYTES_A <= sizeof A1);
index f53786871671180b62c434362d604a5ae9c2492e..0f9e9562bc61ccdb877372aa9191186dcfb2a8f9 100644 (file)
@@ -56,8 +56,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
 
 int main()
 {
-  acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
   ev_count = 0;
 
   /* Trigger tests done in 'cb_*' functions.  */