This bundles up the parts of the profiling code from the OG8 branch that were
not included in the upstream patch.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Update.
libgomp/
* oacc-init.c (get_property_any): Add profiling code.
libgomp/
* Makefile.am (libgomp_la_SOURCES): Add
oacc-profiling-acc_register_library.c.
* Makefile.in: Regenerate.
* libgomp.texi: Remove paragraph about acc_register_library.
* oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for
profiling.
* oacc-profiling-acc_register_library.c: New file.
* oacc-profiling.c (goacc_profiling_initialize): Call
acc_register_library. Avoid duplicate registration.
(acc_register_library): Remove.
* config/nvptx/oacc-profiling-acc_register_library.c:
New empty file.
* config/nvptx/oacc-profiling.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove
call to acc_register_library.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise.
(cherry picked from openacc-gcc-9-branch commit
b1321d52402d217793a27592e2959b07319df75b)
+2019-01-23 Thomas Schwinge <thomas@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Update.
+
+2018-12-20 Maciej W. Rozycki <macro@codesourcery.com>
+
+ * oacc-init.c (get_property_any): Add profiling code.
+
+2017-02-28 Thomas Schwinge <thomas@codesourcery.com>
+
+ * Makefile.am (libgomp_la_SOURCES): Add
+ oacc-profiling-acc_register_library.c.
+ * Makefile.in: Regenerate.
+ * libgomp.texi: Remove paragraph about acc_register_library.
+ * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for
+ profiling.
+ * oacc-profiling-acc_register_library.c: New file.
+ * oacc-profiling.c (goacc_profiling_initialize): Call
+ acc_register_library. Avoid duplicate registration.
+ (acc_register_library): Remove.
+ * config/nvptx/oacc-profiling-acc_register_library.c:
+ New empty file.
+ * config/nvptx/oacc-profiling.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove
+ call to acc_register_library.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c:
+ Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise.
+
2019-05-17 Thomas Schwinge <thomas@codesourcery.com>
* acc_prof.h: New file.
proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \
splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \
oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
- affinity-fmt.c teams.c oacc-profiling.c
+ affinity-fmt.c teams.c oacc-profiling.c \
+ oacc-profiling-acc_register_library.c
include $(top_srcdir)/plugin/Makefrag.am
# Plugins for offload execution, Makefile.am fragment.
#
-# Copyright (C) 2014-2018 Free Software Foundation, Inc.
+# Copyright (C) 2014-2019 Free Software Foundation, Inc.
#
# Contributed by Mentor Embedded.
#
target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \
oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \
oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \
- teams.lo oacc-profiling.lo $(am__objects_1)
+ teams.lo oacc-profiling.lo \
+ oacc-profiling-acc_register_library.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude
libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
-libgomp_la_LIBADD = $(LIBFFI)
+@USE_LIBFFI_TRUE@libgomp_la_LIBADD = $(LIBFFI)
AM_CPPFLAGS = $(addprefix -I, $(search_path)) $(LIBFFIINCS)
AM_CFLAGS = $(XCFLAGS)
AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS)
affinity.c target.c splay-tree.c libgomp-plugin.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
- affinity-fmt.c teams.c oacc-profiling.c $(am__append_3)
+ affinity-fmt.c teams.c oacc-profiling.c \
+ oacc-profiling-acc_register_library.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling-acc_register_library.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
We're not yet accounting for the fact that @cite{OpenACC events may
occur during event processing}.
-We're not yet implementing initialization via a
-@code{acc_register_library} function that is either statically linked
-in, or dynamically via @env{LD_PRELOAD}.
-Initialization via @code{acc_register_library} functions dynamically
-loaded via the @env{ACC_PROFLIB} environment variable does work, as
-does directly calling @code{acc_prof_register},
-@code{acc_prof_unregister}, @code{acc_prof_lookup}.
-
As currently there are no inquiry functions defined, calls to
@code{acc_prof_lookup} will always return @code{NULL}.
if (d == acc_device_current && (!thr || !thr->dev))
return (union gomp_device_property_value) { .val = 0 };
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
if (d == acc_device_current)
{
+ if (profiling_p)
+ {
+ prof_info.device_type = acc_device_type (thr->dev->type);
+ prof_info.device_number = thr->dev->target_id;
+ }
+
dev = thr->dev;
}
else
{
int num_devices;
+ if (profiling_p)
+ {
+ prof_info.device_type = d;
+ prof_info.device_number = ord;
+ }
+
gomp_mutex_lock (&acc_device_lock);
dev = resolve_device (d, false);
propval = dev->get_property_func (dev->target_id, prop);
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+
return propval;
}
goacc_call_host_fn (fn, mapnum, hostaddrs, params);
goto out_prof;
}
+ else if (profiling_p)
+ api_info.device_api = acc_device_api_cuda;
/* Default: let the runtime choose. */
for (i = 0; i != GOMP_DIM_MAX; i++)
--- /dev/null
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file provides a stub acc_register_library function. It's in a
+ separate file so that this function can easily be overridden when linking
+ statically. */
+
+#include "libgomp.h"
+#include "acc_prof.h"
+
+void
+acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
+ acc_prof_lookup_func lookup)
+{
+ gomp_debug (0, "dummy %s\n", __FUNCTION__); /* Stub body: REG, UNREG and LOOKUP are not used; only this debug message is emitted. */
+}
for (int i = 0; i < acc_ev_last; ++i)
goacc_prof_callbacks_enabled[i] = true;
-
+ /* We are to invoke an external acc_register_library routine, defaulting to
+ our stub oacc-profiling-acc_register_library.c:acc_register_library
+ implementation. */
+ gomp_debug (0, "%s: calling acc_register_library\n", __FUNCTION__);
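+ //TODO: NULL is passed as the lookup argument below; presumably acc_prof_lookup is intended eventually -- confirm.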
+ acc_register_library (acc_prof_register, acc_prof_unregister, NULL);
#ifdef PLUGIN_SUPPORT
char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
= dlsym (dl_handle, "acc_register_library");
if (a_r_l == NULL)
goto dl_fail;
- gomp_debug (0, " %s: calling %s:acc_register_library\n",
- __FUNCTION__, acc_proflib);
- a_r_l (acc_prof_register, acc_prof_unregister,
- acc_prof_lookup);
+ /* Avoid duplicate registration, for example if the same shared
+ library is specified in LD_PRELOAD and ACC_PROFLIB -- which
+ TAU 2.26 does when using "tau_exec -openacc". */
+ if (a_r_l != acc_register_library)
+ {
+ gomp_debug (0, " %s: calling %s:acc_register_library\n",
+ __FUNCTION__, acc_proflib);
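+ //TODO: NULL is passed as the lookup argument below (previously acc_prof_lookup); presumably to be restored -- confirm.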
+ a_r_l (acc_prof_register, acc_prof_unregister, NULL);
+ }
+ else
+ gomp_debug (0, " %s: skipping duplicate"
+ " %s:acc_register_library\n",
+ __FUNCTION__, acc_proflib);
}
else
{
return NULL;
}
-void
-acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
- acc_prof_lookup_func lookup)
-{
- gomp_fatal ("TODO");
-}
-
/* Prepare to dispatch events? */
bool
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
STATE_OP (state, = 0);
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
STATE_OP (state, = 0);
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
static acc_device_t acc_device_type;
static int acc_device_num;
static int num_gangs, num_workers, vector_length;
+static int async;
static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
- assert (prof_info->async == acc_async_sync);
+ assert (prof_info->async == async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
STATE_OP (state, = 0);
reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
assert (state == 0);
acc_device_num = acc_get_device_num (acc_device_type);
assert (state == 0);
- /* Parallelism dimensions: compiler/runtime decides. */
STATE_OP (state, = 0);
+ /* Implicit async. */
+ async = acc_async_noval;
+ /* Parallelism dimensions: compiler/runtime decides. */
num_gangs = num_workers = vector_length = 0;
{
#define N 100
#undef N
}
- /* Parallelism dimensions: literal. */
STATE_OP (state, = 0);
+ /* Explicit async: without argument. */
+ async = acc_async_noval;
+ /* Parallelism dimensions: literal. */
num_gangs = 30;
num_workers = 3;
vector_length = 5;
#define N 100
int x[N];
#pragma acc kernels \
+ async \
num_gangs (30) num_workers (3) vector_length (5)
/* { dg-prune-output "using vector_length \\(32\\), ignoring 5" } */
{
#undef N
}
- /* Parallelism dimensions: variable. */
STATE_OP (state, = 0);
+ /* Explicit async: variable. */
+ async = 123;
+ /* Parallelism dimensions: variable. */
num_gangs = 22;
num_workers = 5;
vector_length = 7;
#define N 100
int x[N];
#pragma acc kernels \
+ async (async) \
num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
/* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
{
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
STATE_OP (state, = 0);
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
A A1;
DEBUG_printf ("s=%zd, vb=%zd\n", sizeof A1, VALID_BYTES_A);
assert (VALID_BYTES_A <= sizeof A1);
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
ev_count = 0;
/* Trigger tests done in 'cb_*' functions. */