From: Kwok Cheung Yeung Date: Fri, 21 Jun 2019 17:40:38 +0000 (-0700) Subject: Add changes to profiling interface from OG8 branch X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8a74f62e60ae8606878e1a70d4aff749d99b7723;p=thirdparty%2Fgcc.git Add changes to profiling interface from OG8 branch This bundles up the parts of the profiling code from the OG8 branch that were not included in the upstream patch. libgomp/ * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Update. libgomp/ * oacc-init.c (get_property_any): Add profiling code. libgomp/ * Makefile.am (libgomp_la_SOURCES): Add oacc-profiling-acc_register_library.c. * Makefile.in: Regenerate. * libgomp.texi: Remove paragraph about acc_register_library. * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for profiling. * oacc-profiling-acc_register_library.c: New file. * oacc-profiling.c (goacc_profiling_initialize): Call acc_register_library. Avoid duplicate registration. (acc_register_library): Remove. * config/nvptx/oacc-profiling-acc_register_library.c: New empty file. * config/nvptx/oacc-profiling.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove call to acc_register_library. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. (cherry picked from openacc-gcc-9-branch commit b1321d52402d217793a27592e2959b07319df75b) --- diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 66f2a80850b4..caf5f6e2fb1b 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,35 @@ +2019-01-23 Thomas Schwinge + + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Update. + +2018-12-20 Maciej W. 
Rozycki + + * oacc-init.c (get_property_any): Add profiling code. + +2017-02-28 Thomas Schwinge + + * Makefile.am (libgomp_la_SOURCES): Add + oacc-profiling-acc_register_library.c. + * Makefile.in: Regenerate. + * libgomp.texi: Remove paragraph about acc_register_library. + * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for + profiling. + * oacc-profiling-acc_register_library.c: New file. + * oacc-profiling.c (goacc_profiling_initialize): Call + acc_register_library. Avoid duplicate registration. + (acc_register_library): Remove. + * config/nvptx/oacc-profiling-acc_register_library.c: + New empty file. + * config/nvptx/oacc-profiling.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove + call to acc_register_library. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. + 2019-05-17 Thomas Schwinge * acc_prof.h: New file. 
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index 50918707dd78..00848cdc792e 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -72,7 +72,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ proc.c sem.c bar.c ptrlock.c time.c fortran.c affinity.c target.c \ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c oacc-init.c \ oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ - affinity-fmt.c teams.c oacc-profiling.c + affinity-fmt.c teams.c oacc-profiling.c \ + oacc-profiling-acc_register_library.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 23efc844bde1..63f919cb7ca3 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -16,7 +16,7 @@ # Plugins for offload execution, Makefile.am fragment. # -# Copyright (C) 2014-2018 Free Software Foundation, Inc. +# Copyright (C) 2014-2019 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. # @@ -216,7 +216,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ target.lo splay-tree.lo libgomp-plugin.lo oacc-parallel.lo \ oacc-host.lo oacc-init.lo oacc-mem.lo oacc-async.lo \ oacc-plugin.lo oacc-cuda.lo priority_queue.lo affinity-fmt.lo \ - teams.lo oacc-profiling.lo $(am__objects_1) + teams.lo oacc-profiling.lo \ + oacc-profiling-acc_register_library.lo $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -524,7 +525,7 @@ search_path = $(addprefix $(top_srcdir)/config/, $(config_path)) $(top_srcdir) \ fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include -libgomp_la_LIBADD = $(LIBFFI) +@USE_LIBFFI_TRUE@libgomp_la_LIBADD = $(LIBFFI) AM_CPPFLAGS = $(addprefix -I, $(search_path)) $(LIBFFIINCS) AM_CFLAGS = $(XCFLAGS) AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS) @@ -553,7 +554,8 @@ 
libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ affinity.c target.c splay-tree.c libgomp-plugin.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ - affinity-fmt.c teams.c oacc-profiling.c $(am__append_3) + affinity-fmt.c teams.c oacc-profiling.c \ + oacc-profiling-acc_register_library.c $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -755,6 +757,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling-acc_register_library.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@ diff --git a/libgomp/config/nvptx/oacc-profiling-acc_register_library.c b/libgomp/config/nvptx/oacc-profiling-acc_register_library.c new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/libgomp/config/nvptx/oacc-profiling.c b/libgomp/config/nvptx/oacc-profiling.c new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 2e052725aaea..5c9cf9e001d7 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -3254,14 +3254,6 @@ every event that has been registered. We're not yet accounting for the fact that @cite{OpenACC events may occur during event processing}. -We're not yet implementing initialization via a -@code{acc_register_library} function that is either statically linked -in, or dynamically via @env{LD_PRELOAD}. 
-Initialization via @code{acc_register_library} functions dynamically -loaded via the @env{ACC_PROFLIB} environment variable does work, as -does directly calling @code{acc_prof_register}, -@code{acc_prof_unregister}, @code{acc_prof_lookup}. - As currently there are no inquiry functions defined, calls to @code{acc_prof_lookup} will always return @code{NULL}. diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index f7ea58fe769d..766f59f39635 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -791,14 +791,30 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop) if (d == acc_device_current && (!thr || !thr->dev)) return (union gomp_device_property_value) { .val = 0 }; + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + if (d == acc_device_current) { + if (profiling_p) + { + prof_info.device_type = acc_device_type (thr->dev->type); + prof_info.device_number = thr->dev->target_id; + } + dev = thr->dev; } else { int num_devices; + if (profiling_p) + { + prof_info.device_type = d; + prof_info.device_number = ord; + } + gomp_mutex_lock (&acc_device_lock); dev = resolve_device (d, false); @@ -822,6 +838,12 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop) propval = dev->get_property_func (dev->target_id, prop); + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } + return propval; } diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index b447d2ee15a5..a374f91f0d73 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -275,6 +275,8 @@ GOACC_parallel_keyed_internal (int flags_m, int params, void (*fn) (void *), goacc_call_host_fn (fn, mapnum, hostaddrs, params); goto out_prof; } + else if (profiling_p) + api_info.device_api = acc_device_api_cuda; /* Default: let the runtime choose. 
*/ for (i = 0; i != GOMP_DIM_MAX; i++) diff --git a/libgomp/oacc-profiling-acc_register_library.c b/libgomp/oacc-profiling-acc_register_library.c new file mode 100644 index 000000000000..f6b482b51f4b --- /dev/null +++ b/libgomp/oacc-profiling-acc_register_library.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This file provides a stub acc_register_library function. It's in a + separate file so that this function can easily be overridden when linking + statically. 
*/ + +#include "libgomp.h" +#include "acc_prof.h" + +void +acc_register_library (acc_prof_reg reg, acc_prof_reg unreg, + acc_prof_lookup_func lookup) +{ + gomp_debug (0, "dummy %s\n", __FUNCTION__); +} diff --git a/libgomp/oacc-profiling.c b/libgomp/oacc-profiling.c index eff288650895..d54f4b8658f8 100644 --- a/libgomp/oacc-profiling.c +++ b/libgomp/oacc-profiling.c @@ -104,7 +104,12 @@ goacc_profiling_initialize (void) for (int i = 0; i < acc_ev_last; ++i) goacc_prof_callbacks_enabled[i] = true; - + /* We are to invoke an external acc_register_library routine, defaulting to + our stub oacc-profiling-acc_register_library.c:acc_register_library + implementation. */ + gomp_debug (0, "%s: calling acc_register_library\n", __FUNCTION__); + //TODO. + acc_register_library (acc_prof_register, acc_prof_unregister, NULL); #ifdef PLUGIN_SUPPORT char *acc_proflibs = secure_getenv ("ACC_PROFLIB"); while (acc_proflibs != NULL && acc_proflibs[0] != '\0') @@ -141,10 +146,20 @@ goacc_profiling_initialize (void) = dlsym (dl_handle, "acc_register_library"); if (a_r_l == NULL) goto dl_fail; - gomp_debug (0, " %s: calling %s:acc_register_library\n", - __FUNCTION__, acc_proflib); - a_r_l (acc_prof_register, acc_prof_unregister, - acc_prof_lookup); + /* Avoid duplicate registration, for example if the same shared + library is specified in LD_PRELOAD and ACC_PROFLIB -- which + TAU 2.26 does when using "tau_exec -openacc". */ + if (a_r_l != acc_register_library) + { + gomp_debug (0, " %s: calling %s:acc_register_library\n", + __FUNCTION__, acc_proflib); + //TODO. + a_r_l (acc_prof_register, acc_prof_unregister, NULL); + } + else + gomp_debug (0, " %s: skipping duplicate" + " %s:acc_register_library\n", + __FUNCTION__, acc_proflib); } else { @@ -487,13 +502,6 @@ acc_prof_lookup (const char *name) return NULL; } -void -acc_register_library (acc_prof_reg reg, acc_prof_reg unreg, - acc_prof_lookup_func lookup) -{ - gomp_fatal ("TODO"); -} - /* Prepare to dispatch events? 
*/ bool diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c index d929bfd80a4f..a9a8c74150c9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c @@ -114,8 +114,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - STATE_OP (state, = 0); reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg); reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index b356feb8108c..6a44e8ffb6a1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -253,8 +253,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - STATE_OP (state, = 0); reg (acc_ev_device_init_start, cb_device_init_start, acc_reg); reg (acc_ev_device_init_end, cb_device_init_end, acc_reg); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c index 7cfc364e4113..269b4398478d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c @@ -41,6 +41,7 @@ static int state = -1; static acc_device_t acc_device_type; static int acc_device_num; static int num_gangs, num_workers, vector_length; +static int async; static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) @@ -58,7 
+59,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - assert (prof_info->async == acc_async_sync); + assert (prof_info->async == async); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); @@ -144,8 +145,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - STATE_OP (state, = 0); reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg); assert (state == 0); @@ -154,8 +153,10 @@ int main() acc_device_num = acc_get_device_num (acc_device_type); assert (state == 0); - /* Parallelism dimensions: compiler/runtime decides. */ STATE_OP (state, = 0); + /* Implicit async. */ + async = acc_async_noval; + /* Parallelism dimensions: compiler/runtime decides. */ num_gangs = num_workers = vector_length = 0; { #define N 100 @@ -175,8 +176,10 @@ int main() #undef N } - /* Parallelism dimensions: literal. */ STATE_OP (state, = 0); + /* Explicit async: without argument. */ + async = acc_async_noval; + /* Parallelism dimensions: literal. */ num_gangs = 30; num_workers = 3; vector_length = 5; @@ -184,6 +187,7 @@ int main() #define N 100 int x[N]; #pragma acc kernels \ + async \ num_gangs (30) num_workers (3) vector_length (5) /* { dg-prune-output "using vector_length \\(32\\), ignoring 5" } */ { @@ -200,8 +204,10 @@ int main() #undef N } - /* Parallelism dimensions: variable. */ STATE_OP (state, = 0); + /* Explicit async: variable. */ + async = 123; + /* Parallelism dimensions: variable. 
*/ num_gangs = 22; num_workers = 5; vector_length = 7; @@ -209,6 +215,7 @@ int main() #define N 100 int x[N]; #pragma acc kernels \ + async (async) \ num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length) /* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */ { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index ac6eb48cbbef..116b9b538a6f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -667,8 +667,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - STATE_OP (state, = 0); reg (acc_ev_device_init_start, cb_device_init_start, acc_reg); reg (acc_ev_device_init_end, cb_device_init_end, acc_reg); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c index 5b58c51d4c42..a723ad97b933 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c @@ -143,8 +143,6 @@ typedef struct E int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - A A1; DEBUG_printf ("s=%zd, vb=%zd\n", sizeof A1, VALID_BYTES_A); assert (VALID_BYTES_A <= sizeof A1); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c index f53786871671..0f9e9562bc61 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c @@ -56,8 +56,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look int 
main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - ev_count = 0; /* Trigger tests done in 'cb_*' functions. */