From 833ff20bf8dbc6adeb57d1b5490f0426f2463a3e Mon Sep 17 00:00:00 2001 From: Kwok Cheung Yeung Date: Fri, 21 Jun 2019 10:40:38 -0700 Subject: [PATCH] Add changes to profiling interface from OG8 branch This bundles up the parts of the profiling code from the OG8 branch that were not included in the upstream patch. libgomp/ * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Update. libgomp/ * oacc-init.c (get_property_any): Add profiling code. libgomp/ * Makefile.am (libgomp_la_SOURCES): Add oacc-profiling-acc_register_library.c. * Makefile.in: Regenerate. * libgomp.texi: Remove paragraph about acc_register_library. * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for profiling. * oacc-profiling-acc_register_library.c: New file. * oacc-profiling.c (goacc_profiling_initialize): Call acc_register_library. Avoid duplicate registration. (acc_register_library): Remove. * config/nvptx/oacc-profiling-acc_register_library.c: New empty file. * config/nvptx/oacc-profiling.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove call to acc_register_library. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. --- libgomp/ChangeLog.omp | 32 +++++++++++++++ libgomp/Makefile.am | 2 +- libgomp/Makefile.in | 10 +++-- .../oacc-profiling-acc_register_library.c | 0 libgomp/config/nvptx/oacc-profiling.c | 0 libgomp/libgomp.texi | 8 ---- libgomp/oacc-init.c | 21 +++++++++- libgomp/oacc-parallel.c | 2 + libgomp/oacc-profiling-acc_register_library.c | 39 +++++++++++++++++++ libgomp/oacc-profiling.c | 32 +++++++++------ .../acc_prof-dispatch-1.c | 2 - .../acc_prof-init-1.c | 2 - .../acc_prof-kernels-1.c | 19 ++++++--- .../acc_prof-parallel-1.c | 2 - .../acc_prof-valid_bytes-1.c | 2 - .../acc_prof-version-1.c | 2 - 16 files changed, 134 insertions(+), 41 deletions(-) create mode 100644 libgomp/config/nvptx/oacc-profiling-acc_register_library.c create mode 100644 libgomp/config/nvptx/oacc-profiling.c create mode 100644 libgomp/oacc-profiling-acc_register_library.c diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 4575fbe6c53d..3ee537015606 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,35 @@ +2019-01-23 Thomas Schwinge + + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Update. + +2018-12-20 Maciej W. Rozycki + + * oacc-init.c (get_property_any): Add profiling code. + +2017-02-28 Thomas Schwinge + + * Makefile.am (libgomp_la_SOURCES): Add + oacc-profiling-acc_register_library.c. + * Makefile.in: Regenerate. + * libgomp.texi: Remove paragraph about acc_register_library. + * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for + profiling. + * oacc-profiling-acc_register_library.c: New file. + * oacc-profiling.c (goacc_profiling_initialize): Call + acc_register_library. Avoid duplicate registration. + (acc_register_library): Remove. + * config/nvptx/oacc-profiling-acc_register_library.c: + New empty file. + * config/nvptx/oacc-profiling.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove + call to acc_register_library. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: + Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. + 2019-05-16 Julian Brown * testsuite/libgomp.oacc-c-c++-common/kernels-for-index-reuse-1.c: New diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index f8b2a06d63e4..fa3104f7321e 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -66,7 +66,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c + oacc-target.c oacc-profiling-acc_register_library.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 22cb2136a08d..5026d6a546a6 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -16,7 +16,7 @@ # Plugins for offload execution, Makefile.am fragment. # -# Copyright (C) 2014-2021 Free Software Foundation, Inc. +# Copyright (C) 2014-2022 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. # @@ -220,7 +220,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \ oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ - oacc-target.lo $(am__objects_1) + oacc-target.lo oacc-profiling-acc_register_library.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -506,6 +507,7 @@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ +runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ @@ -559,7 +561,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ - oacc-target.c $(am__append_3) + oacc-target.c oacc-profiling-acc_register_library.c \ + $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -762,6 +765,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling-acc_register_library.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@ diff --git a/libgomp/config/nvptx/oacc-profiling-acc_register_library.c b/libgomp/config/nvptx/oacc-profiling-acc_register_library.c new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/libgomp/config/nvptx/oacc-profiling.c b/libgomp/config/nvptx/oacc-profiling.c new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 8e4ecd9f0558..cfd8ca59cd28 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -3932,14 +3932,6 @@ We just handle one case specially, as required by CUDA 9.0 @code{acc_ev_device_init_start}, @code{acc_ev_device_init_end} callbacks. -We're not yet implementing initialization via a -@code{acc_register_library} function that is either statically linked -in, or dynamically via @env{LD_PRELOAD}. -Initialization via @code{acc_register_library} functions dynamically -loaded via the @env{ACC_PROFLIB} environment variable does work, as -does directly calling @code{acc_prof_register}, -@code{acc_prof_unregister}, @code{acc_prof_lookup}. - As currently there are no inquiry functions defined, calls to @code{acc_prof_lookup} will always return @code{NULL}. diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index 1565aa0f290b..18d682ebd410 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -810,6 +810,16 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop) if (d == acc_device_current && thr && thr->dev) return thr->dev->openacc.get_property_func (thr->dev->target_id, prop); + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + + if (profiling_p) + { + prof_info.device_type = d; + prof_info.device_number = ord; + } + gomp_mutex_lock (&acc_device_lock); struct gomp_device_descr *dev = resolve_device (d, true); @@ -830,7 +840,16 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop) assert (dev); - return dev->openacc.get_property_func (dev->target_id, prop); + union goacc_property_value propval = + dev->openacc.get_property_func (dev->target_id, prop); + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } + + return propval; } size_t diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 0cc087c76564..d66bc882a5f0 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -367,6 +367,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), fn (hostaddrs); goto out_prof; } + else if (profiling_p) + api_info.device_api = acc_device_api_cuda; /* Default: let the runtime choose. */ for (i = 0; i != GOMP_DIM_MAX; i++) diff --git a/libgomp/oacc-profiling-acc_register_library.c b/libgomp/oacc-profiling-acc_register_library.c new file mode 100644 index 000000000000..f6b482b51f4b --- /dev/null +++ b/libgomp/oacc-profiling-acc_register_library.c @@ -0,0 +1,39 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* This file provides an stub acc_register_library function. It's in a + separate file so that this function can easily be overridden when linking + statically. */ + +#include "libgomp.h" +#include "acc_prof.h" + +void +acc_register_library (acc_prof_reg reg, acc_prof_reg unreg, + acc_prof_lookup_func lookup) +{ + gomp_debug (0, "dummy %s\n", __FUNCTION__); +} diff --git a/libgomp/oacc-profiling.c b/libgomp/oacc-profiling.c index 3df6eeba1c01..80c829798dec 100644 --- a/libgomp/oacc-profiling.c +++ b/libgomp/oacc-profiling.c @@ -104,7 +104,12 @@ goacc_profiling_initialize (void) for (int i = 0; i < acc_ev_last; ++i) goacc_prof_callbacks_enabled[i] = true; - + /* We are to invoke an external acc_register_library routine, defaulting to + our stub oacc-profiling-acc_register_library.c:acc_register_library + implementation. */ + gomp_debug (0, "%s: calling acc_register_library\n", __FUNCTION__); + //TODO. + acc_register_library (acc_prof_register, acc_prof_unregister, NULL); #ifdef PLUGIN_SUPPORT char *acc_proflibs = secure_getenv ("ACC_PROFLIB"); while (acc_proflibs != NULL && acc_proflibs[0] != '\0') @@ -141,10 +146,20 @@ goacc_profiling_initialize (void) = dlsym (dl_handle, "acc_register_library"); if (a_r_l == NULL) goto dl_fail; - gomp_debug (0, " %s: calling %s:acc_register_library\n", - __FUNCTION__, acc_proflib); - a_r_l (acc_prof_register, acc_prof_unregister, - acc_prof_lookup); + /* Avoid duplicate registration, for example if the same shared + library is specified in LD_PRELOAD and ACC_PROFLIB -- which + TAU 2.26 does when using "tau_exec -openacc". */ + if (a_r_l != acc_register_library) + { + gomp_debug (0, " %s: calling %s:acc_register_library\n", + __FUNCTION__, acc_proflib); + //TODO. + a_r_l (acc_prof_register, acc_prof_unregister, NULL); + } + else + gomp_debug (0, " %s: skipping duplicate" + " %s:acc_register_library\n", + __FUNCTION__, acc_proflib); } else { @@ -487,13 +502,6 @@ acc_prof_lookup (const char *name) return NULL; } -void -acc_register_library (acc_prof_reg reg, acc_prof_reg unreg, - acc_prof_lookup_func lookup) -{ - gomp_fatal ("TODO"); -} - /* Prepare to dispatch events? */ bool diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c index d929bfd80a4f..a9a8c74150c9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c @@ -114,8 +114,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - STATE_OP (state, = 0); reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg); reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index b5e771554601..91b373216c93 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -270,8 +270,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - STATE_OP (state, = 0); reg (acc_ev_device_init_start, cb_device_init_start, acc_reg); reg (acc_ev_device_init_end, cb_device_init_end, acc_reg); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c index 2c8539714740..2cd2c98ddf1c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c @@ -59,6 +59,7 @@ static int state = -1; static acc_device_t acc_device_type; static int acc_device_num; static int num_gangs, num_workers, vector_length; +static int async; static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) @@ -76,7 +77,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - assert (prof_info->async == acc_async_noval); + assert (prof_info->async == async); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); @@ -166,8 +167,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - STATE_OP (state, = 0); reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg); assert (state == 0); @@ -176,8 +175,10 @@ int main() acc_device_num = acc_get_device_num (acc_device_type); assert (state == 0); - /* Parallelism dimensions: compiler/runtime decides. */ STATE_OP (state, = 0); + /* Implicit async. */ + async = acc_async_noval; + /* Parallelism dimensions: compiler/runtime decides. */ num_gangs = num_workers = vector_length = 0; { #define N 100 @@ -203,8 +204,10 @@ int main() #undef N } - /* Parallelism dimensions: literal. */ STATE_OP (state, = 0); + /* Explicit async: without argument. */ + async = acc_async_noval; + /* Parallelism dimensions: literal. */ num_gangs = 30; num_workers = 3; vector_length = 5; @@ -212,6 +215,7 @@ int main() #define N 100 int x[N]; #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \ + async \ num_gangs (30) num_workers (3) vector_length (5) /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute } { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */ @@ -234,8 +238,10 @@ int main() #undef N } - /* Parallelism dimensions: variable. */ STATE_OP (state, = 0); + /* Explicit async: variable. */ + async = 123; + /* Parallelism dimensions: variable. */ num_gangs = 22; num_workers = 5; vector_length = 7; @@ -243,6 +249,7 @@ int main() #define N 100 int x[N]; #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \ + async (async) \ num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length) /* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute } { dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 1f503861cb66..28a47ccc27df 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -698,8 +698,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - STATE_OP (state, = 0); reg (acc_ev_device_init_start, cb_device_init_start, acc_reg); reg (acc_ev_device_init_end, cb_device_init_end, acc_reg); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c index 5b58c51d4c42..a723ad97b933 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c @@ -143,8 +143,6 @@ typedef struct E int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - A A1; DEBUG_printf ("s=%zd, vb=%zd\n", sizeof A1, VALID_BYTES_A); assert (VALID_BYTES_A <= sizeof A1); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c index f53786871671..0f9e9562bc61 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c @@ -56,8 +56,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look int main() { - acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); - ev_count = 0; /* Trigger tests done in 'cb_*' functions. */ -- 2.47.2