/* Provide an entry point symbol to silence a linker warning. */
void _start() {}
+
+#define PR119369_fixed 0
+
+
+/* Host/device compatibility: '__cxa_finalize'. Dummy; if necessary,
+ overridden via libgomp 'target-cxa-dso-dtor.c'. */
+
+#if PR119369_fixed
+extern void __GCC_offload___cxa_finalize (void *) __attribute__((weak));
+#else
+void __GCC_offload___cxa_finalize (void *) __attribute__((weak));
+
+void __attribute__((weak))
+__GCC_offload___cxa_finalize (void *dso_handle __attribute__((unused)))
+{
+}
+#endif
+
+/* There are no DSOs; this is the main program. */
+static void * const __dso_handle = 0;
+
+
#ifdef USE_NEWLIB_INITFINI
extern void __libc_init_array (void) __attribute__((weak));
__attribute__((amdgpu_hsa_kernel ()))
void _fini_array()
{
+#if PR119369_fixed
+ if (__GCC_offload___cxa_finalize)
+#endif
+ __GCC_offload___cxa_finalize (__dso_handle);
+
__libc_fini_array ();
}
__attribute__((amdgpu_hsa_kernel ()))
void _fini_array()
{
+#if PR119369_fixed
+ if (__GCC_offload___cxa_finalize)
+#endif
+ __GCC_offload___cxa_finalize (__dso_handle);
+
size_t count;
size_t i;
extern int atexit (void (*function) (void));
+/* Host/device compatibility: '__cxa_finalize'. Dummy; if necessary,
+ overridden via libgomp 'target-cxa-dso-dtor.c'. */
+
+extern void __GCC_offload___cxa_finalize (void *);
+
+void __attribute__((weak))
+__GCC_offload___cxa_finalize (void *dso_handle __attribute__((unused)))
+{
+}
+
+/* There are no DSOs; this is the main program. */
+static void * const __dso_handle = 0;
+
+
/* Handler functions ('static', in contrast to the 'gbl-ctors.h'
prototypes). */
static void
__static_do_global_dtors (void)
{
+ __GCC_offload___cxa_finalize (__dso_handle);
+
func_ptr *p = __DTOR_LIST__;
++p;
for (; *p; ++p)
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c
+ oacc-target.c target-indirect.c target-cxa-dso-dtor.c
include $(top_srcdir)/plugin/Makefrag.am
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
- oacc-target.lo target-indirect.lo $(am__objects_1)
+ oacc-target.lo target-indirect.lo target-cxa-dso-dtor.lo \
+ $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c $(am__append_3)
+ oacc-target.c target-indirect.c target-cxa-dso-dtor.c \
+ $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-cxa-dso-dtor.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target-indirect.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/task.Plo@am__quote@
--- /dev/null
+/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API
+
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "libgomp.h"
+
+extern void __cxa_finalize (void *);
+
+/* See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#dso-dtor>.
+
+ Even if the device is '!DEFAULT_USE_CXA_ATEXIT', we may see '__cxa_atexit'
+ calls, referencing '__dso_handle', via a 'DEFAULT_USE_CXA_ATEXIT' host.
+ '__cxa_atexit' is provided by newlib, but use of '__dso_handle' for nvptx
+ results in 'ld' error:
+
+ unresolved symbol __dso_handle
+ collect2: error: ld returned 1 exit status
+ nvptx mkoffload: fatal error: [...]/x86_64-pc-linux-gnu-accel-nvptx-none-gcc returned 1 exit status
+
+ ..., or for GCN get an implicit definition (running with
+ '--trace-symbol=__dso_handle'):
+
+ ./a.xamdgcn-amdhsa.mkoffload.hsaco-a.xamdgcn-amdhsa.mkoffload.2.o: reference to __dso_handle
+ <internal>: definition of __dso_handle
+
+ ..., which might be fine, but let's just make it explicit. */
+
+/* There are no DSOs; this is the main program. */
+attribute_hidden void * const __dso_handle = 0;
+
+/* If this file gets linked in, that means that '__dso_handle' has been
+ referenced (for '__cxa_atexit'), and in that case, we also have to run
+ '__cxa_finalize'. Make that happen by overriding the weak libgcc dummy
+ function '__GCC_offload___cxa_finalize'. */
+
+void
+__GCC_offload___cxa_finalize (void *dso_handle)
+{
+ __cxa_finalize (dso_handle);
+}
--- /dev/null
+/* Host/device compatibility: Itanium C++ ABI, DSO Object Destruction API */
+
+/* Nothing needed here. */
--- /dev/null
+/* Offloaded C++ objects construction and destruction. */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+ { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+ int x;
+
+ S()
+ : x(-1)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ S(int x)
+ : x(x)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ ~S()
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+};
+
+#pragma omp end declare target
+
+S sH1(7);
+
+#pragma omp declare target
+
+S sHD1(5);
+
+std::vector<S> svHD1(2);
+
+#pragma omp end declare target
+
+S sH2(3);
+
+int main()
+{
+ int c = 0;
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+ return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+ For the host, there are four expected calls:
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+
+ For the device, there are two expected calls:
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+*/
+
+/* C++ objects are constructed in order of appearance (..., and destructed in reverse order).
+ { dg-output {S, 7, 1[\r\n]+} }
+ { dg-output {S, 5, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, 3, 1[\r\n]+} }
+ { dg-output {main:1, 1[\r\n]+} }
+ { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:4, 1[\r\n]+} }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 3, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, 5, 1[\r\n]+} }
+ { dg-output {~S, 7, 1[\r\n]+} }
+*/
--- /dev/null
+/* Offloaded 'constructor' and 'destructor' functions, and C++ objects construction and destruction. */
+
+/* { dg-require-effective-target init_priority } */
+
+/* { dg-additional-options -fdump-tree-optimized-raw-asmname }
+ { dg-additional-options -foffload-options=-fdump-tree-optimized-raw-asmname } */
+
+#include <omp.h>
+#include <vector>
+
+#pragma omp declare target
+
+struct S
+{
+ int x;
+
+ S()
+ : x(-1)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ S(int x)
+ : x(x)
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+ ~S()
+ {
+ __builtin_printf("%s, %d, %d\n", __FUNCTION__, x, omp_is_initial_device());
+ }
+};
+
+#pragma omp end declare target
+
+S sH1 __attribute__((init_priority(1500))) (7);
+
+#pragma omp declare target
+
+S sHD1 __attribute__((init_priority(2000))) (5);
+
+std::vector<S> svHD1 __attribute__((init_priority(1000))) (2);
+
+static void
+__attribute__((constructor(20000)))
+initDH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(20000)))
+finiDH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+#pragma omp end declare target
+
+S sH2 __attribute__((init_priority(500))) (3);
+
+static void
+__attribute__((constructor(10000)))
+initH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+static void
+__attribute__((destructor(10000)))
+finiH1()
+{
+ __builtin_printf("%s, %d\n", __FUNCTION__, omp_is_initial_device());
+}
+
+int main()
+{
+ int c = 0;
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+#pragma omp target map(c)
+ {
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+ }
+
+ __builtin_printf("%s:%d, %d\n", __FUNCTION__, ++c, omp_is_initial_device());
+
+ return 0;
+}
+
+/* Verify '__cxa_atexit' calls.
+
+ For the host, there are four expected calls:
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, } 4 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sH2, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+
+ For the device, there are two expected calls:
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, } 2 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZN1SD1Ev, \&sHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+ { dg-final { scan-offload-tree-dump-times {gimple_call <__cxa_atexit, NULL, _ZNSt6vectorI1SSaIS0_EED1Ev, \&svHD1, \&__dso_handle>} 1 optimized { target cxa_atexit } } }
+*/
+
+/* Defined order in which 'constructor' functions, and 'destructor' functions are run, and C++ objects are constructed (..., and destructed in reverse order).
+ { dg-output {S, 3, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, -1, 1[\r\n]+} }
+ { dg-output {S, 7, 1[\r\n]+} }
+ { dg-output {S, 5, 1[\r\n]+} }
+ { dg-output {initH1, 1[\r\n]+} }
+ { dg-output {initDH1, 1[\r\n]+} }
+ { dg-output {main:1, 1[\r\n]+} }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {initDH1, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:2, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:2, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:3, 1[\r\n]+} { target { ! offload_device } } }
+ { dg-output {main:3, 0[\r\n]+} { target offload_device } }
+ { dg-output {main:4, 1[\r\n]+} }
+ { dg-output {~S, 5, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, -1, 0[\r\n]+} { target offload_device } }
+ { dg-output {finiDH1, 0[\r\n]+} { target offload_device } }
+ { dg-output {~S, 5, 1[\r\n]+} }
+ { dg-output {~S, 7, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, -1, 1[\r\n]+} }
+ { dg-output {~S, 3, 1[\r\n]+} }
+ { dg-output {finiDH1, 1[\r\n]+} }
+ { dg-output {finiH1, 1[\r\n]+} }
+*/