<http://www.gnu.org/licenses/>. */
#include <assert.h>
-#include <string.h>
#include "libgomp.h"
-struct indirect_map_t
-{
- void *host_addr;
- void *target_addr;
-};
+void *GOMP_INDIRECT_ADDR_MAP = NULL;
+
+#define USE_HASHTAB_LOOKUP
+
+#ifdef USE_HASHTAB_LOOKUP
+
+#include <string.h> /* For memset. */
+
+/* Use a hashtab to lookup the target address instead of using a linear
+ search.
+
+ With newer libgomp on the host, the hash table is already initialized on
+ the host (e.g. in plugin/plugin-gcn.c). Thus, build_indirect_map is only
+ used as a fallback with older libgomp. */
-typedef struct indirect_map_t *hash_entry_type;
+void *GOMP_INDIRECT_ADDR_HMAP = NULL;
+
+typedef unsigned __int128 hash_entry_type;
+#define INDIRECT_DEV_ADDR(p) ((void*) (uintptr_t) (p >> 64))
+#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) p)
+#define SET_INDIRECT_HOST_ADDR(p, host) p = (((unsigned __int128) (uintptr_t) host))
+#define SET_INDIRECT_ADDRS(p, h, d) \
+ p = (((unsigned __int128) h) + (((unsigned __int128) d) << 64))
+
+/* Besides the sizes, also the endianness either needs to agree or
+ host-device memcpy needs to take care of this. */
+_Static_assert (sizeof (unsigned __int128) == 2*sizeof(void*),
+ "indirect_target_map_t size mismatch");
+/* Memory hooks defined ahead of the hashtab.h include.  htab_alloc forwards
+   to gomp_malloc; htab_free must never be reached (it is marked with
+   __builtin_unreachable).  */
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
-static inline void htab_free (void *ptr) { free (ptr); }
+static inline void htab_free (void *ptr) { __builtin_unreachable (); }
#include "hashtab.h"
+/* Hash ELEMENT by the host address stored in it (extracted with
+   INDIRECT_HOST_ADDR).  */
static inline hashval_t
htab_hash (hash_entry_type element)
{
- return hash_pointer (element->host_addr);
+ return hash_pointer (INDIRECT_HOST_ADDR (element));
}
+/* Two entries are equal when their host addresses match; the device
+   address half of the entry is not compared.  */
static inline bool
htab_eq (hash_entry_type x, hash_entry_type y)
{
- return x->host_addr == y->host_addr;
+ return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y);
}
-void **GOMP_INDIRECT_ADDR_MAP = NULL;
-
-/* Use a hashtab to lookup the target address instead of using a linear
- search. */
-#define USE_HASHTAB_LOOKUP
+/* Translate the host address PTR into the corresponding device address by
+   looking it up in the hash table that GOMP_INDIRECT_ADDR_HMAP points to.
+   Returns PTR unchanged if it is NULL or if no entry is found.  */
+void *
+GOMP_target_map_indirect_ptr (void *ptr)
+{
+ /* NULL pointers always resolve to NULL. */
+ if (!ptr)
+ return ptr;
-#ifdef USE_HASHTAB_LOOKUP
+ /* The table must already have been set up at this point.  */
+ assert (GOMP_INDIRECT_ADDR_HMAP);
-static htab_t indirect_htab = NULL;
+ /* Build a lookup key that carries only the host address.  */
+ hash_entry_type element;
+ SET_INDIRECT_HOST_ADDR (element, ptr);
+ hash_entry_type entry = htab_find ((htab_t) GOMP_INDIRECT_ADDR_HMAP, element);
+ /* A zero entry is treated as "not found"; fall back to PTR itself.  */
+ return entry ? INDIRECT_DEV_ADDR (entry) : ptr;
+}
/* Build the hashtab used for host->target address lookups. */
build_indirect_map (void)
{
size_t num_ind_funcs = 0;
- void **map_entry;
+ uint64_t *map_entry;
- if (!GOMP_INDIRECT_ADDR_MAP)
+ if (!GOMP_INDIRECT_ADDR_MAP || GOMP_INDIRECT_ADDR_HMAP)
return;
- if (!indirect_htab)
- {
- /* Count the number of entries in the NULL-terminated address map. */
- for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
- map_entry += 2, num_ind_funcs++);
-
- /* Build hashtab for address lookup. */
- indirect_htab = htab_create (num_ind_funcs);
- map_entry = GOMP_INDIRECT_ADDR_MAP;
-
- for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
- {
- struct indirect_map_t element = { *map_entry, NULL };
- hash_entry_type *slot = htab_find_slot (&indirect_htab, &element,
- INSERT);
- *slot = (hash_entry_type) map_entry;
- }
- }
-}
+ /* Count the number of entries in the NULL-terminated address map. */
+ for (map_entry = (uint64_t *) GOMP_INDIRECT_ADDR_MAP; *map_entry;
+ map_entry += 2, num_ind_funcs++);
-void *
-GOMP_target_map_indirect_ptr (void *ptr)
-{
- /* NULL pointers always resolve to NULL. */
- if (!ptr)
- return ptr;
-
- assert (indirect_htab);
+ /* Build hashtab for address lookup. */
+ htab_t indirect_htab = htab_create (num_ind_funcs);
+ GOMP_INDIRECT_ADDR_HMAP = (void *) indirect_htab;
- struct indirect_map_t element = { ptr, NULL };
- hash_entry_type entry = htab_find (indirect_htab, &element);
- return entry ? entry->target_addr : ptr;
+ map_entry = GOMP_INDIRECT_ADDR_MAP;
+ for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
+ {
+ hash_entry_type element;
+ SET_INDIRECT_ADDRS (element, *map_entry, *(map_entry + 1));
+ hash_entry_type *slot = htab_find_slot (&indirect_htab, element,
+ INSERT);
+ *slot = element;
+ }
}
#else
#define LITTLEENDIAN_CPU
#include "hsa.h"
+#define UNLIKELY(x) (__builtin_expect ((x), 0))
+
+extern void *GOMP_INDIRECT_ADDR_MAP;
+
/* Defined in basic-allocator.c via config/amdgcn/allocator.c. */
void __gcn_lowlat_init (void *heap, size_t size);
int numthreads = __builtin_gcn_dim_size (1);
int teamid = __builtin_gcn_dim_pos(0);
- /* Initialize indirect function support. */
- if (teamid == 0)
+ /* Initialize indirect function support for older libgomp. */
+ if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && teamid == 0))
build_indirect_map ();
/* Set up the global state.
#include <stdlib.h>
#include <string.h>
+#define UNLIKELY(x) (__builtin_expect ((x), 0))
+
+extern void *GOMP_INDIRECT_ADDR_MAP;
+
struct gomp_thread *nvptx_thrs __attribute__((shared,nocommon));
int __gomp_team_num __attribute__((shared,nocommon));
nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
- /* Initialize indirect function support. */
+ /* Initialize indirect function support for older libgomp. */
unsigned int block_id;
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
- if (block_id == 0)
+ if (UNLIKELY (GOMP_INDIRECT_ADDR_MAP != NULL && block_id == 0))
build_indirect_map ();
/* Find the low-latency heap details .... */
must be stringified). */
#define GOMP_ADDITIONAL_ICVS __gomp_additional_icvs
+/* GOMP_INDIRECT_ADDR_HMAP points to a hash table and is to be used by
+ newer libgomp, while GOMP_INDIRECT_ADDR_MAP points to a linear table
+ and exists for backward compatibility. */
#define GOMP_INDIRECT_ADDR_MAP __gomp_indirect_addr_map
+#define GOMP_INDIRECT_ADDR_HMAP __gomp_indirect_addr_hmap
/* Miscellaneous functions. */
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__ ((malloc));
--- /dev/null
+/* Copyright (C) 2023-2025 Free Software Foundation, Inc.
+
+ Contributed by Siemens.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+/* This file is used to create a hash table on the host that is supposed
+ to get used on the device - that's for the 'indirect' clause feature.
+
+ In order to have host initialization work, the pointer sizes must be
+ the same - and either the endianness must agree or the host-device
+ memcopy has to take care of it. */
+
+/* A hash entry packs both addresses into one 128-bit integer: the host
+   address occupies the low 64 bits and the device address the high 64
+   bits.  All macro arguments are parenthesized so that arbitrary
+   expressions can be passed safely.  */
+typedef unsigned __int128 hash_entry_type;
+#define INDIRECT_HOST_ADDR(p) ((void *) (uintptr_t) (p))
+#define INDIRECT_DEV_ADDR(p) ((void *) (uintptr_t) ((p) >> 64))
+#define SET_INDIRECT_ADDRS(p, h, d) \
+  ((p) = (((unsigned __int128) (h)) + (((unsigned __int128) (d)) << 64)))
+
+/* The packing above only works if a pointer is exactly half an entry.  */
+_Static_assert (sizeof (unsigned __int128) == 2 * sizeof (void *),
+                "hash_entry_type size mismatch");
+
+/* Allocate SIZE bytes with malloc.  Defined ahead of the hashtab.h
+   include, which presumably calls it for the table storage.  */
+static inline void *
+htab_alloc (size_t size)
+{
+  return malloc (size);
+}
+
+/* Release memory at PTR with free; counterpart of htab_alloc.  */
+static inline void
+htab_free (void *ptr)
+{
+  free (ptr);
+}
+
+#include "hashtab.h"
+
+/* Hash ELEMENT by its host address only; the device address half of the
+   entry does not contribute to the hash value.  */
+static inline hashval_t
+htab_hash (hash_entry_type element)
+{
+  void *host_addr = INDIRECT_HOST_ADDR (element);
+  return hash_pointer (host_addr);
+}
+
+/* Compare two entries by host address; the device address half is
+   ignored.  Both operands must be inspected: comparing X against itself
+   would make every pair of entries look equal.  */
+static inline bool
+htab_eq (hash_entry_type x, hash_entry_type y)
+{
+  return INDIRECT_HOST_ADDR (x) == INDIRECT_HOST_ADDR (y);
+}
+
+/* Build, on the host, the hash table that maps each of the COUNT host
+   addresses in HOST_ADDRS to the device address at the same index in
+   DEVICE_ADDRS.
+
+   Returns the table and stores in *H_SIZE the number of bytes it
+   occupies: the htab header plus htab_size slots of hash_entry_type.
+
+   NOTE(review): the table comes from htab_create, so it is presumably
+   heap-allocated through htab_alloc; confirm who is expected to free it.  */
+void *
+create_target_indirect_map (size_t *h_size, size_t count,
+                            uint64_t *host_addrs, uint64_t *device_addrs)
+{
+  assert (htab_find);  /* Silence -Werror=unused-function.  */
+
+  htab_t indirect_htab = htab_create (count);
+
+  /* Use size_t for the index so that it matches the type of COUNT.  */
+  for (size_t i = 0; i < count; i++)
+    {
+      hash_entry_type element;
+      SET_INDIRECT_ADDRS (element, host_addrs[i], device_addrs[i]);
+      hash_entry_type *slot = htab_find_slot (&indirect_htab, element,
+                                              INSERT);
+      *slot = element;
+    }
+
+  *h_size = (sizeof (struct htab)
+             + htab_size (indirect_htab) * sizeof (hash_entry_type));
+  return (void *) indirect_htab;
+}
#include "oacc-int.h"
#include <assert.h>
+/* Create hash-table for declare target's indirect clause on the host;
+ see build-target-indirect-htab.h for details. */
+#define USE_HASHTAB_LOOKUP_FOR_INDIRECT
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+static void* create_target_indirect_map (size_t *, size_t,
+ uint64_t *, uint64_t *);
+#endif
+
/* These probably won't be in elf.h for a while. */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
(void*) ind_funcs_table_addr,
sizeof (ind_funcs_table));
- /* Build host->target address map for indirect functions. */
- uint64_t ind_fn_map[ind_func_count * 2 + 1];
- for (unsigned i = 0; i < ind_func_count; i++)
- {
- ind_fn_map[i * 2] = host_ind_fn_table[i];
- ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
- GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
- i, host_ind_fn_table[i], ind_funcs_table[i]);
- }
- ind_fn_map[ind_func_count * 2] = 0;
-
- /* Write the map onto the target. */
- void *map_target_addr
- = GOMP_OFFLOAD_alloc (agent->device_id, sizeof (ind_fn_map));
- GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
-
- GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
- (void*) ind_fn_map,
- sizeof (ind_fn_map));
+ /* For newer binaries, the hash table for 'indirect' is created on the
+ host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the
+ device side - and have to create the table themselves using
+ GOMP_INDIRECT_ADDR_MAP. */
- /* Write address of the map onto the target. */
hsa_executable_symbol_t symbol;
-
+ bool host_init_htab = true;
+ #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
status
= hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
- XSTRING (GOMP_INDIRECT_ADDR_MAP),
+ XSTRING (GOMP_INDIRECT_ADDR_HMAP),
agent->id, 0, &symbol);
+ if (status != HSA_STATUS_SUCCESS)
+ #endif
+ {
+ host_init_htab = false;
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ XSTRING (GOMP_INDIRECT_ADDR_MAP), agent->id, 0, &symbol);
+ }
if (status != HSA_STATUS_SUCCESS)
hsa_fatal ("Could not find GOMP_INDIRECT_ADDR_MAP in code object",
status);
-
uint64_t varptr;
uint32_t varsize;
hsa_fatal ("Could not extract a variable size from its symbol",
status);
- GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_MAP at %lx with size %d\n",
- varptr, varsize);
+ GCN_DEBUG ("Found GOMP_INDIRECT_ADDR_%sMAP at %lx with size %d\n",
+ host_init_htab ? "H" : "", varptr, varsize);
+ void *map_target_addr;
+ if (!host_init_htab)
+ {
+ /* Build host->target address map for indirect functions. */
+ uint64_t ind_fn_map[ind_func_count * 2 + 1];
+ for (unsigned i = 0; i < ind_func_count; i++)
+ {
+ ind_fn_map[i * 2] = host_ind_fn_table[i];
+ ind_fn_map[i * 2 + 1] = ind_funcs_table[i];
+ GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
+ i, host_ind_fn_table[i], ind_funcs_table[i]);
+ }
+ ind_fn_map[ind_func_count * 2] = 0;
+ /* Write the map onto the target. */
+ map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id,
+ sizeof (ind_fn_map));
+ GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
+ (void*) ind_fn_map, sizeof (ind_fn_map));
+ }
+ #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+ else
+ {
+ /* FIXME: Handle multi-kernel load and unload, cf. PR 114690. */
+ size_t host_map_size;
+ void *host_map;
+ host_map = create_target_indirect_map (&host_map_size, ind_func_count,
+ host_ind_fn_table,
+ ind_funcs_table);
+ for (unsigned i = 0; i < ind_func_count; i++)
+ GCN_DEBUG ("Indirect function %d: %lx->%lx\n",
+ i, host_ind_fn_table[i], ind_funcs_table[i]);
+ /* Write the map onto the target. */
+ map_target_addr = GOMP_OFFLOAD_alloc (agent->device_id,
+ host_map_size);
+ GOMP_OFFLOAD_host2dev (agent->device_id, map_target_addr,
+ host_map, host_map_size);
+ }
+ #endif
+
+ GCN_DEBUG ("Allocated indirect map at %p\n", map_target_addr);
+
+ /* Write address of the map onto the target. */
GOMP_OFFLOAD_host2dev (agent->device_id, (void *) varptr,
&map_target_addr,
sizeof (map_target_addr));
free (data);
}
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+ #include "build-target-indirect-htab.h"
+#endif
+
/* }}} */
#include <errno.h>
#include <stdlib.h>
+/* Create hash-table for declare target's indirect clause on the host;
+ see build-target-indirect-htab.h for details. */
+#define USE_HASHTAB_LOOKUP_FOR_INDIRECT
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+static void* create_target_indirect_map (size_t *, size_t,
+ uint64_t *, uint64_t *);
+#endif
+
/* An arbitrary fixed limit (128MB) for the size of the OpenMP soft stacks
block to cache between kernel invocations. For soft-stacks blocks bigger
than this, we will free the block before attempting another GPU memory
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r));
- /* Build host->target address map for indirect functions. */
- uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
- for (unsigned k = 0; k < ind_fn_entries; k++)
- {
- ind_fn_map[k * 2] = host_ind_fn_table[k];
- ind_fn_map[k * 2 + 1] = ind_fn_table[k];
- GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
- k, host_ind_fn_table[k], ind_fn_table[k]);
- }
- ind_fn_map[ind_fn_entries * 2] = 0;
-
- /* Write the map onto the target. */
- void *map_target_addr
- = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
- GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
-
- GOMP_OFFLOAD_host2dev (ord, map_target_addr,
- (void*) ind_fn_map,
- sizeof (ind_fn_map));
+ /* For newer binaries, the hash table for 'indirect' is created on the
+ host. Older binaries don't have GOMP_INDIRECT_ADDR_HMAP on the
+ device side - and have to create the table themselves using
+ GOMP_INDIRECT_ADDR_MAP. */
- /* Write address of the map onto the target. */
CUdeviceptr varptr;
size_t varsize;
+ bool host_init_htab = true;
+ #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
- module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
+ module, XSTRING (GOMP_INDIRECT_ADDR_HMAP));
+ if (r != CUDA_SUCCESS)
+ #endif
+ {
+ host_init_htab = false;
+ r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &varptr, &varsize,
+ module, XSTRING (GOMP_INDIRECT_ADDR_MAP));
+ }
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("Indirect map variable not found in image: %s",
cuda_error (r));
-
GOMP_PLUGIN_debug (0,
- "Indirect map variable found at %llx with size %ld\n",
+ "%s-style indirect map variable found at %llx with "
+ "size %ld\n", host_init_htab ? "New" : "Old",
varptr, varsize);
+ void *map_target_addr;
+ if (!host_init_htab)
+ {
+ /* Build host->target address map for indirect functions. */
+ uint64_t ind_fn_map[ind_fn_entries * 2 + 1];
+ for (unsigned k = 0; k < ind_fn_entries; k++)
+ {
+ ind_fn_map[k * 2] = host_ind_fn_table[k];
+ ind_fn_map[k * 2 + 1] = ind_fn_table[k];
+ GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
+ k, host_ind_fn_table[k], ind_fn_table[k]);
+ }
+ ind_fn_map[ind_fn_entries * 2] = 0;
+ /* Write the map onto the target. */
+ map_target_addr = GOMP_OFFLOAD_alloc (ord, sizeof (ind_fn_map));
+ GOMP_OFFLOAD_host2dev (ord, map_target_addr,
+ (void *) ind_fn_map, sizeof (ind_fn_map));
+ }
+ #ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+ else
+ {
+ /* FIXME: Handle multi-kernel load and unload, cf. PR 114690. */
+ size_t host_map_size;
+ void *host_map;
+ host_map = create_target_indirect_map (&host_map_size, ind_fn_entries,
+ host_ind_fn_table,
+ ind_fn_table);
+ for (unsigned k = 0; k < ind_fn_entries; k++)
+ GOMP_PLUGIN_debug (0, "Indirect function %d: %lx->%lx\n",
+ k, host_ind_fn_table[k], ind_fn_table[k]);
+ /* Write the map onto the target. */
+ map_target_addr = GOMP_OFFLOAD_alloc (ord, host_map_size);
+ GOMP_OFFLOAD_host2dev (ord, map_target_addr, host_map, host_map_size);
+ }
+ #endif
+
+ GOMP_PLUGIN_debug (0, "Allocated indirect map at %p\n", map_target_addr);
+
+ /* Write address of the map onto the target. */
GOMP_OFFLOAD_host2dev (ord, (void *) varptr, &map_target_addr,
sizeof (map_target_addr));
}
}
/* TODO: Implement GOMP_OFFLOAD_async_run. */
+
+#ifdef USE_HASHTAB_LOOKUP_FOR_INDIRECT
+ #include "build-target-indirect-htab.h"
+#endif