Fix up some USM corner cases.
libgomp/ChangeLog:
* libgomp.h (OFFSET_USM): New macro.
* target.c (gomp_map_pointer): Handle USM mappings.
(gomp_map_val): Handle OFFSET_USM.
(gomp_map_vars_internal): Move USM check earlier, and use OFFSET_USM.
Add OFFSET_USM check to the second mapping pass.
* testsuite/libgomp.fortran/usm-1.f90: New test.
* testsuite/libgomp.fortran/usm-2.f90: New test.
+2022-12-16 Andrew Stubbs <ams@codesourcery.com>
+
+ * libgomp.h (OFFSET_USM): New macro.
+ * target.c (gomp_map_pointer): Handle USM mappings.
+ (gomp_map_val): Handle OFFSET_USM.
+ (gomp_map_vars_internal): Move USM check earlier, and use OFFSET_USM.
+ Add OFFSET_USM check to the second mapping pass.
+ * testsuite/libgomp.fortran/usm-1.f90: New test.
+ * testsuite/libgomp.fortran/usm-2.f90: New test.
+
2022-12-13 Marcel Vollweiler <marcel@codesourcery.com>
* target.c (omp_target_is_accessible): Handle unified shared memory.
#define OFFSET_INLINED (~(uintptr_t) 0)
#define OFFSET_POINTER (~(uintptr_t) 1)
#define OFFSET_STRUCT (~(uintptr_t) 2)
+#define OFFSET_USM (~(uintptr_t) 3)
/* Auxiliary structure for infrequently-used or API-specific data. */
{
if (allow_zero_length_array_sections)
cur_node.tgt_offset = 0;
+ else if (devicep->is_usm_ptr_func
+ && devicep->is_usm_ptr_func ((void*)cur_node.host_start))
+ cur_node.tgt_offset = cur_node.host_start;
else
{
gomp_mutex_unlock (&devicep->lock);
switch (tgt->list[i].offset)
{
case OFFSET_INLINED:
+ case OFFSET_USM:
return (uintptr_t) hostaddrs[i];
case OFFSET_POINTER:
{
int kind = get_kind (short_mapkind, kinds, i);
bool implicit = get_implicit (short_mapkind, kinds, i);
+ tgt->list[i].offset = 0;
if (hostaddrs[i] == NULL
|| (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
{
tgt->list[i].offset = OFFSET_INLINED;
continue;
}
+ else if (devicep->is_usm_ptr_func
+ && devicep->is_usm_ptr_func (hostaddrs[i]))
+ {
+ /* The memory is visible from both host and target
+ so nothing needs to be moved. */
+ tgt->list[i].key = NULL;
+ tgt->list[i].offset = OFFSET_USM;
+ continue;
+ }
else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
|| (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
{
tgt->list[i].offset = 0;
continue;
}
- else if (devicep->is_usm_ptr_func
- && devicep->is_usm_ptr_func (hostaddrs[i]))
- {
- /* The memory is visible from both host and target
- so nothing needs to be moved. */
- tgt->list[i].key = NULL;
- tgt->list[i].offset = OFFSET_INLINED;
- continue;
- }
else if ((kind & typemask) == GOMP_MAP_STRUCT)
{
size_t first = i + 1;
bool implicit = get_implicit (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL)
continue;
+ if (tgt->list[i].offset == OFFSET_USM)
+ continue;
switch (kind & typemask)
{
size_t align, len, first, last;
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target omp_usm }
+
+! Ensure that USM works for implicit mappings.
+! This needs to cover both the initial mapping scan and the rescan that
+! happens when some of the mappings aren't no-ops (in this case there are
+! some hidden pointers).
+
+program usm
+ use iso_fortran_env
+ use omp_lib
+ implicit none
+
+ !$omp requires unified_shared_memory
+
+ integer, parameter :: N = 1024
+ real(real64), allocatable :: x(:), y(:)
+ integer :: i
+
+ ! Note: x is allocated but never assigned before the loop reads it, and y
+ ! is not inspected afterwards, so the values themselves are not checked;
+ ! presumably only successful execution of the target region is under test.
+ allocate(x(N), y(N))
+ ! No map clauses are given on this construct, so x and y are referenced
+ ! in the target region without any explicit mapping.
+ !$omp target teams distribute parallel do simd
+ do i=1,N
+ y(i) = x(i)
+ enddo
+
+ deallocate(x,y)
+
+end program usm
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target omp_usm }
+
+! Ensure that USM doesn't break the use_device_ptr clause (host pointers and
+! target pointers being "unified").
+
+program usm
+ use iso_fortran_env
+ use omp_lib
+ implicit none
+
+ !$omp requires unified_shared_memory
+
+ integer, parameter :: N = 1024
+ real(real64), allocatable :: x(:), y(:)
+ integer :: i
+
+ ! Note: x is allocated but never assigned before it is read below, and y
+ ! is not inspected afterwards, so the copied values are not checked.
+ allocate(x(N),y(N))
+
+ ! Outer data region: x is mapped explicitly.
+ !$omp target data map(x)
+ ! The "i" variable is not explicitly mapped yet, so mapping it here ensures
+ ! that both mapping scan passes are tested.
+ !$omp target data map(i) use_device_ptr(x)
+ ! y has no map clause on any of the enclosing constructs.
+ !$omp target teams distribute parallel do simd
+ do i=1,N
+ y(i) = x(i)
+ enddo
+ !$omp end target data
+ !$omp end target data
+
+ deallocate(x,y)
+
+end program usm