#include "libgomp.h"
#include <stdlib.h>
#include <string.h>
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
#include <dlfcn.h>
#endif
#define omp_max_predefined_alloc omp_thread_mem_alloc
-enum gomp_memkind_kind
+enum gomp_numa_memkind_kind
{
GOMP_MEMKIND_NONE = 0,
#define GOMP_MEMKIND_KINDS \
#define GOMP_MEMKIND_KIND(kind) GOMP_MEMKIND_##kind
GOMP_MEMKIND_KINDS,
#undef GOMP_MEMKIND_KIND
- GOMP_MEMKIND_COUNT
+ GOMP_MEMKIND_COUNT,
+ GOMP_MEMKIND_LIBNUMA = GOMP_MEMKIND_COUNT
};
struct omp_allocator_data
unsigned int fallback : 8;
unsigned int pinned : 1;
unsigned int partition : 7;
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
unsigned int memkind : 8;
#endif
#ifndef HAVE_SYNC_BUILTINS
void *pad;
};
+struct gomp_libnuma_data /* Handle and entry points of the dlopen'ed numa library.  */
+{
+ void *numa_handle; /* dlopen handle of "libnuma.so.1".  */
+ void *(*numa_alloc_local) (size_t); /* dlsym result for "numa_alloc_local".  */
+ void *(*numa_realloc) (void *, size_t, size_t); /* dlsym result for "numa_realloc".  */
+ void (*numa_free) (void *, size_t); /* dlsym result for "numa_free".  */
+};
+
struct gomp_memkind_data
{
void *memkind_handle;
void **kinds[GOMP_MEMKIND_COUNT];
};
+#ifdef LIBGOMP_USE_LIBNUMA
+static struct gomp_libnuma_data *libnuma_data;
+static pthread_once_t libnuma_data_once = PTHREAD_ONCE_INIT;
+
+/* One-time initializer, run through pthread_once from gomp_get_libnuma.
+   Tries to dlopen "libnuma.so.1" and publishes a gomp_libnuma_data record
+   in LIBNUMA_DATA with release semantics.
+
+   The published record is all-or-nothing: either NUMA_HANDLE and all three
+   function pointers are set, or all of them are NULL.  Nothing is published
+   (LIBNUMA_DATA stays NULL) when the record itself cannot be allocated.  */
+static void
+gomp_init_libnuma (void)
+{
+  void *handle = dlopen ("libnuma.so.1", RTLD_LAZY);
+  struct gomp_libnuma_data *data;
+
+  data = calloc (1, sizeof (struct gomp_libnuma_data));
+  if (data == NULL)
+    {
+      if (handle)
+        dlclose (handle);
+      return;
+    }
+  if (handle)
+    {
+      int (*numa_available) (void);
+
+      /* The numa library documents that numa_available must be called
+         first and that all other functions are undefined when it reports
+         failure, so only look up the entry points once it succeeded.  */
+      numa_available
+        = (__typeof (numa_available)) dlsym (handle, "numa_available");
+      if (numa_available != NULL && numa_available () == 0)
+        {
+          data->numa_alloc_local
+            = (__typeof (data->numa_alloc_local)) dlsym (handle,
+                                                         "numa_alloc_local");
+          data->numa_realloc
+            = (__typeof (data->numa_realloc)) dlsym (handle, "numa_realloc");
+          data->numa_free
+            = (__typeof (data->numa_free)) dlsym (handle, "numa_free");
+        }
+      /* A partially resolved library is treated like a missing one so that
+         callers never see a mix of valid and NULL entry points.  */
+      if (data->numa_alloc_local == NULL
+          || data->numa_realloc == NULL
+          || data->numa_free == NULL)
+        {
+          data->numa_alloc_local = NULL;
+          data->numa_realloc = NULL;
+          data->numa_free = NULL;
+          dlclose (handle);
+          handle = NULL;
+        }
+    }
+  data->numa_handle = handle;
+  __atomic_store_n (&libnuma_data, data, MEMMODEL_RELEASE);
+}
+
+/* Return the process-wide libnuma record.  If it has not been published
+   yet, run gomp_init_libnuma exactly once via pthread_once and read the
+   pointer again.  The result is whatever LIBNUMA_DATA holds after that,
+   which is NULL if the initializer returned without storing a record.  */
+static struct gomp_libnuma_data *
+gomp_get_libnuma (void)
+{
+  struct gomp_libnuma_data *cached
+    = __atomic_load_n (&libnuma_data, MEMMODEL_ACQUIRE);
+
+  if (cached == NULL)
+    {
+      pthread_once (&libnuma_data_once, gomp_init_libnuma);
+      cached = __atomic_load_n (&libnuma_data, MEMMODEL_ACQUIRE);
+    }
+  return cached;
+}
+#endif
+
#ifdef LIBGOMP_USE_MEMKIND
static struct gomp_memkind_data *memkind_data;
static pthread_once_t memkind_data_once = PTHREAD_ONCE_INIT;
struct omp_allocator_data data
= { memspace, 1, ~(uintptr_t) 0, 0, 0, omp_atv_contended, omp_atv_all,
omp_atv_default_mem_fb, omp_atv_false, omp_atv_environment,
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
GOMP_MEMKIND_NONE
#endif
};
switch (memspace)
{
- case omp_high_bw_mem_space:
#ifdef LIBGOMP_USE_MEMKIND
+ case omp_high_bw_mem_space:
struct gomp_memkind_data *memkind_data;
memkind_data = gomp_get_memkind ();
if (data.partition == omp_atv_interleaved
data.memkind = GOMP_MEMKIND_HBW_PREFERRED;
break;
}
-#endif
break;
case omp_large_cap_mem_space:
-#ifdef LIBGOMP_USE_MEMKIND
memkind_data = gomp_get_memkind ();
if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM_ALL])
data.memkind = GOMP_MEMKIND_DAX_KMEM_ALL;
else if (memkind_data->kinds[GOMP_MEMKIND_DAX_KMEM])
data.memkind = GOMP_MEMKIND_DAX_KMEM;
-#endif
break;
+#endif
default:
#ifdef LIBGOMP_USE_MEMKIND
if (data.partition == omp_atv_interleaved)
break;
}
+#ifdef LIBGOMP_USE_LIBNUMA
+  /* partition=nearest is routed through libnuma, but only when the library
+     record exists and every entry point used by the allocation, realloc and
+     free paths was resolved.  Otherwise the allocator keeps
+     GOMP_MEMKIND_NONE instead of later calling through a NULL function
+     pointer.  */
+  if (data.memkind == GOMP_MEMKIND_NONE && data.partition == omp_atv_nearest)
+    {
+      libnuma_data = gomp_get_libnuma ();
+      if (libnuma_data != NULL
+          && libnuma_data->numa_alloc_local != NULL
+          && libnuma_data->numa_realloc != NULL
+          && libnuma_data->numa_free != NULL)
+        data.memkind = GOMP_MEMKIND_LIBNUMA;
+    }
+#endif
+
/* No support for this so far. */
if (data.pinned)
return omp_null_allocator;
struct omp_allocator_data *allocator_data;
size_t new_size, new_alignment;
void *ptr, *ret;
-#ifdef LIBGOMP_USE_MEMKIND
- enum gomp_memkind_kind memkind;
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
+ enum gomp_numa_memkind_kind memkind;
#endif
if (__builtin_expect (size == 0, 0))
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
new_alignment = allocator_data->alignment;
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = allocator_data->memkind;
#endif
}
allocator_data = NULL;
if (new_alignment < sizeof (void *))
new_alignment = sizeof (void *);
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = GOMP_MEMKIND_NONE;
+#endif
+#ifdef LIBGOMP_USE_MEMKIND
if (allocator == omp_high_bw_mem_alloc)
memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (allocator == omp_large_cap_mem_alloc)
allocator_data->used_pool_size = used_pool_size;
gomp_mutex_unlock (&allocator_data->lock);
#endif
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (memkind == GOMP_MEMKIND_LIBNUMA)
+ ptr = libnuma_data->numa_alloc_local (new_size);
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
}
else
{
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (memkind == GOMP_MEMKIND_LIBNUMA)
+ ptr = libnuma_data->numa_alloc_local (new_size);
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
{
case omp_atv_default_mem_fb:
if ((new_alignment > sizeof (void *) && new_alignment > alignment)
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
|| memkind
#endif
|| (allocator_data
gomp_mutex_unlock (&allocator_data->lock);
#endif
}
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (allocator_data->memkind == GOMP_MEMKIND_LIBNUMA)
+ {
+ libnuma_data->numa_free (data->ptr, data->size);
+ return;
+ }
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (allocator_data->memkind)
{
#ifdef LIBGOMP_USE_MEMKIND
else
{
- enum gomp_memkind_kind memkind = GOMP_MEMKIND_NONE;
+ enum gomp_numa_memkind_kind memkind = GOMP_MEMKIND_NONE;
if (data->allocator == omp_high_bw_mem_alloc)
memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (data->allocator == omp_large_cap_mem_alloc)
struct omp_allocator_data *allocator_data;
size_t new_size, size_temp, new_alignment;
void *ptr, *ret;
-#ifdef LIBGOMP_USE_MEMKIND
- enum gomp_memkind_kind memkind;
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
+ enum gomp_numa_memkind_kind memkind;
#endif
if (__builtin_expect (size == 0 || nmemb == 0, 0))
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
new_alignment = allocator_data->alignment;
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = allocator_data->memkind;
#endif
}
allocator_data = NULL;
if (new_alignment < sizeof (void *))
new_alignment = sizeof (void *);
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = GOMP_MEMKIND_NONE;
+#endif
+#ifdef LIBGOMP_USE_MEMKIND
if (allocator == omp_high_bw_mem_alloc)
memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (allocator == omp_large_cap_mem_alloc)
allocator_data->used_pool_size = used_pool_size;
gomp_mutex_unlock (&allocator_data->lock);
#endif
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (memkind == GOMP_MEMKIND_LIBNUMA)
+ /* numa_alloc_local uses mmap with MAP_ANONYMOUS, returning
+ memory that is initialized to zero. */
+ ptr = libnuma_data->numa_alloc_local (new_size);
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
}
else
{
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (memkind == GOMP_MEMKIND_LIBNUMA)
+ /* numa_alloc_local uses mmap with MAP_ANONYMOUS, returning
+ memory that is initialized to zero. */
+ ptr = libnuma_data->numa_alloc_local (new_size);
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
{
case omp_atv_default_mem_fb:
if ((new_alignment > sizeof (void *) && new_alignment > alignment)
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
|| memkind
#endif
|| (allocator_data
size_t new_size, old_size, new_alignment, old_alignment;
void *new_ptr, *ret;
struct omp_mem_header *data;
-#ifdef LIBGOMP_USE_MEMKIND
- enum gomp_memkind_kind memkind, free_memkind;
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
+ enum gomp_numa_memkind_kind memkind, free_memkind;
#endif
if (__builtin_expect (ptr == NULL, 0))
allocator_data = (struct omp_allocator_data *) allocator;
if (new_alignment < allocator_data->alignment)
new_alignment = allocator_data->alignment;
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = allocator_data->memkind;
#endif
}
else
{
allocator_data = NULL;
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
memkind = GOMP_MEMKIND_NONE;
+#endif
+#ifdef LIBGOMP_USE_MEMKIND
if (allocator == omp_high_bw_mem_alloc)
memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (allocator == omp_large_cap_mem_alloc)
if (free_allocator > omp_max_predefined_alloc)
{
free_allocator_data = (struct omp_allocator_data *) free_allocator;
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
free_memkind = free_allocator_data->memkind;
#endif
}
else
{
free_allocator_data = NULL;
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
free_memkind = GOMP_MEMKIND_NONE;
+#endif
+#ifdef LIBGOMP_USE_MEMKIND
if (free_allocator == omp_high_bw_mem_alloc)
free_memkind = GOMP_MEMKIND_HBW_PREFERRED;
else if (free_allocator == omp_large_cap_mem_alloc)
allocator_data->used_pool_size = used_pool_size;
gomp_mutex_unlock (&allocator_data->lock);
#endif
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (memkind == GOMP_MEMKIND_LIBNUMA)
+ {
+ if (prev_size)
+ new_ptr = libnuma_data->numa_realloc (data->ptr, data->size,
+ new_size);
+ else
+ new_ptr = libnuma_data->numa_alloc_local (new_size);
+ }
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
}
else if (new_alignment == sizeof (void *)
&& old_alignment == sizeof (struct omp_mem_header)
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
&& memkind == free_memkind
#endif
&& (free_allocator_data == NULL
|| free_allocator_data->pool_size == ~(uintptr_t) 0))
{
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (memkind == GOMP_MEMKIND_LIBNUMA)
+ new_ptr = libnuma_data->numa_realloc (data->ptr, data->size, new_size);
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
}
else
{
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (memkind == GOMP_MEMKIND_LIBNUMA)
+ new_ptr = libnuma_data->numa_alloc_local (new_size);
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (memkind)
{
gomp_mutex_unlock (&free_allocator_data->lock);
#endif
}
+#ifdef LIBGOMP_USE_LIBNUMA
+ if (free_memkind == GOMP_MEMKIND_LIBNUMA)
+ {
+ libnuma_data->numa_free (data->ptr, data->size);
+ return ret;
+ }
+# ifdef LIBGOMP_USE_MEMKIND
+ else
+# endif
+#endif
#ifdef LIBGOMP_USE_MEMKIND
if (free_memkind)
{
{
case omp_atv_default_mem_fb:
if (new_alignment > sizeof (void *)
-#ifdef LIBGOMP_USE_MEMKIND
+#if defined(LIBGOMP_USE_MEMKIND) || defined(LIBGOMP_USE_LIBNUMA)
|| memkind
#endif
|| (allocator_data
#include "libgomp.h"
#if defined(PLUGIN_SUPPORT) && defined(LIBGOMP_USE_PTHREADS)
#define LIBGOMP_USE_MEMKIND
+#define LIBGOMP_USE_LIBNUMA
#endif
#include "../../allocator.c"
specification and then either the single-character field type or its long
name enclosed in curly braces; using @code{%%} will display a literal percent.
The size specification consists of an optional @code{0.} or @code{.} followed
-by a positive integer, specifing the minimal width of the output. With
+by a positive integer, specifying the minimal width of the output. With
@code{0.} and numerical values, the output is padded with zeros on the left;
with @code{.}, the output is padded by spaces on the left; otherwise, the
output is padded by spaces on the right. If unset, the value is
@tab value returned by
@code{omp_get_ancestor_thread_num(omp_get_level()-1)}
@item H @tab host @tab name of the host that executes the thread
-@item P @tab process_id @tab process identifier
-@item i @tab native_thread_id @tab native thread identifier
+@item P @tab process_id @tab process identifier
+@item i @tab native_thread_id @tab native thread identifier
@item A @tab thread_affinity
@tab comma separated list of integer values or ranges, representing the
processors on which a process might execute, subject to affinity
@menu
* Implementation-defined ICV Initialization::
* OpenMP Context Selectors::
-* Memory allocation with libmemkind::
+* Memory allocation::
@end menu
@node Implementation-defined ICV Initialization
@tab See @code{-march=} in ``Nvidia PTX Options''
@end multitable
-@node Memory allocation with libmemkind
-@section Memory allocation with libmemkind
+@node Memory allocation
+@section Memory allocation
For the memory spaces, the following applies:
@itemize
@itemize
@item the memory space @code{omp_high_bw_mem_space}
@item the memory space @code{omp_large_cap_mem_space}
-@item the partition trait @code{omp_atv_interleaved}; note that for
+@item the @code{partition} trait @code{interleaved}; note that for
@code{omp_large_cap_mem_space} the allocation will not be interleaved
@end itemize
+On Linux systems, where the @uref{https://github.com/numactl/numactl, numa
+library} (@code{libnuma.so.1}) is available at runtime, it is used when
+creating memory allocators requesting
+
+@itemize
+@item the @code{partition} trait @code{nearest}, except when both the
+libmemkind library is available and the memory space is either
+@code{omp_large_cap_mem_space} or @code{omp_high_bw_mem_space}
+@end itemize
+
+Note that the numa library will round up the allocation size to a multiple of
+the system page size; therefore, consider using it only with large data or
+by sharing allocations via the @code{pool_size} trait. Furthermore, the Linux
+kernel does not guarantee that an allocation will always be on the nearest NUMA
+node nor that after reallocation the same node will be used. Note additionally
+that, on Linux, the default setting of the memory placement policy is to use the
+current node; therefore, unless the memory placement policy has been overridden,
+the @code{partition} trait @code{environment} (the default) will be effectively
+a @code{nearest} allocation.
+
Additional notes:
@itemize
@item The @code{pinned} trait is unsupported.
@item For the @code{partition} trait, the partition part size will be the same
as the requested size (i.e. @code{interleaved} or @code{blocked} has no
effect), except for @code{interleaved} when the memkind library is
- available. Furthermore, for @code{nearest} the memory might not be
- on the same NUMA node as thread that allocated the memory; on Linux,
- this is in particular the case when the memory placement policy is
- set to preferred.
+ available. Furthermore, for @code{nearest} and unless the numa library
+ is available, the memory might not be on the same NUMA node as the thread
+ that allocated the memory; on Linux, this is in particular the case when
+ the memory placement policy is set to preferred.
@item The @code{access} trait has no effect such that memory is always
accessible by all threads.
@item The @code{sync_hint} trait has no effect.
--- /dev/null
+/* This testcase is mostly the same as alloc-9.c.
+ However, on systems where the numa and/or memkind libraries are
+ installed, libgomp uses those. This test ensures that the minimal
+ features work. Note: No attempt has been made to verify the partition
+ hints interleaved and nearest as the kernel purposely ignores them once
+ in a while and it would also require a 'dlopen' dance.
+
+ memkind is used for omp_high_bw_mem_space, omp_large_cap_mem_space
+ and partition = interleaved, albeit it won't be interleaved for
+ omp_large_cap_mem_space.
+
+ numa is used for partition = nearest, unless memkind is used. */
+
+#include <omp.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+const omp_alloctrait_t traits2[] /* 16-byte aligned, 1024-byte pool, default-memory fallback, partition=nearest.  */
+= { { omp_atk_alignment, 16 },
+ { omp_atk_sync_hint, omp_atv_default },
+ { omp_atk_access, omp_atv_default },
+ { omp_atk_pool_size, 1024 },
+ { omp_atk_fallback, omp_atv_default_mem_fb },
+ { omp_atk_partition, omp_atv_nearest } };
+omp_alloctrait_t traits3[] /* Not const: main stores an allocator handle into entry [5] at run time.  */
+= { { omp_atk_sync_hint, omp_atv_uncontended },
+ { omp_atk_alignment, 32 },
+ { omp_atk_access, omp_atv_all },
+ { omp_atk_pool_size, 512 },
+ { omp_atk_fallback, omp_atv_allocator_fb },
+ { omp_atk_fb_data, 0 }, /* Placeholder; main checks this key and overwrites the value.  */
+ { omp_atk_partition, omp_atv_interleaved } };
+const omp_alloctrait_t traits4[] /* 128-byte aligned, 1024-byte pool, null fallback.  */
+= { { omp_atk_alignment, 128 },
+ { omp_atk_pool_size, 1024 },
+ { omp_atk_fallback, omp_atv_null_fb } };
+
+int
+main () /* Drives omp_alloc/omp_realloc/omp_free over predefined and user-defined allocators; any failed check calls abort ().  */
+{
+ int *volatile p = (int *) omp_alloc (3 * sizeof (int), omp_default_mem_alloc);
+ int *volatile q;
+ int *volatile r;
+ omp_alloctrait_t traits[4] /* 64-byte aligned, null fallback, 4096-byte pool, partition=nearest.  */
+ = { { omp_atk_alignment, 64 },
+ { omp_atk_fallback, omp_atv_null_fb },
+ { omp_atk_pool_size, 4096 },
+ { omp_atk_partition, omp_atv_nearest } };
+ omp_alloctrait_t traits5[2] /* Null fallback, 4096-byte pool.  */
+ = { { omp_atk_fallback, omp_atv_null_fb },
+ { omp_atk_pool_size, 4096 } };
+ omp_allocator_handle_t a, a2;
+
+ if ((((uintptr_t) p) % __alignof (int)) != 0)
+ abort ();
+ p[0] = 1;
+ p[1] = 2;
+ p[2] = 3;
+ p = (int *) omp_realloc (p, 4 * sizeof (int), omp_high_bw_mem_alloc, omp_high_bw_mem_alloc); /* Grow from 3 to 4 ints; the old contents are checked next.  */
+ if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 1 || p[1] != 2 || p[2] != 3)
+ abort ();
+ p[0] = 4;
+ p[1] = 5;
+ p[2] = 6;
+ p[3] = 7;
+ p = (int *) omp_realloc (p, 2 * sizeof (int), omp_high_bw_mem_alloc, omp_high_bw_mem_alloc); /* Shrink to 2 ints; the first two values are checked next.  */
+ if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 4 || p[1] != 5)
+ abort ();
+ p[0] = 8;
+ p[1] = 9;
+ if (omp_realloc (p, 0, omp_null_allocator, omp_high_bw_mem_alloc) != NULL) /* A size of 0 is expected to yield NULL.  */
+ abort ();
+ p = (int *) omp_realloc (NULL, 2 * sizeof (int), omp_large_cap_mem_alloc, omp_null_allocator); /* NULL input pointer; the result is used as a fresh allocation.  */
+ if ((((uintptr_t) p) % __alignof (int)) != 0)
+ abort ();
+ p[0] = 1;
+ p[1] = 2;
+ p = (int *) omp_realloc (p, 5 * sizeof (int), omp_large_cap_mem_alloc, omp_large_cap_mem_alloc);
+ if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 1 || p[1] != 2)
+ abort ();
+ p[0] = 3;
+ p[1] = 4;
+ p[2] = 5;
+ p[3] = 6;
+ p[4] = 7;
+ omp_free (p, omp_null_allocator);
+ omp_set_default_allocator (omp_large_cap_mem_alloc); /* From here on omp_null_allocator arguments are used together with this default.  */
+ if (omp_realloc (NULL, 0, omp_null_allocator, omp_null_allocator) != NULL)
+ abort ();
+ p = (int *) omp_alloc (sizeof (int), omp_null_allocator);
+ if ((((uintptr_t) p) % __alignof (int)) != 0)
+ abort ();
+ p[0] = 3;
+ p = (int *) omp_realloc (p, 3 * sizeof (int), omp_null_allocator, omp_null_allocator);
+ if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] != 3)
+ abort ();
+ p[0] = 4;
+ p[1] = 5;
+ p[2] = 6;
+ if (omp_realloc (p, 0, omp_null_allocator, omp_get_default_allocator ()) != NULL)
+ abort ();
+ a = omp_init_allocator (omp_default_mem_space, 4, traits); /* User allocator built from the local TRAITS array (partition=nearest).  */
+ if (a == omp_null_allocator)
+ abort ();
+ p = (int *) omp_alloc (sizeof (int), a);
+ if ((((uintptr_t) p) % 64) != 0)
+ abort ();
+ p[0] = 7;
+ p = (int *) omp_realloc (p, 3072, a, a);
+ if ((((uintptr_t) p) % 64) != 0 || p[0] != 7)
+ abort ();
+ p[0] = 1;
+ p[3071 / sizeof (int)] = 2; /* Touch the last int of the 3072-byte block.  */
+ q = (int *) omp_alloc (sizeof (int), a);
+ if ((((uintptr_t) q) % 64) != 0)
+ abort ();
+ q[0] = 8;
+ if (omp_realloc (q, 3072, a, a) != NULL) /* Expected to fail; presumably the 4096-byte pool cannot hold a second 3072-byte block and the fallback is null_fb.  */
+ abort ();
+ omp_free (p, a);
+ omp_free (q, a);
+ p = (int *) omp_alloc (sizeof (int), a);
+ p[0] = 42;
+ p = (int *) omp_realloc (p, 3072, a, a);
+ if (p[0] != 42)
+ abort ();
+ p[0] = 3;
+ p[3071 / sizeof (int)] = 4;
+ omp_realloc (p, 0, omp_null_allocator, omp_null_allocator);
+ omp_set_default_allocator (a);
+ if (omp_get_default_allocator () != a)
+ abort ();
+ p = (int *) omp_alloc (31, omp_null_allocator);
+ if (p == NULL)
+ abort ();
+ p = (int *) omp_realloc (p, 3072, omp_null_allocator, omp_null_allocator);
+ if (p == NULL)
+ abort ();
+ q = (int *) omp_alloc (sizeof (int), omp_null_allocator);
+ if (q == NULL)
+ abort ();
+ if (omp_realloc (q, 3072, omp_null_allocator, omp_null_allocator) != NULL) /* Same expected failure as above, this time through the default allocator.  */
+ abort ();
+ omp_free (p, a);
+ omp_free (q, a);
+ omp_destroy_allocator (a);
+
+ a = omp_init_allocator (omp_large_cap_mem_space, 2, traits5); /* Same sequence again on omp_large_cap_mem_space with TRAITS5.  */
+ if (a == omp_null_allocator)
+ abort ();
+ omp_set_default_allocator (a);
+ if (omp_get_default_allocator () != a)
+ abort ();
+ p = (int *) omp_alloc (3071, omp_null_allocator);
+ if (p == NULL)
+ abort ();
+ p = (int *) omp_realloc (p, 3072, omp_null_allocator, omp_null_allocator);
+ if (p == NULL)
+ abort ();
+ q = (int *) omp_alloc (sizeof (int), omp_null_allocator);
+ if (q == NULL)
+ abort ();
+ if (omp_realloc (q, 3072, omp_null_allocator, omp_null_allocator) != NULL)
+ abort ();
+ omp_free (p, a);
+ omp_free (q, a);
+ omp_destroy_allocator (a);
+
+ a = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits2) / sizeof (traits2[0]),
+ traits2);
+ if (a == omp_null_allocator)
+ abort ();
+ if (traits3[5].key != omp_atk_fb_data) /* Guard: entry [5] must be the fb_data slot before it is overwritten.  */
+ abort ();
+ traits3[5].value = (uintptr_t) a; /* A becomes the fallback allocator recorded in TRAITS3.  */
+ a2 = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits3) / sizeof (traits3[0]),
+ traits3);
+ if (a2 == omp_null_allocator)
+ abort ();
+ p = (int *) omp_alloc (sizeof (int), a2);
+ if ((((uintptr_t) p) % 32) != 0)
+ abort ();
+ p[0] = 84;
+ p = (int *) omp_realloc (p, 380, a2, a2);
+ if ((((uintptr_t) p) % 32) != 0 || p[0] != 84)
+ abort ();
+ p[0] = 5;
+ p[379 / sizeof (int)] = 6;
+ q = (int *) omp_alloc (sizeof (int), a2);
+ if ((((uintptr_t) q) % 32) != 0)
+ abort ();
+ q[0] = 42;
+ q = (int *) omp_realloc (q, 768, a2, a2);
+ if ((((uintptr_t) q) % 16) != 0 || q[0] != 42) /* Only 16-byte alignment is required here; presumably the block comes from fallback allocator A.  */
+ abort ();
+ q[0] = 7;
+ q[767 / sizeof (int)] = 8;
+ r = (int *) omp_realloc (NULL, 512, a2, omp_null_allocator);
+ if ((((uintptr_t) r) % __alignof (int)) != 0)
+ abort ();
+ r[0] = 9;
+ r[511 / sizeof (int)] = 10;
+ omp_free (p, omp_null_allocator);
+ omp_free (q, a2);
+ omp_free (r, omp_null_allocator);
+ p = (int *) omp_alloc (sizeof (int), a2);
+ if ((((uintptr_t) p) % 32) != 0)
+ abort ();
+ p[0] = 85;
+ p = (int *) omp_realloc (p, 320, a, a2); /* Reallocate with A while handing back memory obtained from A2.  */
+ if ((((uintptr_t) p) % 16) != 0 || p[0] != 85)
+ abort ();
+ p[0] = 5;
+ p[319 / sizeof (int)] = 6;
+ q = (int *) omp_alloc (sizeof (int), a);
+ if ((((uintptr_t) q) % 16) != 0)
+ abort ();
+ q[0] = 43;
+ q = (int *) omp_realloc (q, 320, a2, a); /* The reverse direction: allocate with A2, free with A.  */
+ if ((((uintptr_t) q) % 32) != 0 || q[0] != 43)
+ abort ();
+ q[0] = 44;
+ q[319 / sizeof (int)] = 8;
+ q = (int *) omp_realloc (q, 568, a2, a2);
+ if ((((uintptr_t) q) % 16) != 0 || q[0] != 44)
+ abort ();
+ q[0] = 7;
+ q[567 / sizeof (int)] = 8;
+ omp_free (p, omp_null_allocator);
+ omp_free (q, a2);
+ omp_destroy_allocator (a2);
+ omp_destroy_allocator (a);
+
+ a = omp_init_allocator (omp_large_cap_mem_space,
+ sizeof (traits4) / sizeof (traits4[0]),
+ traits4);
+ if (a == omp_null_allocator)
+ abort ();
+ if (traits3[5].key != omp_atk_fb_data)
+ abort ();
+ traits3[5].value = (uintptr_t) a;
+ a2 = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits3) / sizeof (traits3[0]),
+ traits3);
+ if (a2 == omp_null_allocator)
+ abort ();
+ omp_set_default_allocator (a2);
+#ifdef __cplusplus
+ p = static_cast <int *> (omp_realloc (NULL, 420)); /* The C++ build omits the allocator arguments here.  */
+#else
+ p = (int *) omp_realloc (NULL, 420, omp_null_allocator, omp_null_allocator);
+#endif
+ if ((((uintptr_t) p) % 32) != 0)
+ abort ();
+ p[0] = 5;
+ p[419 / sizeof (int)] = 6;
+ q = (int *) omp_realloc (NULL, sizeof (int), omp_null_allocator, omp_null_allocator);
+ if ((((uintptr_t) q) % 32) != 0)
+ abort ();
+ q[0] = 99;
+ q = (int *) omp_realloc (q, 700, omp_null_allocator, omp_null_allocator);
+ if ((((uintptr_t) q) % 128) != 0 || q[0] != 99) /* 128-byte alignment matches TRAITS4, i.e. the fallback allocator A.  */
+ abort ();
+ q[0] = 7;
+ q[699 / sizeof (int)] = 8;
+ if (omp_realloc (NULL, 768, omp_null_allocator, omp_null_allocator) != NULL) /* Expected to fail; presumably neither pool has room and A uses null_fb.  */
+ abort ();
+#ifdef __cplusplus
+ omp_free (p);
+ if (omp_realloc (q, 0) != NULL)
+ abort ();
+ omp_free (NULL);
+#else
+ omp_free (p, omp_null_allocator);
+ if (omp_realloc (q, 0, omp_null_allocator, omp_null_allocator) != NULL)
+ abort ();
+ omp_free (NULL, omp_null_allocator);
+#endif
+ omp_free (NULL, omp_null_allocator);
+ omp_destroy_allocator (a2);
+ omp_destroy_allocator (a);
+ return 0;
+}
--- /dev/null
+/* This testcase is mostly the same as alloc-8.c.
+ However, on systems where the numa and/or memkind libraries are
+ installed, libgomp uses those. This test ensures that the minimal
+ features work. Note: No attempt has been made to verify the partition
+ hints interleaved and nearest as the kernel purposely ignores them once
+ in a while and it would also require a 'dlopen' dance.
+
+ memkind is used for omp_high_bw_mem_space, omp_large_cap_mem_space
+ and partition = interleaved, albeit it won't be interleaved for
+ omp_large_cap_mem_space.
+
+ numa is used for partition = nearest, unless memkind is used. */
+
+#include <omp.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+const omp_alloctrait_t traits2[] /* 16-byte aligned, 1024-byte pool, default-memory fallback, partition=nearest.  */
+= { { omp_atk_alignment, 16 },
+ { omp_atk_sync_hint, omp_atv_default },
+ { omp_atk_access, omp_atv_default },
+ { omp_atk_pool_size, 1024 },
+ { omp_atk_fallback, omp_atv_default_mem_fb },
+ { omp_atk_partition, omp_atv_nearest } };
+omp_alloctrait_t traits3[] /* Not const: main stores an allocator handle into entry [5] at run time.  */
+= { { omp_atk_sync_hint, omp_atv_uncontended },
+ { omp_atk_alignment, 32 },
+ { omp_atk_access, omp_atv_all },
+ { omp_atk_pool_size, 512 },
+ { omp_atk_fallback, omp_atv_allocator_fb },
+ { omp_atk_fb_data, 0 }, /* Placeholder; main checks this key and overwrites the value.  */
+ { omp_atk_partition, omp_atv_interleaved } };
+const omp_alloctrait_t traits4[] /* 128-byte aligned, 1024-byte pool, null fallback.  */
+= { { omp_atk_alignment, 128 },
+ { omp_atk_pool_size, 1024 },
+ { omp_atk_fallback, omp_atv_null_fb } };
+
+/* Abort unless each of the first LEN bytes starting at PTR is zero.
+   Nothing is read when LEN is 0.  */
+static void
+check_all_zero (void *ptr, size_t len)
+{
+  const char *bytes = (const char *) ptr;
+  size_t remaining = len;
+
+  while (remaining-- > 0)
+    if (*bytes++ != '\0')
+      abort ();
+}
+
+int
+main () /* Drives omp_aligned_calloc over predefined and user-defined allocators and checks that returned memory is zeroed; any failed check calls abort ().  */
+{
+ int *volatile p = (int *) omp_aligned_calloc (sizeof (int), 3, sizeof (int), omp_high_bw_mem_alloc);
+ check_all_zero (p, 3*sizeof (int));
+ int *volatile q;
+ int *volatile r;
+ int i;
+ omp_alloctrait_t traits[3] /* 64-byte aligned, null fallback, 4096-byte pool.  */
+ = { { omp_atk_alignment, 64 },
+ { omp_atk_fallback, omp_atv_null_fb },
+ { omp_atk_pool_size, 4096 } };
+ omp_allocator_handle_t a, a2;
+
+ if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0] || p[1] || p[2])
+ abort ();
+ p[0] = 1;
+ p[1] = 2;
+ p[2] = 3;
+ omp_free (p, omp_high_bw_mem_alloc);
+ p = (int *) omp_aligned_calloc (2 * sizeof (int), 1, 2 * sizeof (int), omp_large_cap_mem_alloc);
+ check_all_zero (p, 2*sizeof (int));
+ if ((((uintptr_t) p) % (2 * sizeof (int))) != 0 || p[0] || p[1])
+ abort ();
+ p[0] = 1;
+ p[1] = 2;
+ omp_free (p, omp_null_allocator);
+ omp_set_default_allocator (omp_large_cap_mem_alloc); /* From here on omp_null_allocator arguments are used together with this default.  */
+ p = (int *) omp_aligned_calloc (1, 1, sizeof (int), omp_null_allocator);
+ check_all_zero (p, sizeof (int));
+ if ((((uintptr_t) p) % __alignof (int)) != 0 || p[0])
+ abort ();
+ p[0] = 3;
+ omp_free (p, omp_get_default_allocator ());
+
+ a = omp_init_allocator (omp_large_cap_mem_space, 3, traits); /* User allocator built from the local TRAITS array.  */
+ if (a == omp_null_allocator)
+ abort ();
+ p = (int *) omp_aligned_calloc (32, 3, 1024, a);
+ check_all_zero (p, 3*1024);
+ if ((((uintptr_t) p) % 64) != 0) /* The allocator's 64-byte alignment is checked although only 32 was requested.  */
+ abort ();
+ for (i = 0; i < 3072 / sizeof (int); i++)
+ if (p[i])
+ abort ();
+ p[0] = 1;
+ p[3071 / sizeof (int)] = 2; /* Touch the last int of the 3072-byte block.  */
+ if (omp_aligned_calloc (8, 192, 16, a) != NULL) /* Expected to fail; presumably the 4096-byte pool cannot hold another 3072 bytes and the fallback is null_fb.  */
+ abort ();
+ omp_free (p, a);
+ p = (int *) omp_aligned_calloc (128, 6, 512, a);
+ check_all_zero (p, 6*512);
+ if ((((uintptr_t) p) % 128) != 0)
+ abort ();
+ for (i = 0; i < 3072 / sizeof (int); i++)
+ if (p[i])
+ abort ();
+ p[0] = 3;
+ p[3071 / sizeof (int)] = 4;
+ omp_free (p, omp_null_allocator);
+ omp_set_default_allocator (a);
+ if (omp_get_default_allocator () != a)
+ abort ();
+ p = (int *) omp_aligned_calloc (64, 12, 256, omp_null_allocator);
+ check_all_zero (p, 12*256);
+ for (i = 0; i < 3072 / sizeof (int); i++)
+ if (p[i])
+ abort ();
+ if (omp_aligned_calloc (8, 128, 24, omp_null_allocator) != NULL) /* Same expected failure as above, this time through the default allocator.  */
+ abort ();
+ omp_free (p, a);
+ omp_destroy_allocator (a);
+
+ a = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits2) / sizeof (traits2[0]),
+ traits2);
+ if (a == omp_null_allocator)
+ abort ();
+ if (traits3[5].key != omp_atk_fb_data) /* Guard: entry [5] must be the fb_data slot before it is overwritten.  */
+ abort ();
+ traits3[5].value = (uintptr_t) a; /* A becomes the fallback allocator recorded in TRAITS3.  */
+ a2 = omp_init_allocator (omp_default_mem_space,
+ sizeof (traits3) / sizeof (traits3[0]),
+ traits3);
+ if (a2 == omp_null_allocator)
+ abort ();
+ p = (int *) omp_aligned_calloc (4, 5, 84, a2);
+ check_all_zero (p, 5*84);
+ for (i = 0; i < 420 / sizeof (int); i++)
+ if (p[i])
+ abort ();
+ if ((((uintptr_t) p) % 32) != 0)
+ abort ();
+ p[0] = 5;
+ p[419 / sizeof (int)] = 6;
+ q = (int *) omp_aligned_calloc (8, 24, 32, a2);
+ check_all_zero (q, 24*32);
+ if ((((uintptr_t) q) % 16) != 0) /* Only 16-byte alignment is required here; presumably the block comes from fallback allocator A.  */
+ abort ();
+ for (i = 0; i < 768 / sizeof (int); i++)
+ if (q[i])
+ abort ();
+ q[0] = 7;
+ q[767 / sizeof (int)] = 8;
+ r = (int *) omp_aligned_calloc (8, 64, 8, a2);
+ check_all_zero (r, 64*8);
+ if ((((uintptr_t) r) % 8) != 0)
+ abort ();
+ for (i = 0; i < 512 / sizeof (int); i++)
+ if (r[i])
+ abort ();
+ r[0] = 9;
+ r[511 / sizeof (int)] = 10;
+ omp_free (p, omp_null_allocator);
+ omp_free (q, a2);
+ omp_free (r, omp_null_allocator);
+ omp_destroy_allocator (a2);
+ omp_destroy_allocator (a);
+
+ a = omp_init_allocator (omp_high_bw_mem_space,
+ sizeof (traits4) / sizeof (traits4[0]),
+ traits4);
+ if (a == omp_null_allocator)
+ abort ();
+ if (traits3[5].key != omp_atk_fb_data)
+ abort ();
+ traits3[5].value = (uintptr_t) a;
+ a2 = omp_init_allocator (omp_high_bw_mem_space,
+ sizeof (traits3) / sizeof (traits3[0]),
+ traits3);
+ if (a2 == omp_null_allocator)
+ abort ();
+ omp_set_default_allocator (a2);
+#ifdef __cplusplus
+ p = static_cast <int *> (omp_aligned_calloc (4, 21, 20)); /* The C++ build omits the allocator argument here.  */
+#else
+ p = (int *) omp_aligned_calloc (4, 21, 20, omp_null_allocator);
+#endif
+ check_all_zero (p, 21*20);
+ if ((((uintptr_t) p) % 32) != 0)
+ abort ();
+ for (i = 0; i < 420 / sizeof (int); i++)
+ if (p[i])
+ abort ();
+ p[0] = 5;
+ p[419 / sizeof (int)] = 6;
+ q = (int *) omp_aligned_calloc (64, 12, 64, omp_null_allocator);
+ check_all_zero (q, 12*64);
+ if ((((uintptr_t) q) % 128) != 0) /* 128-byte alignment matches TRAITS4, i.e. the fallback allocator A.  */
+ abort ();
+ for (i = 0; i < 768 / sizeof (int); i++)
+ if (q[i])
+ abort ();
+ q[0] = 7;
+ q[767 / sizeof (int)] = 8;
+ if (omp_aligned_calloc (8, 24, 32, omp_null_allocator) != NULL) /* Expected to fail; presumably neither pool has room and A uses null_fb.  */
+ abort ();
+#ifdef __cplusplus
+ omp_free (p);
+ omp_free (q);
+ omp_free (NULL);
+#else
+ omp_free (p, omp_null_allocator);
+ omp_free (q, omp_null_allocator);
+ omp_free (NULL, omp_null_allocator);
+#endif
+ omp_free (NULL, omp_null_allocator);
+ omp_destroy_allocator (a2);
+ omp_destroy_allocator (a);
+ return 0;
+}