+2022-03-10 Andrew Stubbs <ams@codesourcery.com>
+
+ Backport of a patch posted at
+ https://gcc.gnu.org/pipermail/gcc-patches/2022-January/588360.html
+
+ * allocator.c (MEMSPACE_ALLOC): Add PIN.
+ (MEMSPACE_CALLOC): Add PIN.
+ (MEMSPACE_REALLOC): Add PIN.
+ (MEMSPACE_FREE): Add PIN.
+ (xmlock): New function.
+ (omp_init_allocator): Don't disallow the pinned trait.
+ (omp_aligned_alloc): Add pinning to all MEMSPACE_* calls.
+ (omp_aligned_calloc): Likewise.
+ (omp_realloc): Likewise.
+ (omp_free): Likewise.
+ * config/linux/allocator.c: New file.
+ * config/nvptx/allocator.c (MEMSPACE_ALLOC): Add PIN.
+ (MEMSPACE_CALLOC): Add PIN.
+ (MEMSPACE_REALLOC): Add PIN.
+ (MEMSPACE_FREE): Add PIN.
+ * testsuite/libgomp.c/alloc-pinned-1.c: New test.
+ * testsuite/libgomp.c/alloc-pinned-2.c: New test.
+ * testsuite/libgomp.c/alloc-pinned-3.c: New test.
+ * testsuite/libgomp.c/alloc-pinned-4.c: New test.
+
2022-03-09 Abid Qadeer <abidh@codesourcery.com>
Backport of a patch posted at
/* These macros may be overridden in config/<target>/allocator.c. */
#ifndef MEMSPACE_ALLOC
-#define MEMSPACE_ALLOC(MEMSPACE, SIZE) \
- ((void)MEMSPACE, malloc (SIZE))
+/* Generic fallbacks: pinning is not available here, so any request that
+   involves pinned memory reports failure (NULL) instead of silently handing
+   back unpinned memory.  MEMSPACE and the unused size arguments are still
+   evaluated as (void) so that caller-side variables do not become unused.  */
+#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
+  ((void) (MEMSPACE), (PIN) ? NULL : malloc (SIZE))
#endif
#ifndef MEMSPACE_CALLOC
-#define MEMSPACE_CALLOC(MEMSPACE, SIZE) \
- ((void)MEMSPACE, calloc (1, SIZE))
+#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
+  ((void) (MEMSPACE), (PIN) ? NULL : calloc (1, SIZE))
#endif
#ifndef MEMSPACE_REALLOC
-#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE) \
- ((void)MEMSPACE, (void)OLDSIZE, realloc (ADDR, SIZE))
+#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
+  ((void) (MEMSPACE), (void) (OLDSIZE), \
+   ((PIN) || (OLDPIN)) ? NULL : realloc (ADDR, SIZE))
#endif
#ifndef MEMSPACE_FREE
-#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) \
- ((void)MEMSPACE, (void)SIZE, free (ADDR))
+/* Both arms of the conditional are void; mixing NULL with the void result
+   of free is not valid ISO C.  */
+#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \
+  ((void) (MEMSPACE), (void) (SIZE), (PIN) ? (void) 0 : free (ADDR))
#endif
/* Map the predefined allocators to the correct memory space.
data.alignment = sizeof (void *);
/* No support for these so far (for hbw will use memkind). */
- if (data.pinned || data.memspace == omp_high_bw_mem_space)
+ if (data.memspace == omp_high_bw_mem_space)
return omp_null_allocator;
ret = gomp_malloc (sizeof (struct omp_allocator_data));
allocator_data->used_pool_size = used_pool_size;
gomp_mutex_unlock (&allocator_data->lock);
#endif
- ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size);
+ ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size,
+ allocator_data->pinned);
if (ptr == NULL)
{
#ifdef HAVE_SYNC_BUILTINS
omp_memspace_handle_t memspace = (allocator_data
? allocator_data->memspace
: predefined_alloc_mapping[allocator]);
- ptr = MEMSPACE_ALLOC (memspace, new_size);
+ ptr = MEMSPACE_ALLOC (memspace, new_size,
+ allocator_data && allocator_data->pinned);
if (ptr == NULL)
goto fail;
}
{
case omp_atv_default_mem_fb:
if ((new_alignment > sizeof (void *) && new_alignment > alignment)
- || (allocator_data
- && allocator_data->pool_size < ~(uintptr_t) 0)
- || !allocator_data)
+ || !allocator_data
+ || allocator_data->pool_size < ~(uintptr_t) 0
+ || allocator_data->pinned)
{
allocator = omp_default_mem_alloc;
goto retry;
{
struct omp_mem_header *data;
omp_memspace_handle_t memspace = omp_default_mem_space;
+ int pinned __attribute__((unused)) = false;
if (ptr == NULL)
return;
}
memspace = allocator_data->memspace;
+ pinned = allocator_data->pinned;
}
else
memspace = predefined_alloc_mapping[data->allocator];
- MEMSPACE_FREE (memspace, data->ptr, data->size);
+ MEMSPACE_FREE (memspace, data->ptr, data->size, pinned);
}
ialias (omp_free)
allocator_data->used_pool_size = used_pool_size;
gomp_mutex_unlock (&allocator_data->lock);
#endif
- ptr = MEMSPACE_CALLOC (allocator_data->memspace, new_size);
+ ptr = MEMSPACE_CALLOC (allocator_data->memspace, new_size,
+ allocator_data->pinned);
if (ptr == NULL)
{
#ifdef HAVE_SYNC_BUILTINS
omp_memspace_handle_t memspace = (allocator_data
? allocator_data->memspace
: predefined_alloc_mapping[allocator]);
- ptr = MEMSPACE_CALLOC (memspace, new_size);
+      /* Zero-filled memory is required here, so this must stay the CALLOC
+         variant (as in the line being replaced), not MEMSPACE_ALLOC.  */
+      ptr = MEMSPACE_CALLOC (memspace, new_size,
+                             allocator_data && allocator_data->pinned);
if (ptr == NULL)
goto fail;
}
{
case omp_atv_default_mem_fb:
if ((new_alignment > sizeof (void *) && new_alignment > alignment)
- || (allocator_data
- && allocator_data->pool_size < ~(uintptr_t) 0)
- || !allocator_data)
+ || !allocator_data
+ || allocator_data->pool_size < ~(uintptr_t) 0
+ || allocator_data->pinned)
{
allocator = omp_default_mem_alloc;
goto retry;
#endif
if (prev_size)
new_ptr = MEMSPACE_REALLOC (allocator_data->memspace, data->ptr,
- data->size, new_size);
+ data->size, new_size,
+ (free_allocator_data
+ && free_allocator_data->pinned),
+ allocator_data->pinned);
else
- new_ptr = malloc (new_size);
+ new_ptr = MEMSPACE_ALLOC (allocator_data->memspace, new_size,
+ allocator_data->pinned);
if (new_ptr == NULL)
{
#ifdef HAVE_SYNC_BUILTINS
omp_memspace_handle_t memspace = (allocator_data
? allocator_data->memspace
: predefined_alloc_mapping[allocator]);
- new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, new_size);
+ new_ptr = MEMSPACE_REALLOC (memspace, data->ptr, data->size, new_size,
+ (free_allocator_data
+ && free_allocator_data->pinned),
+ allocator_data && allocator_data->pinned);
if (new_ptr == NULL)
goto fail;
ret = (char *) new_ptr + sizeof (struct omp_mem_header);
}
else
{
- new_ptr = malloc (new_size);
+ omp_memspace_handle_t memspace
+ = (allocator_data
+ ? allocator_data->memspace
+ : predefined_alloc_mapping[allocator]);
+ new_ptr = MEMSPACE_ALLOC (memspace, new_size,
+ allocator_data && allocator_data->pinned);
if (new_ptr == NULL)
goto fail;
}
{
case omp_atv_default_mem_fb:
if (new_alignment > sizeof (void *)
- || (allocator_data
- && allocator_data->pool_size < ~(uintptr_t) 0)
- || !allocator_data)
+ || !allocator_data
+ || allocator_data->pool_size < ~(uintptr_t) 0
+ || allocator_data->pinned)
{
allocator = omp_default_mem_alloc;
goto retry;
--- /dev/null
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Implement malloc routines that can handle pinned memory on Linux.
+
+ It's possible to use mlock on any heap memory, but using munlock is
+ problematic if there are multiple pinned allocations on the same page.
+ Tracking all that manually would be possible, but adds overhead. This may
+ be worth it if there are a lot of small allocations getting pinned, but
+ this seems less likely in an HPC application.
+
+ Instead we optimize for large pinned allocations, and use mmap to ensure
+ that two pinned allocations don't share the same page. This also means
+ that large allocations don't pin extra pages by being poorly aligned. */
+
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <string.h>
+#include "libgomp.h"
+
+/* Allocate SIZE bytes.  MEMSPACE is ignored.  When PIN is nonzero the memory
+   is a private anonymous mapping that is then locked with mlock; NULL is
+   returned if either the mapping or the locking fails (the mapping is
+   released again in the latter case).  When PIN is zero this is malloc.  */
+static void *
+linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin)
+{
+  void *mapping;
+
+  (void)memspace;
+
+  if (!pin)
+    return malloc (size);
+
+  mapping = mmap (NULL, size, PROT_READ | PROT_WRITE,
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mapping == MAP_FAILED)
+    return NULL;
+
+  if (mlock (mapping, size) != 0)
+    {
+      gomp_debug (0, "libgomp: failed to pin memory (ulimit too low?)\n");
+      munmap (mapping, size);
+      return NULL;
+    }
+
+  return mapping;
+}
+
+/* Zero-initialised counterpart of linux_memspace_alloc.  A pinned request is
+   forwarded to linux_memspace_alloc without an explicit clear, relying on
+   anonymous mappings being zero-filled; an unpinned request uses calloc.  */
+static void *
+linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
+{
+  return (pin
+          ? linux_memspace_alloc (memspace, size, pin)
+          : calloc (1, size));
+}
+
+/* Release ADDR.  MEMSPACE is ignored.  PIN must say how the block was
+   obtained: pinned blocks are unmapped (SIZE bytes), others are passed to
+   free.  */
+static void
+linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size,
+                     int pin)
+{
+  (void)memspace;
+
+  if (!pin)
+    {
+      free (addr);
+      return;
+    }
+
+  munmap (addr, size);
+}
+
+/* Resize the block at ADDR from OLDSIZE to SIZE bytes, where OLDPIN says
+   whether the existing block is pinned and PIN whether the result must be.
+   Returns the new address, or NULL on failure, in which case the old block
+   is left untouched by this function.
+
+   - neither pinned: plain realloc;
+   - both pinned: mremap, which is allowed to move the mapping;
+   - otherwise: allocate the new kind of block, copy the smaller of the two
+     sizes across and release the old block.  */
+static void *
+linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
+                        size_t oldsize, size_t size, int oldpin, int pin)
+{
+  void *result;
+  size_t keep;
+
+  if (!oldpin && !pin)
+    return realloc (addr, size);
+
+  if (oldpin && pin)
+    {
+      result = mremap (addr, oldsize, size, MREMAP_MAYMOVE);
+      return result == MAP_FAILED ? NULL : result;
+    }
+
+  /* Exactly one side is pinned, so the data has to change allocator.  */
+  result = linux_memspace_alloc (memspace, size, pin);
+  if (result == NULL)
+    return NULL;
+
+  keep = size;
+  if (oldsize < size)
+    keep = oldsize;
+  memcpy (result, addr, keep);
+  linux_memspace_free (memspace, addr, oldsize, oldpin);
+
+  return result;
+}
+
+/* Bind the MEMSPACE_* hooks to the Linux implementations in this file.  */
+#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
+  linux_memspace_alloc ((MEMSPACE), (SIZE), (PIN))
+#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
+  linux_memspace_calloc ((MEMSPACE), (SIZE), (PIN))
+#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
+  linux_memspace_realloc ((MEMSPACE), (ADDR), (OLDSIZE), (SIZE), \
+                          (OLDPIN), (PIN))
+#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \
+  linux_memspace_free ((MEMSPACE), (ADDR), (SIZE), (PIN))
+
+#include "../../allocator.c"
return realloc (addr, size);
}
-#define MEMSPACE_ALLOC(MEMSPACE, SIZE) \
+#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
nvptx_memspace_alloc (MEMSPACE, SIZE)
-#define MEMSPACE_CALLOC(MEMSPACE, SIZE) \
+#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
nvptx_memspace_calloc (MEMSPACE, SIZE)
-#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE) \
+#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
nvptx_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, SIZE)
-#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE) \
+#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \
nvptx_memspace_free (MEMSPACE, ADDR, SIZE)
#include "../../allocator.c"
--- /dev/null
+/* { dg-do run } */
+
+/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } } */
+
+/* Test that pinned memory works. */
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/mman.h>
+
+/* Return the number found on the "VmLck:" line of /proc/<pid>/status for
+   this process.  Aborts if the file cannot be opened or no such line is
+   found.  */
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  sprintf (buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, sizeof (buf), proc))
+    {
+      int val;
+      /* sscanf returns EOF (nonzero) on an input failure, so insist on
+         exactly one conversion before trusting VAL.  */
+      if (sscanf (buf, "VmLck: %d", &val) == 1)
+        {
+          fclose (proc);
+          return val;
+        }
+    }
+  abort ();
+}
+#else
+/* Non-Linux hosts: always report that nothing is locked.  */
+int
+get_pinned_mem ()
+{
+  return 0;
+}
+#endif
+
+#include <omp.h>
+
+/* Allocate more than a page each time, but stay within the ulimit. */
+#define SIZE 10*1024
+
+/* Check that alloc, realloc and calloc from a pinned allocator each succeed
+   and each raise the amount of locked memory reported by get_pinned_mem.
+   Earlier blocks are deliberately not freed so the amount keeps growing.  */
+int
+main ()
+{
+  const omp_alloctrait_t traits[] = {
+    { omp_atk_pinned, 1 }
+  };
+  omp_allocator_handle_t allocator = omp_init_allocator (omp_default_mem_space, 1, traits);
+
+  // Sanity check
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  void *p = omp_alloc (SIZE, allocator);
+  if (!p)
+    abort ();
+
+  int amount = get_pinned_mem ();
+  if (amount == 0)
+    abort ();
+
+  p = omp_realloc (p, SIZE*2, allocator, allocator);
+  /* Fail on the allocation itself rather than on a later comparison.  */
+  if (!p)
+    abort ();
+
+  int amount2 = get_pinned_mem ();
+  if (amount2 <= amount)
+    abort ();
+
+  p = omp_calloc (1, SIZE, allocator);
+  if (!p)
+    abort ();
+
+  if (get_pinned_mem () <= amount2)
+    abort ();
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+/* { dg-xfail-run-if "Pinning not implemented on this host" { ! *-*-linux-gnu } } */
+
+/* Test that pinned memory works (pool_size code path). */
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/mman.h>
+
+/* Return the number found on the "VmLck:" line of /proc/<pid>/status for
+   this process.  Aborts if the file cannot be opened or no such line is
+   found.  */
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  sprintf (buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, sizeof (buf), proc))
+    {
+      int val;
+      /* sscanf returns EOF (nonzero) on an input failure, so insist on
+         exactly one conversion before trusting VAL.  */
+      if (sscanf (buf, "VmLck: %d", &val) == 1)
+        {
+          fclose (proc);
+          return val;
+        }
+    }
+  abort ();
+}
+#else
+/* Non-Linux hosts: always report that nothing is locked.  */
+int
+get_pinned_mem ()
+{
+  return 0;
+}
+#endif
+
+#include <omp.h>
+
+/* Allocate more than a page each time, but stay within the ulimit. */
+#define SIZE 10*1024
+
+/* Same checks as the basic pinned-memory test, but the allocator also has a
+   pool_size trait so the pool accounting path is taken.  Earlier blocks are
+   deliberately not freed so the locked amount keeps growing.  */
+int
+main ()
+{
+  const omp_alloctrait_t traits[] = {
+    { omp_atk_pinned, 1 },
+    { omp_atk_pool_size, SIZE*8 }
+  };
+  omp_allocator_handle_t pinned_alloc
+    = omp_init_allocator (omp_default_mem_space, 2, traits);
+  void *mem;
+  int locked_first, locked_second, locked_third;
+
+  /* Nothing may be locked before the first allocation.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  mem = omp_alloc (SIZE, pinned_alloc);
+  if (mem == NULL)
+    abort ();
+
+  locked_first = get_pinned_mem ();
+  if (locked_first == 0)
+    abort ();
+
+  mem = omp_realloc (mem, SIZE*2, pinned_alloc, pinned_alloc);
+  if (mem == NULL)
+    abort ();
+
+  locked_second = get_pinned_mem ();
+  if (locked_second <= locked_first)
+    abort ();
+
+  mem = omp_calloc (1, SIZE, pinned_alloc);
+  if (mem == NULL)
+    abort ();
+
+  locked_third = get_pinned_mem ();
+  if (locked_third <= locked_second)
+    abort ();
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+/* Test that pinned memory fails correctly. */
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+/* Return the number found on the "VmLck:" line of /proc/<pid>/status for
+   this process.  Aborts if the file cannot be opened or no such line is
+   found.  */
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  sprintf (buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, sizeof (buf), proc))
+    {
+      int val;
+      /* sscanf returns EOF (nonzero) on an input failure, so insist on
+         exactly one conversion before trusting VAL.  */
+      if (sscanf (buf, "VmLck: %d", &val) == 1)
+        {
+          fclose (proc);
+          return val;
+        }
+    }
+  abort ();
+}
+
+/* Set the soft RLIMIT_MEMLOCK limit to SIZE bytes, or to the hard limit if
+   that is smaller.  Aborts if the limit cannot be read or written.  */
+void
+set_pin_limit (int size)
+{
+  struct rlimit limit;
+  if (getrlimit (RLIMIT_MEMLOCK, &limit))
+    abort ();
+  limit.rlim_cur = (limit.rlim_max < (rlim_t) size
+                    ? limit.rlim_max : (rlim_t) size);
+  if (setrlimit (RLIMIT_MEMLOCK, &limit))
+    abort ();
+}
+#else
+/* Non-Linux hosts: always report that nothing is locked.  */
+int
+get_pinned_mem ()
+{
+  return 0;
+}
+
+/* Non-Linux hosts: no limit is changed.  The parameter matches the Linux
+   version so that the call in main passes the declared number of
+   arguments.  */
+void
+set_pin_limit (int size)
+{
+  (void) size;
+}
+#endif
+
+#include <omp.h>
+
+/* This should be large enough to cover multiple pages. */
+#define SIZE 10000*1024
+
+/* With the lock limit set below the request size, check that a pinned
+   allocator with a null fallback fails, that one with the default-memory
+   fallback still returns memory, and that no memory ends up locked.  */
+int
+main ()
+{
+  /* Pinned, and failure is reported to the caller.  */
+  const omp_alloctrait_t strict_traits[] = {
+    { omp_atk_pinned, 1 },
+    { omp_atk_fallback, omp_atv_null_fb }
+  };
+  omp_allocator_handle_t strict_alloc
+    = omp_init_allocator (omp_default_mem_space, 2, strict_traits);
+
+  /* Pinned, but falling back to ordinary memory on failure.  */
+  const omp_alloctrait_t lenient_traits[] = {
+    { omp_atk_pinned, 1 },
+    { omp_atk_fallback, omp_atv_default_mem_fb }
+  };
+  omp_allocator_handle_t lenient_alloc
+    = omp_init_allocator (omp_default_mem_space, 2, lenient_traits);
+
+  void *got;
+  void *plain;
+
+  /* Make the lock limit smaller than any single request below.  */
+  set_pin_limit (SIZE/2);
+
+  /* Nothing may be locked at the start.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  /* No fallback: alloc must fail.  */
+  got = omp_alloc (SIZE, strict_alloc);
+  if (got != NULL)
+    abort ();
+
+  /* No fallback: calloc must fail.  */
+  got = omp_calloc (1, SIZE, strict_alloc);
+  if (got != NULL)
+    abort ();
+
+  /* Fallback: alloc must succeed.  */
+  got = omp_alloc (SIZE, lenient_alloc);
+  if (got == NULL)
+    abort ();
+
+  /* Fallback: calloc must succeed.  */
+  got = omp_calloc (1, SIZE, lenient_alloc);
+  if (got == NULL)
+    abort ();
+
+  /* Moving an ordinary block to the strict pinned allocator must fail.  */
+  plain = omp_alloc (SIZE, omp_default_mem_alloc);
+  got = omp_realloc (plain, SIZE, strict_alloc, omp_default_mem_alloc);
+  if (got != NULL || plain == NULL)
+    abort ();
+
+  /* With the fallback the same-size realloc must return the block as is.  */
+  got = omp_realloc (plain, SIZE, lenient_alloc, omp_default_mem_alloc);
+  if (got != plain)
+    abort ();
+
+  /* None of the above may have left memory locked.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+/* Test that pinned memory fails correctly, pool_size code path. */
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <sys/mman.h>
+#include <sys/resource.h>
+
+/* Return the number found on the "VmLck:" line of /proc/<pid>/status for
+   this process.  Aborts if the file cannot be opened or no such line is
+   found.  */
+int
+get_pinned_mem ()
+{
+  int pid = getpid ();
+  char buf[100];
+  sprintf (buf, "/proc/%d/status", pid);
+
+  FILE *proc = fopen (buf, "r");
+  if (!proc)
+    abort ();
+  while (fgets (buf, sizeof (buf), proc))
+    {
+      int val;
+      /* sscanf returns EOF (nonzero) on an input failure, so insist on
+         exactly one conversion before trusting VAL.  */
+      if (sscanf (buf, "VmLck: %d", &val) == 1)
+        {
+          fclose (proc);
+          return val;
+        }
+    }
+  abort ();
+}
+
+/* Set the soft RLIMIT_MEMLOCK limit to SIZE bytes, or to the hard limit if
+   that is smaller.  Aborts if the limit cannot be read or written.  */
+void
+set_pin_limit (int size)
+{
+  struct rlimit limit;
+  if (getrlimit (RLIMIT_MEMLOCK, &limit))
+    abort ();
+  limit.rlim_cur = (limit.rlim_max < (rlim_t) size
+                    ? limit.rlim_max : (rlim_t) size);
+  if (setrlimit (RLIMIT_MEMLOCK, &limit))
+    abort ();
+}
+#else
+/* Non-Linux hosts: always report that nothing is locked.  */
+int
+get_pinned_mem ()
+{
+  return 0;
+}
+
+/* Non-Linux hosts: no limit is changed.  The parameter matches the Linux
+   version so that the call in main passes the declared number of
+   arguments.  */
+void
+set_pin_limit (int size)
+{
+  (void) size;
+}
+#endif
+
+#include <omp.h>
+
+/* This should be large enough to cover multiple pages. */
+#define SIZE 10000*1024
+
+/* Pool-size variant of the pinning failure test: with the lock limit set
+   below the request size, a pinned allocator with a null fallback must
+   fail, one with the default-memory fallback must still return memory, and
+   no memory may end up locked.  */
+int
+main ()
+{
+  /* Pinned, bounded pool, and failure is reported to the caller.  */
+  const omp_alloctrait_t strict_traits[] = {
+    { omp_atk_pinned, 1 },
+    { omp_atk_fallback, omp_atv_null_fb },
+    { omp_atk_pool_size, SIZE*8 }
+  };
+  omp_allocator_handle_t strict_alloc
+    = omp_init_allocator (omp_default_mem_space, 3, strict_traits);
+
+  /* Pinned, bounded pool, falling back to ordinary memory on failure.  */
+  const omp_alloctrait_t lenient_traits[] = {
+    { omp_atk_pinned, 1 },
+    { omp_atk_fallback, omp_atv_default_mem_fb },
+    { omp_atk_pool_size, SIZE*8 }
+  };
+  omp_allocator_handle_t lenient_alloc
+    = omp_init_allocator (omp_default_mem_space, 3, lenient_traits);
+
+  void *got;
+  void *plain;
+
+  /* Make the lock limit smaller than any single request below.  */
+  set_pin_limit (SIZE/2);
+
+  /* Nothing may be locked at the start.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  /* No fallback: alloc must fail.  */
+  got = omp_alloc (SIZE, strict_alloc);
+  if (got != NULL)
+    abort ();
+
+  /* No fallback: calloc must fail.  */
+  got = omp_calloc (1, SIZE, strict_alloc);
+  if (got != NULL)
+    abort ();
+
+  /* Fallback: alloc must succeed.  */
+  got = omp_alloc (SIZE, lenient_alloc);
+  if (got == NULL)
+    abort ();
+
+  /* Fallback: calloc must succeed.  */
+  got = omp_calloc (1, SIZE, lenient_alloc);
+  if (got == NULL)
+    abort ();
+
+  /* Moving an ordinary block to the strict pinned allocator must fail.  */
+  plain = omp_alloc (SIZE, omp_default_mem_alloc);
+  got = omp_realloc (plain, SIZE, strict_alloc, omp_default_mem_alloc);
+  if (got != NULL || plain == NULL)
+    abort ();
+
+  /* With the fallback the same-size realloc must return the block as is.  */
+  got = omp_realloc (plain, SIZE, lenient_alloc, omp_default_mem_alloc);
+  if (got != plain)
+    abort ();
+
+  /* None of the above may have left memory locked.  */
+  if (get_pinned_mem () != 0)
+    abort ();
+
+  return 0;
+}