#define _GNU_SOURCE
#include <sys/mman.h>
-#include <unistd.h>
#include <string.h>
#include <assert.h>
#include "libgomp.h"
static int using_device_for_page_locked
= /* uninitialized */ -1;
-
-static usmpin_ctx_p pin_ctx = NULL;
-static pthread_once_t ctxlock = PTHREAD_ONCE_INIT;
-
-static void
-linux_init_pin_ctx ()
-{
- pin_ctx = usmpin_init_context ();
-}
-
static void *
linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin,
bool init0)
__FUNCTION__, (unsigned long long) memspace,
(unsigned long long) size, pin, init0);
- void *addr = NULL;
+ void *addr;
/* Explicit pinning may not be required. */
pin = pin && !always_pinned_mode;
}
if (using_device == 0)
{
- static int pagesize = 0;
- static void *addrhint = NULL;
-
- if (!pagesize)
- pagesize = sysconf(_SC_PAGE_SIZE);
-
- while (1)
+ gomp_debug (0, " mmap\n");
+ addr = mmap (NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED)
+ addr = NULL;
+ else
{
- addr = usmpin_alloc (pin_ctx, size);
- if (addr)
- break;
-
- gomp_debug (0, " mmap\n");
-
- /* Round up to a whole page. */
- size_t misalignment = size % pagesize;
- size_t mmap_size = (misalignment > 0
- ? size + pagesize - misalignment
- : size);
- void *newpage = mmap (addrhint, mmap_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (newpage == MAP_FAILED)
- break;
- else
+ /* 'mmap' zero-initializes. */
+ init0 = false;
+
+ gomp_debug (0, " mlock\n");
+ if (mlock (addr, size))
{
- gomp_debug (0, " mlock\n");
- if (mlock (newpage, size))
- {
#ifdef HAVE_INTTYPES_H
- gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes"
- " of memory (ulimit too low?)\n",
- (uint64_t) size);
+ gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes of"
+ " memory (ulimit too low?)\n", (uint64_t) size);
#else
- gomp_debug (0, "libgomp: failed to pin %lu bytes of"
- " memory (ulimit too low?)\n",
- (unsigned long) size);
+ gomp_debug (0, "libgomp: failed to pin %lu bytes of memory"
+ " (ulimit too low?)\n", (unsigned long) size);
#endif
- munmap (newpage, size);
- break;
- }
-
- addrhint = newpage + mmap_size;
-
- pthread_once (&ctxlock, linux_init_pin_ctx);
- usmpin_register_memory (pin_ctx, newpage, mmap_size);
+ munmap (addr, size);
+ addr = NULL;
}
}
}
if (using_device == 1)
gomp_page_locked_host_free (addr);
else
- usmpin_free (pin_ctx, addr);
+ /* 'munlock'ing is implicit with following 'munmap'. */
+ munmap (addr, size);
}
else
free (addr);
if (oldpin && pin)
{
+ /* 'mremap' is only applicable to memory that we 'mmap'ed ourselves, that
+ is, if not using a device for page-locked memory; else realloc manually. */
int using_device
= __atomic_load_n (&using_device_for_page_locked,
MEMMODEL_RELAXED);
gomp_debug (0, " using_device=%d\n",
using_device);
+ if (using_device != 0)
+ goto manual_realloc;
- /* The device plugin API does not support realloc,
- but the usmpin allocator does. */
- if (using_device == 0)
- {
- /* This can fail if there is insufficient pinned memory free. */
- void *newaddr = usmpin_realloc (pin_ctx, addr, size);
- if (newaddr)
- return newaddr;
- }
+ gomp_debug (0, " mremap\n");
+ void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE);
+ if (newaddr == MAP_FAILED)
+ return NULL;
+
+ return newaddr;
}
else if (oldpin || pin)
- /* Moving from pinned to unpinned memory cannot be done in-place. */
- ;
+ goto manual_realloc;
else
return realloc (addr, size);
- /* In-place reallocation failed. Fall back to copy. */
+manual_realloc:;
void *newaddr = linux_memspace_alloc (memspace, size, pin, false);
if (newaddr)
{
+++ /dev/null
-/* { dg-do run } */
-
-/* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */
-
-/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */
-
-/* Test that pinned memory works for small allocations. */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#ifdef __linux__
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <sys/mman.h>
-#include <sys/resource.h>
-
-#define PAGE_SIZE sysconf(_SC_PAGESIZE)
-#define CHECK_SIZE(SIZE) { \
- struct rlimit limit; \
- if (getrlimit (RLIMIT_MEMLOCK, &limit) \
- || limit.rlim_cur <= SIZE) \
- fprintf (stderr, "insufficient lockable memory; please increase ulimit\n"); \
- }
-
-int
-get_pinned_mem ()
-{
- int pid = getpid ();
- char buf[100];
- sprintf (buf, "/proc/%d/status", pid);
-
- FILE *proc = fopen (buf, "r");
- if (!proc)
- abort ();
- while (fgets (buf, 100, proc))
- {
- int val;
- if (sscanf (buf, "VmLck: %d", &val))
- {
- fclose (proc);
- return val;
- }
- }
- abort ();
-}
-#else
-#error "OS unsupported"
-#endif
-
-static void
-verify0 (char *p, size_t s)
-{
- for (size_t i = 0; i < s; ++i)
- if (p[i] != 0)
- abort ();
-}
-
-#include <omp.h>
-
-int
-main ()
-{
- /* Choose a small size where all our allocations fit on one page. */
- const int SIZE = 10;
-#ifndef OFFLOAD_DEVICE_NVPTX
- CHECK_SIZE (SIZE*4);
-#endif
-
- const omp_alloctrait_t traits[] = {
- { omp_atk_pinned, 1 }
- };
- omp_allocator_handle_t allocator = omp_init_allocator (omp_default_mem_space, 1, traits);
-
- // Sanity check
- if (get_pinned_mem () != 0)
- abort ();
-
- void *p = omp_alloc (SIZE, allocator);
- if (!p)
- abort ();
-
- int amount = get_pinned_mem ();
-#ifdef OFFLOAD_DEVICE_NVPTX
- /* This doesn't show up as process 'VmLck'ed memory. */
- if (amount != 0)
- abort ();
-#else
- if (amount == 0)
- abort ();
-#endif
-
- p = omp_realloc (p, SIZE * 2, allocator, allocator);
-
- int amount2 = get_pinned_mem ();
-#ifdef OFFLOAD_DEVICE_NVPTX
- /* This doesn't show up as process 'VmLck'ed memory. */
- if (amount2 != 0)
- abort ();
-#else
- /* A small allocation should not allocate another page. */
- if (amount2 != amount)
- abort ();
-#endif
-
- p = omp_calloc (1, SIZE, allocator);
-
-#ifdef OFFLOAD_DEVICE_NVPTX
- /* This doesn't show up as process 'VmLck'ed memory. */
- if (get_pinned_mem () != 0)
- abort ();
-#else
- /* A small allocation should not allocate another page. */
- if (get_pinned_mem () != amount2)
- abort ();
-#endif
-
- verify0 (p, SIZE);
-
- return 0;
-}
+++ /dev/null
-/* Copyright (C) 2023 Free Software Foundation, Inc.
-
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
-
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-/* This is a simple "malloc" implementation intended for use with Unified
- Shared Memory and Pinned Memory. It allocates memory from a pool allocated
- and configured by the device plugin (for USM), or the OS-specific allocator
- (for pinned).
-
- This implementation keeps the allocated/free chain in a side-table (splay
- tree) to ensure that the allocation routine does not migrate all the USM
- pages back into host memory. Keeping the meta-data elsewhere is also useful
- for pinned memory, which is typically an extremely limited resource. */
-
-#include <string.h>
-#include "libgomp.h"
-
-/* Use a splay tree to track allocations. */
-
-typedef struct usmpin_splay_tree_node_s *usmpin_splay_tree_node;
-typedef struct usmpin_splay_tree_s *usmpin_splay_tree;
-typedef struct usmpin_splay_tree_key_s *usmpin_splay_tree_key;
-
-struct usmpin_splay_tree_key_s {
- void *base;
- size_t size;
-};
-
-static inline int
-usmpin_splay_compare (usmpin_splay_tree_key x, usmpin_splay_tree_key y)
-{
- return (x->base == y->base ? 0
- : x->base > y->base ? 1
- : -1);
-}
-
-#define splay_tree_prefix usmpin
-#include "splay-tree.h"
-
-/* 128-byte granularity means GPU cache-line aligned. */
-#define ALIGN(VAR) (((VAR) + 127) & ~127)
-
-/* The context data prevents the need for global state. */
-struct usmpin_context {
- int lock;
- struct usmpin_splay_tree_s allocations;
- struct usmpin_splay_tree_s free_space;
-};
-
-usmpin_ctx_p
-usmpin_init_context ()
-{
- return calloc (1, sizeof (struct usmpin_context));
-}
-
-/* Coalesce contiguous free space into one entry. This considers the entries
- either side of the root node only, so it should be called each time a new
- entry in inserted into the root. */
-
-static void
-usmpin_coalesce_free_space (usmpin_ctx_p ctx)
-{
- usmpin_splay_tree_node prev, next, node = ctx->free_space.root;
-
- for (prev = node->left; prev && prev->right; prev = prev->right)
- ;
- for (next = node->right; next && next->left; next = next->left)
- ;
-
- /* Coalesce adjacent free chunks. */
- if (next
- && node->key.base + node->key.size == next->key.base)
- {
- /* Free chunk follows. */
- node->key.size += next->key.size;
- usmpin_splay_tree_remove (&ctx->free_space, &next->key);
- free (next);
- }
- if (prev
- && prev->key.base + prev->key.size == node->key.base)
- {
- /* Free chunk precedes. */
- prev->key.size += node->key.size;
- usmpin_splay_tree_remove (&ctx->free_space, &node->key);
- free (node);
- }
-}
-
-/* Add a new memory region into the free chain. This is how the USM heap is
- initialized and extended. If the new region is contiguous with an existing
- region then any free space will be coalesced. */
-
-void
-usmpin_register_memory (usmpin_ctx_p ctx, char *base, size_t size)
-{
- if (base == NULL || ctx == NULL)
- return;
-
- while (__atomic_exchange_n (&ctx->lock, 1, MEMMODEL_ACQUIRE) == 1)
- ;
-
- usmpin_splay_tree_node node;
- node = malloc (sizeof (struct usmpin_splay_tree_node_s));
- node->key.base = base;
- node->key.size = size;
- node->left = NULL;
- node->right = NULL;
- usmpin_splay_tree_insert (&ctx->free_space, node);
- usmpin_coalesce_free_space (ctx);
-
- __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE);
-}
-
-/* This splay_tree_foreach callback selects the first free space large enough
- to hold the allocation needed. Since the splay_tree walk may start in the
- middle the "first" isn't necessarily the "leftmost" entry. */
-
-struct usmpin_callback_data {
- size_t size;
- usmpin_splay_tree_node found;
-};
-
-static int
-usmpin_alloc_callback (usmpin_splay_tree_key key, void *data)
-{
- struct usmpin_callback_data *cbd = (struct usmpin_callback_data *)data;
-
- if (key->size >= cbd->size)
- {
- cbd->found = (usmpin_splay_tree_node)key;
- return 1;
- }
-
- return 0;
-}
-
-/* USM "malloc". Selects and moves and address range from ctx->free_space to
- ctx->allocations, while leaving any excess in ctx->free_space. */
-
-void *
-usmpin_alloc (usmpin_ctx_p ctx, size_t size)
-{
- if (ctx == NULL)
- return NULL;
-
- /* Memory is allocated in N-byte granularity. */
- size = ALIGN (size);
-
- /* Acquire the lock. */
- while (__atomic_exchange_n (&ctx->lock, 1, MEMMODEL_ACQUIRE) == 1)
- ;
-
- if (!ctx->free_space.root)
- {
- /* No memory registered, or no free space. */
- __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE);
- return NULL;
- }
-
- /* Find a suitable free block. */
- struct usmpin_callback_data cbd = {size, NULL};
- usmpin_splay_tree_foreach_lazy (&ctx->free_space, usmpin_alloc_callback,
- &cbd);
- usmpin_splay_tree_node freenode = cbd.found;
-
- void *result = NULL;
- if (freenode)
- {
- /* Allocation successful. */
- result = freenode->key.base;
- usmpin_splay_tree_node allocnode = malloc (sizeof (*allocnode));
- allocnode->key.base = result;
- allocnode->key.size = size;
- allocnode->left = NULL;
- allocnode->right = NULL;
- usmpin_splay_tree_insert (&ctx->allocations, allocnode);
-
- /* Update the free chain. */
- size_t stillfree_size = freenode->key.size - size;
- if (stillfree_size > 0)
- {
- freenode->key.base = freenode->key.base + size;
- freenode->key.size = stillfree_size;
- }
- else
- {
- usmpin_splay_tree_remove (&ctx->free_space, &freenode->key);
- free (freenode);
- }
- }
-
- /* Release the lock. */
- __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE);
-
- return result;
-}
-
-/* USM "free". Moves an address range from ctx->allocations to
- ctx->free_space and merges that record with any contiguous free memory. */
-
-void
-usmpin_free (usmpin_ctx_p ctx, void *addr)
-{
- if (ctx == NULL)
- return;
-
- /* Acquire the lock. */
- while (__atomic_exchange_n (&ctx->lock, 1, MEMMODEL_ACQUIRE) == 1)
- ;
-
- /* Convert the memory map to free. */
- struct usmpin_splay_tree_key_s key = {addr};
- usmpin_splay_tree_key found = usmpin_splay_tree_lookup (&ctx->allocations,
- &key);
- if (!found)
- GOMP_PLUGIN_fatal ("invalid free");
- usmpin_splay_tree_remove (&ctx->allocations, &key);
- usmpin_splay_tree_insert (&ctx->free_space, (usmpin_splay_tree_node)found);
- usmpin_coalesce_free_space (ctx);
-
- /* Release the lock. */
- __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE);
-}
-
-/* USM "realloc". Works in-place, if possible; reallocates otherwise. */
-
-void *
-usmpin_realloc (usmpin_ctx_p ctx, void *addr, size_t newsize)
-{
- if (ctx == NULL)
- return NULL;
-
- newsize = ALIGN (newsize);
-
- /* Acquire the lock. */
- while (__atomic_exchange_n (&ctx->lock, 1, MEMMODEL_ACQUIRE) == 1)
- ;
-
- /* Convert the memory map to free. */
- struct usmpin_splay_tree_key_s key = {addr};
- usmpin_splay_tree_key found = usmpin_splay_tree_lookup (&ctx->allocations,
- &key);
- if (!found)
- GOMP_PLUGIN_fatal ("invalid realloc");
-
- if (newsize == found->size)
- ; /* Nothing to do. */
- else if (newsize < found->size)
- {
- /* We're reducing the allocation size. */
- usmpin_splay_tree_node newfree = malloc (sizeof (*newfree));
- newfree->key.base = found->base + newsize;
- newfree->key.size = found->size - newsize;
- newfree->left = NULL;
- newfree->right = NULL;
- usmpin_splay_tree_insert (&ctx->free_space, newfree);
- usmpin_coalesce_free_space (ctx);
- }
- else
- {
- /* We're extending the allocation. */
- struct usmpin_splay_tree_key_s freekey = {addr + found->size};
- usmpin_splay_tree_key foundfree;
- foundfree = usmpin_splay_tree_lookup (&ctx->free_space, &freekey);
- if (foundfree && foundfree->size >= newsize - found->size)
- {
- /* Allocation can be expanded in place. */
- foundfree->base += found->size;
- foundfree->size -= newsize - found->size;
- found->size = newsize;
-
- if (foundfree->size == 0)
- usmpin_splay_tree_remove (&ctx->free_space, &freekey);
- }
- else
- {
- /* Allocation must be relocated.
- Release the lock and use alloc/free. */
- __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE);
-
- void *newaddr = usmpin_alloc (ctx, newsize);
- if (!newaddr)
- return NULL;
-
- memcpy (newaddr, addr, found->size);
- usmpin_free (ctx, addr);
- return newaddr;
- }
- }
-
- /* Release the lock. */
- __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE);
- return addr;
-}
-
-/* Include the splay tree code inline, with the prefixes added. */
-#define splay_tree_prefix usmpin
-#define splay_tree_c
-#include "splay-tree.h"