From: Andrew Stubbs Date: Thu, 23 Oct 2025 11:49:20 +0000 (+0000) Subject: Revert "libgomp: fine-grained pinned memory allocator" X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ae21c84502ab222a6f24580d977408a5bcf5e9cc;p=thirdparty%2Fgcc.git Revert "libgomp: fine-grained pinned memory allocator" This reverts commit 59ebc6007887151cdb0f7d00108b86a5921ec5a4. --- diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am index aece10369cc..f0ba2b98005 100644 --- a/libgomp/Makefile.am +++ b/libgomp/Makefile.am @@ -71,7 +71,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \ oacc-target.c target-indirect.c oacc-profiling-acc_register_library.c \ - usmpin-allocator.c target-cxa-dso-dtor.c + target-cxa-dso-dtor.c include $(top_srcdir)/plugin/Makefrag.am diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 89dc47c68c8..aac36955336 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -220,8 +220,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \ oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \ affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \ oacc-target.lo target-indirect.lo \ - oacc-profiling-acc_register_library.lo usmpin-allocator.lo \ - target-cxa-dso-dtor.lo $(am__objects_1) + oacc-profiling-acc_register_library.lo target-cxa-dso-dtor.lo \ + $(am__objects_1) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) @@ -555,8 +555,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \ affinity-fmt.c teams.c allocator.c oacc-profiling.c \ oacc-target.c target-indirect.c \ - oacc-profiling-acc_register_library.c usmpin-allocator.c \ - target-cxa-dso-dtor.c $(am__append_3) + oacc-profiling-acc_register_library.c target-cxa-dso-dtor.c \ + $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -792,7 +792,6 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/team.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/teams.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/usmpin-allocator.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/work.Plo@am__quote@ .c.o: diff --git a/libgomp/config/linux/allocator.c b/libgomp/config/linux/allocator.c index 845ee27c666..06b38cc2c6e 100644 --- a/libgomp/config/linux/allocator.c +++ b/libgomp/config/linux/allocator.c @@ -53,7 +53,6 @@ #define _GNU_SOURCE #include -#include #include #include #include "libgomp.h" @@ -78,16 +77,6 @@ GOMP_enable_pinned_mode () static int using_device_for_page_locked = /* uninitialized */ -1; - -static usmpin_ctx_p pin_ctx = NULL; -static pthread_once_t ctxlock = PTHREAD_ONCE_INIT; - -static void -linux_init_pin_ctx () -{ - pin_ctx = usmpin_init_context (); -} - static void * linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin, bool init0) @@ -96,7 +85,7 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin, __FUNCTION__, (unsigned long long) memspace, (unsigned long long) size, pin, init0); - void *addr = NULL; + void *addr; /* Explicit pinning may not be required. */ pin = pin && !always_pinned_mode; @@ -122,51 +111,28 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin, } if (using_device == 0) { - static int pagesize = 0; - static void *addrhint = NULL; - - if (!pagesize) - pagesize = sysconf(_SC_PAGE_SIZE); - - while (1) + gomp_debug (0, " mmap\n"); + addr = mmap (NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) + addr = NULL; + else { - addr = usmpin_alloc (pin_ctx, size); - if (addr) - break; - - gomp_debug (0, " mmap\n"); - - /* Round up to a whole page. */ - size_t misalignment = size % pagesize; - size_t mmap_size = (misalignment > 0 - ? size + pagesize - misalignment - : size); - void *newpage = mmap (addrhint, mmap_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (newpage == MAP_FAILED) - break; - else + /* 'mmap' zero-initializes. */ + init0 = false; + + gomp_debug (0, " mlock\n"); + if (mlock (addr, size)) { - gomp_debug (0, " mlock\n"); - if (mlock (newpage, size)) - { #ifdef HAVE_INTTYPES_H - gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes" - " of memory (ulimit too low?)\n", - (uint64_t) size); + gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes of" + " memory (ulimit too low?)\n", (uint64_t) size); #else - gomp_debug (0, "libgomp: failed to pin %lu bytes of" - " memory (ulimit too low?)\n", - (unsigned long) size); + gomp_debug (0, "libgomp: failed to pin %lu bytes of memory" + " (ulimit too low?)\n", (unsigned long) size); #endif - munmap (newpage, size); - break; - } - - addrhint = newpage + mmap_size; - - pthread_once (&ctxlock, linux_init_pin_ctx); - usmpin_register_memory (pin_ctx, newpage, mmap_size); + munmap (addr, size); + addr = NULL; } } } @@ -218,7 +184,8 @@ linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size, if (using_device == 1) gomp_page_locked_host_free (addr); else - usmpin_free (pin_ctx, addr); + /* 'munlock'ing is implicit with following 'munmap'. */ + munmap (addr, size); } else free (addr); @@ -236,29 +203,29 @@ linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr, if (oldpin && pin) { + /* We can only expect to be able to just 'mremap' if not using a device + for page-locked memory. */ int using_device = __atomic_load_n (&using_device_for_page_locked, MEMMODEL_RELAXED); gomp_debug (0, " using_device=%d\n", using_device); + if (using_device != 0) + goto manual_realloc; - /* The device plugin API does not support realloc, - but the usmpin allocator does. */ - if (using_device == 0) - { - /* This can fail if there is insufficient pinned memory free. */ - void *newaddr = usmpin_realloc (pin_ctx, addr, size); - if (newaddr) - return newaddr; - } + gomp_debug (0, " mremap\n"); + void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE); + if (newaddr == MAP_FAILED) + return NULL; + + return newaddr; } else if (oldpin || pin) - /* Moving from pinned to unpinned memory cannot be done in-place. */ - ; + goto manual_realloc; else return realloc (addr, size); - /* In-place reallocation failed. Fall back to copy. */ +manual_realloc:; void *newaddr = linux_memspace_alloc (memspace, size, pin, false); if (newaddr) { diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 465f7c1b4ea..c584e0fc75b 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1689,14 +1689,4 @@ gomp_thread_to_pthread_t (struct gomp_thread *thr) } #endif -/* usmpin-allocator.c */ - -typedef struct usmpin_context *usmpin_ctx_p; - -usmpin_ctx_p usmpin_init_context (); -void usmpin_register_memory (usmpin_ctx_p ctx, char *base, size_t size); -void *usmpin_alloc (usmpin_ctx_p ctx, size_t size); -void usmpin_free (usmpin_ctx_p ctx, void *addr); -void *usmpin_realloc (usmpin_ctx_p ctx, void *addr, size_t newsize); - #endif /* LIBGOMP_H */ diff --git a/libgomp/testsuite/libgomp.c/alloc-pinned-8.c b/libgomp/testsuite/libgomp.c/alloc-pinned-8.c deleted file mode 100644 index 0fc737b1e2a..00000000000 --- a/libgomp/testsuite/libgomp.c/alloc-pinned-8.c +++ /dev/null @@ -1,122 +0,0 @@ -/* { dg-do run } */ - -/* { dg-skip-if "Pinning not implemented on this host" { ! *-*-linux-gnu* } } */ - -/* { dg-additional-options -DOFFLOAD_DEVICE_NVPTX { target offload_device_nvptx } } */ - -/* Test that pinned memory works for small allocations. */ - -#include -#include - -#ifdef __linux__ -#include -#include - -#include -#include - -#define PAGE_SIZE sysconf(_SC_PAGESIZE) -#define CHECK_SIZE(SIZE) { \ - struct rlimit limit; \ - if (getrlimit (RLIMIT_MEMLOCK, &limit) \ - || limit.rlim_cur <= SIZE) \ - fprintf (stderr, "insufficient lockable memory; please increase ulimit\n"); \ - } - -int -get_pinned_mem () -{ - int pid = getpid (); - char buf[100]; - sprintf (buf, "/proc/%d/status", pid); - - FILE *proc = fopen (buf, "r"); - if (!proc) - abort (); - while (fgets (buf, 100, proc)) - { - int val; - if (sscanf (buf, "VmLck: %d", &val)) - { - fclose (proc); - return val; - } - } - abort (); -} -#else -#error "OS unsupported" -#endif - -static void -verify0 (char *p, size_t s) -{ - for (size_t i = 0; i < s; ++i) - if (p[i] != 0) - abort (); -} - -#include - -int -main () -{ - /* Choose a small size where all our allocations fit on one page. */ - const int SIZE = 10; -#ifndef OFFLOAD_DEVICE_NVPTX - CHECK_SIZE (SIZE*4); -#endif - - const omp_alloctrait_t traits[] = { - { omp_atk_pinned, 1 } - }; - omp_allocator_handle_t allocator = omp_init_allocator (omp_default_mem_space, 1, traits); - - // Sanity check - if (get_pinned_mem () != 0) - abort (); - - void *p = omp_alloc (SIZE, allocator); - if (!p) - abort (); - - int amount = get_pinned_mem (); -#ifdef OFFLOAD_DEVICE_NVPTX - /* This doesn't show up as process 'VmLck'ed memory. */ - if (amount != 0) - abort (); -#else - if (amount == 0) - abort (); -#endif - - p = omp_realloc (p, SIZE * 2, allocator, allocator); - - int amount2 = get_pinned_mem (); -#ifdef OFFLOAD_DEVICE_NVPTX - /* This doesn't show up as process 'VmLck'ed memory. */ - if (amount2 != 0) - abort (); -#else - /* A small allocation should not allocate another page. */ - if (amount2 != amount) - abort (); -#endif - - p = omp_calloc (1, SIZE, allocator); - -#ifdef OFFLOAD_DEVICE_NVPTX - /* This doesn't show up as process 'VmLck'ed memory. */ - if (get_pinned_mem () != 0) - abort (); -#else - /* A small allocation should not allocate another page. */ - if (get_pinned_mem () != amount2) - abort (); -#endif - - verify0 (p, SIZE); - - return 0; -} diff --git a/libgomp/usmpin-allocator.c b/libgomp/usmpin-allocator.c deleted file mode 100644 index 311bda5054e..00000000000 --- a/libgomp/usmpin-allocator.c +++ /dev/null @@ -1,319 +0,0 @@ -/* Copyright (C) 2023 Free Software Foundation, Inc. - - This file is part of the GNU Offloading and Multi Processing Library - (libgomp). - - Libgomp is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - . */ - -/* This is a simple "malloc" implementation intended for use with Unified - Shared Memory and Pinned Memory. It allocates memory from a pool allocated - and configured by the device plugin (for USM), or the OS-specific allocator - (for pinned). - - This implementation keeps the allocated/free chain in a side-table (splay - tree) to ensure that the allocation routine does not migrate all the USM - pages back into host memory. Keeping the meta-data elsewhere is also useful - for pinned memory, which is typically an extremely limited resource. */ - -#include -#include "libgomp.h" - -/* Use a splay tree to track allocations. */ - -typedef struct usmpin_splay_tree_node_s *usmpin_splay_tree_node; -typedef struct usmpin_splay_tree_s *usmpin_splay_tree; -typedef struct usmpin_splay_tree_key_s *usmpin_splay_tree_key; - -struct usmpin_splay_tree_key_s { - void *base; - size_t size; -}; - -static inline int -usmpin_splay_compare (usmpin_splay_tree_key x, usmpin_splay_tree_key y) -{ - return (x->base == y->base ? 0 - : x->base > y->base ? 1 - : -1); -} - -#define splay_tree_prefix usmpin -#include "splay-tree.h" - -/* 128-byte granularity means GPU cache-line aligned. */ -#define ALIGN(VAR) (((VAR) + 127) & ~127) - -/* The context data prevents the need for global state. */ -struct usmpin_context { - int lock; - struct usmpin_splay_tree_s allocations; - struct usmpin_splay_tree_s free_space; -}; - -usmpin_ctx_p -usmpin_init_context () -{ - return calloc (1, sizeof (struct usmpin_context)); -} - -/* Coalesce contiguous free space into one entry. This considers the entries - either side of the root node only, so it should be called each time a new - entry in inserted into the root. */ - -static void -usmpin_coalesce_free_space (usmpin_ctx_p ctx) -{ - usmpin_splay_tree_node prev, next, node = ctx->free_space.root; - - for (prev = node->left; prev && prev->right; prev = prev->right) - ; - for (next = node->right; next && next->left; next = next->left) - ; - - /* Coalesce adjacent free chunks. */ - if (next - && node->key.base + node->key.size == next->key.base) - { - /* Free chunk follows. */ - node->key.size += next->key.size; - usmpin_splay_tree_remove (&ctx->free_space, &next->key); - free (next); - } - if (prev - && prev->key.base + prev->key.size == node->key.base) - { - /* Free chunk precedes. */ - prev->key.size += node->key.size; - usmpin_splay_tree_remove (&ctx->free_space, &node->key); - free (node); - } -} - -/* Add a new memory region into the free chain. This is how the USM heap is - initialized and extended. If the new region is contiguous with an existing - region then any free space will be coalesced. */ - -void -usmpin_register_memory (usmpin_ctx_p ctx, char *base, size_t size) -{ - if (base == NULL || ctx == NULL) - return; - - while (__atomic_exchange_n (&ctx->lock, 1, MEMMODEL_ACQUIRE) == 1) - ; - - usmpin_splay_tree_node node; - node = malloc (sizeof (struct usmpin_splay_tree_node_s)); - node->key.base = base; - node->key.size = size; - node->left = NULL; - node->right = NULL; - usmpin_splay_tree_insert (&ctx->free_space, node); - usmpin_coalesce_free_space (ctx); - - __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE); -} - -/* This splay_tree_foreach callback selects the first free space large enough - to hold the allocation needed. Since the splay_tree walk may start in the - middle the "first" isn't necessarily the "leftmost" entry. */ - -struct usmpin_callback_data { - size_t size; - usmpin_splay_tree_node found; -}; - -static int -usmpin_alloc_callback (usmpin_splay_tree_key key, void *data) -{ - struct usmpin_callback_data *cbd = (struct usmpin_callback_data *)data; - - if (key->size >= cbd->size) - { - cbd->found = (usmpin_splay_tree_node)key; - return 1; - } - - return 0; -} - -/* USM "malloc". Selects and moves and address range from ctx->free_space to - ctx->allocations, while leaving any excess in ctx->free_space. */ - -void * -usmpin_alloc (usmpin_ctx_p ctx, size_t size) -{ - if (ctx == NULL) - return NULL; - - /* Memory is allocated in N-byte granularity. */ - size = ALIGN (size); - - /* Acquire the lock. */ - while (__atomic_exchange_n (&ctx->lock, 1, MEMMODEL_ACQUIRE) == 1) - ; - - if (!ctx->free_space.root) - { - /* No memory registered, or no free space. */ - __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE); - return NULL; - } - - /* Find a suitable free block. */ - struct usmpin_callback_data cbd = {size, NULL}; - usmpin_splay_tree_foreach_lazy (&ctx->free_space, usmpin_alloc_callback, - &cbd); - usmpin_splay_tree_node freenode = cbd.found; - - void *result = NULL; - if (freenode) - { - /* Allocation successful. */ - result = freenode->key.base; - usmpin_splay_tree_node allocnode = malloc (sizeof (*allocnode)); - allocnode->key.base = result; - allocnode->key.size = size; - allocnode->left = NULL; - allocnode->right = NULL; - usmpin_splay_tree_insert (&ctx->allocations, allocnode); - - /* Update the free chain. */ - size_t stillfree_size = freenode->key.size - size; - if (stillfree_size > 0) - { - freenode->key.base = freenode->key.base + size; - freenode->key.size = stillfree_size; - } - else - { - usmpin_splay_tree_remove (&ctx->free_space, &freenode->key); - free (freenode); - } - } - - /* Release the lock. */ - __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE); - - return result; -} - -/* USM "free". Moves an address range from ctx->allocations to - ctx->free_space and merges that record with any contiguous free memory. */ - -void -usmpin_free (usmpin_ctx_p ctx, void *addr) -{ - if (ctx == NULL) - return; - - /* Acquire the lock. */ - while (__atomic_exchange_n (&ctx->lock, 1, MEMMODEL_ACQUIRE) == 1) - ; - - /* Convert the memory map to free. */ - struct usmpin_splay_tree_key_s key = {addr}; - usmpin_splay_tree_key found = usmpin_splay_tree_lookup (&ctx->allocations, - &key); - if (!found) - GOMP_PLUGIN_fatal ("invalid free"); - usmpin_splay_tree_remove (&ctx->allocations, &key); - usmpin_splay_tree_insert (&ctx->free_space, (usmpin_splay_tree_node)found); - usmpin_coalesce_free_space (ctx); - - /* Release the lock. */ - __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE); -} - -/* USM "realloc". Works in-place, if possible; reallocates otherwise. */ - -void * -usmpin_realloc (usmpin_ctx_p ctx, void *addr, size_t newsize) -{ - if (ctx == NULL) - return NULL; - - newsize = ALIGN (newsize); - - /* Acquire the lock. */ - while (__atomic_exchange_n (&ctx->lock, 1, MEMMODEL_ACQUIRE) == 1) - ; - - /* Convert the memory map to free. */ - struct usmpin_splay_tree_key_s key = {addr}; - usmpin_splay_tree_key found = usmpin_splay_tree_lookup (&ctx->allocations, - &key); - if (!found) - GOMP_PLUGIN_fatal ("invalid realloc"); - - if (newsize == found->size) - ; /* Nothing to do. */ - else if (newsize < found->size) - { - /* We're reducing the allocation size. */ - usmpin_splay_tree_node newfree = malloc (sizeof (*newfree)); - newfree->key.base = found->base + newsize; - newfree->key.size = found->size - newsize; - newfree->left = NULL; - newfree->right = NULL; - usmpin_splay_tree_insert (&ctx->free_space, newfree); - usmpin_coalesce_free_space (ctx); - } - else - { - /* We're extending the allocation. */ - struct usmpin_splay_tree_key_s freekey = {addr + found->size}; - usmpin_splay_tree_key foundfree; - foundfree = usmpin_splay_tree_lookup (&ctx->free_space, &freekey); - if (foundfree && foundfree->size >= newsize - found->size) - { - /* Allocation can be expanded in place. */ - foundfree->base += found->size; - foundfree->size -= newsize - found->size; - found->size = newsize; - - if (foundfree->size == 0) - usmpin_splay_tree_remove (&ctx->free_space, &freekey); - } - else - { - /* Allocation must be relocated. - Release the lock and use alloc/free. */ - __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE); - - void *newaddr = usmpin_alloc (ctx, newsize); - if (!newaddr) - return NULL; - - memcpy (newaddr, addr, found->size); - usmpin_free (ctx, addr); - return newaddr; - } - } - - /* Release the lock. */ - __atomic_store_n (&ctx->lock, 0, MEMMODEL_RELEASE); - return addr; -} - -/* Include the splay tree code inline, with the prefixes added. */ -#define splay_tree_prefix usmpin -#define splay_tree_c -#include "splay-tree.h"