]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgomp/config/linux/allocator.c
'-foffload-memory=pinned' using offloading device interfaces
[thirdparty/gcc.git] / libgomp / config / linux / allocator.c
CommitLineData
ab7520b3
AS
1/* Copyright (C) 2022 Free Software Foundation, Inc.
2
3 This file is part of the GNU Offloading and Multi Processing Library
4 (libgomp).
5
6 Libgomp is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 more details.
15
16 Under Section 7 of GPL version 3, you are granted additional
17 permissions described in the GCC Runtime Library Exception, version
18 3.1, as published by the Free Software Foundation.
19
20 You should have received a copy of the GNU General Public License and
21 a copy of the GCC Runtime Library Exception along with this program;
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 <http://www.gnu.org/licenses/>. */
24
25/* Implement malloc routines that can handle pinned memory on Linux.
26
a5a4800e 27 Given that pinned memory is typically used to help host <-> device memory
4bd844f3
TS
28 transfers, we attempt to allocate such memory using a device (really:
29 libgomp plugin), but fall back to mmap plus mlock if no suitable device is
30 available.
a5a4800e 31
ab7520b3
AS
32 It's possible to use mlock on any heap memory, but using munlock is
33 problematic if there are multiple pinned allocations on the same page.
34 Tracking all that manually would be possible, but adds overhead. This may
35 be worth it if there are a lot of small allocations getting pinned, but
36 this seems less likely in an HPC application.
37
38 Instead we optimize for large pinned allocations, and use mmap to ensure
39 that two pinned allocations don't share the same page. This also means
40 that large allocations don't pin extra pages by being poorly aligned. */
41
42#define _GNU_SOURCE
43#include <sys/mman.h>
44#include <string.h>
a5a4800e 45#include <assert.h>
ab7520b3
AS
46#include "libgomp.h"
47
/* Records how page-locked (pinned) host memory is obtained.  Starts out as
   -1 ("not yet determined") and is overwritten by linux_memspace_alloc with
   the result of its first gomp_page_locked_host_alloc call: 0 selects the
   mmap/mlock fallback, and linux_memspace_free treats 1 as "allocated via
   the device interface".  Accessed with relaxed atomics throughout.  */
static int using_device_for_page_locked = /* uninitialized */ -1;
50
ab7520b3 51static void *
4bd844f3
TS
52linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin,
53 bool init0)
ab7520b3 54{
4bd844f3
TS
55 gomp_debug (0, "%s: memspace=%llu, size=%llu, pin=%d, init0=%d\n",
56 __FUNCTION__, (unsigned long long) memspace,
57 (unsigned long long) size, pin, init0);
a5a4800e 58
4bd844f3
TS
59 void *addr;
60
84914e19 61 if (memspace == ompx_unified_shared_mem_space)
4bd844f3 62 addr = gomp_usm_alloc (size, GOMP_DEVICE_ICV);
84914e19 63 else if (pin)
ab7520b3 64 {
a5a4800e 65 int using_device
4bd844f3 66 = __atomic_load_n (&using_device_for_page_locked,
a5a4800e
TS
67 MEMMODEL_RELAXED);
68 gomp_debug (0, " using_device=%d\n",
69 using_device);
70 if (using_device != 0)
71 {
4bd844f3 72 using_device = gomp_page_locked_host_alloc (&addr, size);
a5a4800e 73 int using_device_old
4bd844f3 74 = __atomic_exchange_n (&using_device_for_page_locked,
a5a4800e
TS
75 using_device, MEMMODEL_RELAXED);
76 gomp_debug (0, " using_device=%d, using_device_old=%d\n",
77 using_device, using_device_old);
78 assert (using_device_old == -1
79 /* We shouldn't have concurrently changed our mind. */
80 || using_device_old == using_device);
81 }
82 if (using_device == 0)
ab7520b3 83 {
4bd844f3
TS
84 gomp_debug (0, " mmap\n");
85 addr = mmap (NULL, size, PROT_READ | PROT_WRITE,
86 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
87 if (addr == MAP_FAILED)
88 addr = NULL;
89 else
a5a4800e 90 {
4bd844f3
TS
91 /* 'mmap' zero-initializes. */
92 init0 = false;
93
94 gomp_debug (0, " mlock\n");
95 if (mlock (addr, size))
96 {
97 gomp_debug (0, "libgomp: failed to pin memory"
98 " (ulimit too low?)\n");
99 munmap (addr, size);
100 addr = NULL;
101 }
a5a4800e 102 }
ab7520b3 103 }
ab7520b3
AS
104 }
105 else
4bd844f3
TS
106 addr = malloc (size);
107
108 if (addr && init0)
109 {
110 gomp_debug (0, " init0\n");
111 memset (addr, 0, size);
112 }
113
114 return addr;
ab7520b3
AS
115}
116
117static void *
118linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
119{
a5a4800e
TS
120 gomp_debug (0, "%s: memspace=%llu, size=%llu, pin=%d\n",
121 __FUNCTION__, (unsigned long long) memspace, (unsigned long long) size, pin);
122
84914e19
AS
123 if (memspace == ompx_unified_shared_mem_space)
124 {
125 void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV);
126 memset (ret, 0, size);
127 return ret;
128 }
23f52e49 129 else if (pin)
4bd844f3 130 return linux_memspace_alloc (memspace, size, pin, true);
ab7520b3
AS
131 else
132 return calloc (1, size);
133}
134
135static void
136linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size,
137 int pin)
138{
a5a4800e
TS
139 gomp_debug (0, "%s: memspace=%llu, addr=%p, size=%llu, pin=%d\n",
140 __FUNCTION__, (unsigned long long) memspace, addr, (unsigned long long) size, pin);
141
84914e19
AS
142 if (memspace == ompx_unified_shared_mem_space)
143 gomp_usm_free (addr, GOMP_DEVICE_ICV);
144 else if (pin)
a5a4800e
TS
145 {
146 int using_device
4bd844f3 147 = __atomic_load_n (&using_device_for_page_locked,
a5a4800e
TS
148 MEMMODEL_RELAXED);
149 gomp_debug (0, " using_device=%d\n",
150 using_device);
151 if (using_device == 1)
4bd844f3 152 gomp_page_locked_host_free (addr);
a5a4800e
TS
153 else
154 /* 'munlock'ing is implicit with following 'munmap'. */
4bd844f3 155 munmap (addr, size);
a5a4800e 156 }
ab7520b3
AS
157 else
158 free (addr);
159}
160
161static void *
162linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
163 size_t oldsize, size_t size, int oldpin, int pin)
164{
a5a4800e
TS
165 gomp_debug (0, "%s: memspace=%llu, addr=%p, oldsize=%llu, size=%llu, oldpin=%d, pin=%d\n",
166 __FUNCTION__, (unsigned long long) memspace, addr, (unsigned long long) oldsize, (unsigned long long) size, oldpin, pin);
167
84914e19
AS
168 if (memspace == ompx_unified_shared_mem_space)
169 goto manual_realloc;
170 else if (oldpin && pin)
ab7520b3 171 {
a5a4800e 172 /* We can only expect to be able to just 'mremap' if not using a device
4bd844f3 173 for page-locked memory. */
a5a4800e 174 int using_device
4bd844f3 175 = __atomic_load_n (&using_device_for_page_locked,
a5a4800e
TS
176 MEMMODEL_RELAXED);
177 gomp_debug (0, " using_device=%d\n",
178 using_device);
179 if (using_device != 0)
180 goto manual_realloc;
181
182 gomp_debug (0, " mremap\n");
ab7520b3
AS
183 void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE);
184 if (newaddr == MAP_FAILED)
185 return NULL;
186
187 return newaddr;
188 }
189 else if (oldpin || pin)
84914e19 190 goto manual_realloc;
ab7520b3
AS
191 else
192 return realloc (addr, size);
84914e19
AS
193
194manual_realloc:
4bd844f3 195 void *newaddr = linux_memspace_alloc (memspace, size, pin, false);
84914e19
AS
196 if (newaddr)
197 {
198 memcpy (newaddr, addr, oldsize < size ? oldsize : size);
199 linux_memspace_free (memspace, addr, oldsize, oldpin);
200 }
201
202 return newaddr;
ab7520b3
AS
203}
204
/* Hooks mapping the MEMSPACE_* operations onto the Linux-specific
   implementations defined in this file.  MEMSPACE_ALLOC does not request
   zero-initialization.  */
#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) linux_memspace_alloc (MEMSPACE, SIZE, PIN, false)
#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) linux_memspace_calloc (MEMSPACE, SIZE, PIN)
#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) linux_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN)
#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) linux_memspace_free (MEMSPACE, ADDR, SIZE, PIN)
213
214#include "../../allocator.c"