]>
Commit | Line | Data |
---|---|---|
ab7520b3 AS |
1 | /* Copyright (C) 2022 Free Software Foundation, Inc. |
2 | ||
3 | This file is part of the GNU Offloading and Multi Processing Library | |
4 | (libgomp). | |
5 | ||
6 | Libgomp is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3, or (at your option) | |
9 | any later version. | |
10 | ||
11 | Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
14 | more details. | |
15 | ||
16 | Under Section 7 of GPL version 3, you are granted additional | |
17 | permissions described in the GCC Runtime Library Exception, version | |
18 | 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | You should have received a copy of the GNU General Public License and | |
21 | a copy of the GCC Runtime Library Exception along with this program; | |
22 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | <http://www.gnu.org/licenses/>. */ | |
24 | ||
25 | /* Implement malloc routines that can handle pinned memory on Linux. | |
26 | ||
a5a4800e | 27 | Given that pinned memory is typically used to help host <-> device memory |
4bd844f3 TS |
28 | transfers, we attempt to allocate such memory using a device (really: |
29 | libgomp plugin), but fall back to mmap plus mlock if no suitable device is | |
30 | available. | |
a5a4800e | 31 | |
ab7520b3 AS |
32 | It's possible to use mlock on any heap memory, but using munlock is |
33 | problematic if there are multiple pinned allocations on the same page. | |
34 | Tracking all that manually would be possible, but adds overhead. This may | |
35 | be worth it if there are a lot of small allocations getting pinned, but | |
36 | this seems less likely in an HPC application. | |
37 | ||
38 | Instead we optimize for large pinned allocations, and use mmap to ensure | |
39 | that two pinned allocations don't share the same page. This also means | |
40 | that large allocations don't pin extra pages by being poorly aligned. */ | |
41 | ||
42 | #define _GNU_SOURCE | |
43 | #include <sys/mman.h> | |
44 | #include <string.h> | |
a5a4800e | 45 | #include <assert.h> |
ab7520b3 AS |
46 | #include "libgomp.h" |
47 | ||
/* Records how page-locked (pinned) host memory is obtained:
     -1: not yet determined (initial value),
      0: no device is used; pinned memory comes from 'mmap' plus 'mlock',
      1: a device is used via 'gomp_page_locked_host_alloc'.
   Accessed with relaxed atomic loads/exchanges by the functions below.  */
static int using_device_for_page_locked = -1;
50 | ||
ab7520b3 | 51 | static void * |
4bd844f3 TS |
52 | linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin, |
53 | bool init0) | |
ab7520b3 | 54 | { |
4bd844f3 TS |
55 | gomp_debug (0, "%s: memspace=%llu, size=%llu, pin=%d, init0=%d\n", |
56 | __FUNCTION__, (unsigned long long) memspace, | |
57 | (unsigned long long) size, pin, init0); | |
a5a4800e | 58 | |
4bd844f3 TS |
59 | void *addr; |
60 | ||
84914e19 | 61 | if (memspace == ompx_unified_shared_mem_space) |
4bd844f3 | 62 | addr = gomp_usm_alloc (size, GOMP_DEVICE_ICV); |
84914e19 | 63 | else if (pin) |
ab7520b3 | 64 | { |
a5a4800e | 65 | int using_device |
4bd844f3 | 66 | = __atomic_load_n (&using_device_for_page_locked, |
a5a4800e TS |
67 | MEMMODEL_RELAXED); |
68 | gomp_debug (0, " using_device=%d\n", | |
69 | using_device); | |
70 | if (using_device != 0) | |
71 | { | |
4bd844f3 | 72 | using_device = gomp_page_locked_host_alloc (&addr, size); |
a5a4800e | 73 | int using_device_old |
4bd844f3 | 74 | = __atomic_exchange_n (&using_device_for_page_locked, |
a5a4800e TS |
75 | using_device, MEMMODEL_RELAXED); |
76 | gomp_debug (0, " using_device=%d, using_device_old=%d\n", | |
77 | using_device, using_device_old); | |
78 | assert (using_device_old == -1 | |
79 | /* We shouldn't have concurrently changed our mind. */ | |
80 | || using_device_old == using_device); | |
81 | } | |
82 | if (using_device == 0) | |
ab7520b3 | 83 | { |
4bd844f3 TS |
84 | gomp_debug (0, " mmap\n"); |
85 | addr = mmap (NULL, size, PROT_READ | PROT_WRITE, | |
86 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); | |
87 | if (addr == MAP_FAILED) | |
88 | addr = NULL; | |
89 | else | |
a5a4800e | 90 | { |
4bd844f3 TS |
91 | /* 'mmap' zero-initializes. */ |
92 | init0 = false; | |
93 | ||
94 | gomp_debug (0, " mlock\n"); | |
95 | if (mlock (addr, size)) | |
96 | { | |
97 | gomp_debug (0, "libgomp: failed to pin memory" | |
98 | " (ulimit too low?)\n"); | |
99 | munmap (addr, size); | |
100 | addr = NULL; | |
101 | } | |
a5a4800e | 102 | } |
ab7520b3 | 103 | } |
ab7520b3 AS |
104 | } |
105 | else | |
4bd844f3 TS |
106 | addr = malloc (size); |
107 | ||
108 | if (addr && init0) | |
109 | { | |
110 | gomp_debug (0, " init0\n"); | |
111 | memset (addr, 0, size); | |
112 | } | |
113 | ||
114 | return addr; | |
ab7520b3 AS |
115 | } |
116 | ||
117 | static void * | |
118 | linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin) | |
119 | { | |
a5a4800e TS |
120 | gomp_debug (0, "%s: memspace=%llu, size=%llu, pin=%d\n", |
121 | __FUNCTION__, (unsigned long long) memspace, (unsigned long long) size, pin); | |
122 | ||
84914e19 AS |
123 | if (memspace == ompx_unified_shared_mem_space) |
124 | { | |
125 | void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV); | |
126 | memset (ret, 0, size); | |
127 | return ret; | |
128 | } | |
23f52e49 | 129 | else if (pin) |
4bd844f3 | 130 | return linux_memspace_alloc (memspace, size, pin, true); |
ab7520b3 AS |
131 | else |
132 | return calloc (1, size); | |
133 | } | |
134 | ||
135 | static void | |
136 | linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size, | |
137 | int pin) | |
138 | { | |
a5a4800e TS |
139 | gomp_debug (0, "%s: memspace=%llu, addr=%p, size=%llu, pin=%d\n", |
140 | __FUNCTION__, (unsigned long long) memspace, addr, (unsigned long long) size, pin); | |
141 | ||
84914e19 AS |
142 | if (memspace == ompx_unified_shared_mem_space) |
143 | gomp_usm_free (addr, GOMP_DEVICE_ICV); | |
144 | else if (pin) | |
a5a4800e TS |
145 | { |
146 | int using_device | |
4bd844f3 | 147 | = __atomic_load_n (&using_device_for_page_locked, |
a5a4800e TS |
148 | MEMMODEL_RELAXED); |
149 | gomp_debug (0, " using_device=%d\n", | |
150 | using_device); | |
151 | if (using_device == 1) | |
4bd844f3 | 152 | gomp_page_locked_host_free (addr); |
a5a4800e TS |
153 | else |
154 | /* 'munlock'ing is implicit with following 'munmap'. */ | |
4bd844f3 | 155 | munmap (addr, size); |
a5a4800e | 156 | } |
ab7520b3 AS |
157 | else |
158 | free (addr); | |
159 | } | |
160 | ||
161 | static void * | |
162 | linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr, | |
163 | size_t oldsize, size_t size, int oldpin, int pin) | |
164 | { | |
a5a4800e TS |
165 | gomp_debug (0, "%s: memspace=%llu, addr=%p, oldsize=%llu, size=%llu, oldpin=%d, pin=%d\n", |
166 | __FUNCTION__, (unsigned long long) memspace, addr, (unsigned long long) oldsize, (unsigned long long) size, oldpin, pin); | |
167 | ||
84914e19 AS |
168 | if (memspace == ompx_unified_shared_mem_space) |
169 | goto manual_realloc; | |
170 | else if (oldpin && pin) | |
ab7520b3 | 171 | { |
a5a4800e | 172 | /* We can only expect to be able to just 'mremap' if not using a device |
4bd844f3 | 173 | for page-locked memory. */ |
a5a4800e | 174 | int using_device |
4bd844f3 | 175 | = __atomic_load_n (&using_device_for_page_locked, |
a5a4800e TS |
176 | MEMMODEL_RELAXED); |
177 | gomp_debug (0, " using_device=%d\n", | |
178 | using_device); | |
179 | if (using_device != 0) | |
180 | goto manual_realloc; | |
181 | ||
182 | gomp_debug (0, " mremap\n"); | |
ab7520b3 AS |
183 | void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE); |
184 | if (newaddr == MAP_FAILED) | |
185 | return NULL; | |
186 | ||
187 | return newaddr; | |
188 | } | |
189 | else if (oldpin || pin) | |
84914e19 | 190 | goto manual_realloc; |
ab7520b3 AS |
191 | else |
192 | return realloc (addr, size); | |
84914e19 AS |
193 | |
194 | manual_realloc: | |
4bd844f3 | 195 | void *newaddr = linux_memspace_alloc (memspace, size, pin, false); |
84914e19 AS |
196 | if (newaddr) |
197 | { | |
198 | memcpy (newaddr, addr, oldsize < size ? oldsize : size); | |
199 | linux_memspace_free (memspace, addr, oldsize, oldpin); | |
200 | } | |
201 | ||
202 | return newaddr; | |
ab7520b3 AS |
203 | } |
204 | ||
/* Memory-space hooks, defined in terms of the Linux-specific routines
   above.  Plain allocation does not request zero-filling.  */
#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \
  linux_memspace_free (MEMSPACE, ADDR, SIZE, PIN)
#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
  linux_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN)
#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
  linux_memspace_calloc (MEMSPACE, SIZE, PIN)
#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
  linux_memspace_alloc (MEMSPACE, SIZE, PIN, false)
213 | ||
214 | #include "../../allocator.c" |