/* Copyright (C) 2023 Free Software Foundation, Inc.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This is a basic "malloc" implementation intended for use with small,
   low-latency memories.

   To use this template, define BASIC_ALLOC_PREFIX, and then #include the
   source file.  The other configuration macros are optional.

   The root heap descriptor is stored in the first bytes of the heap, and each
   free chunk contains a similar descriptor for the next free chunk in the
   chain.

   The descriptor is two values: offset and size, which describe the
   location of a chunk of memory available for allocation.  The offset is
   relative to the base of the heap.  The special offset value 0xffffffff
   indicates that the heap (free chain) is locked.  The offset and size are
   32-bit values so the base alignment can be 8 bytes.

   Memory is allocated to the first free chunk that fits.  The free chain
   is always stored in order of the offset to assist coalescing adjacent
   chunks.  */
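
/* Usage sketch (illustrative only; the prefix "my_lowlat" and the heap
   size below are hypothetical, not part of this file).  An including
   source file might do:

       #define BASIC_ALLOC_PREFIX my_lowlat
       #include "basic-allocator.c"

   which defines my_lowlat_init, my_lowlat_alloc, my_lowlat_calloc,
   my_lowlat_free, and my_lowlat_realloc, all operating on a
   caller-provided memory region:

       char heap[1 << 16];
       my_lowlat_init (heap, sizeof heap);
       void *p = my_lowlat_alloc (heap, 32);
       p = my_lowlat_realloc (heap, p, 32, 64);
       my_lowlat_free (heap, p, 64);  */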

#include "libgomp.h"

#ifndef BASIC_ALLOC_PREFIX
#error "BASIC_ALLOC_PREFIX not defined."
#endif

#ifndef BASIC_ALLOC_YIELD
#define BASIC_ALLOC_YIELD
#endif

#define ALIGN(VAR) (((VAR) + 7) & ~7)  /* 8-byte granularity.  */
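/* For example, ALIGN (1) == 8, ALIGN (8) == 8, and ALIGN (13) == 16.  */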

#define fn1(prefix, name) prefix ## _ ## name
#define fn(prefix, name) fn1 (prefix, name)
#define basic_alloc_init fn(BASIC_ALLOC_PREFIX,init)
#define basic_alloc_alloc fn(BASIC_ALLOC_PREFIX,alloc)
#define basic_alloc_calloc fn(BASIC_ALLOC_PREFIX,calloc)
#define basic_alloc_free fn(BASIC_ALLOC_PREFIX,free)
#define basic_alloc_realloc fn(BASIC_ALLOC_PREFIX,realloc)

typedef struct {
  uint32_t offset;
  uint32_t size;
} heapdesc;

void
basic_alloc_init (char *heap, size_t limit)
{
  if (heap == NULL)
    return;

  /* Initialize the head of the free chain.  */
  heapdesc *root = (heapdesc *) heap;
  root->offset = ALIGN(1);
  root->size = limit - root->offset;

  /* And terminate the chain.  */
  heapdesc *next = (heapdesc *) (heap + root->offset);
  next->offset = 0;
  next->size = 0;
}

static void *
basic_alloc_alloc (char *heap, size_t size)
{
  if (heap == NULL)
    return NULL;

  /* Memory is allocated in N-byte granularity.  */
  size = ALIGN (size);

  /* Acquire a lock on the low-latency heap.  */
  heapdesc root, *root_ptr = (heapdesc *) heap;
  do
    {
      root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff,
                                         MEMMODEL_ACQUIRE);
      if (root.offset != 0xffffffff)
        {
          root.size = root_ptr->size;
          break;
        }
      /* Spin.  */
      BASIC_ALLOC_YIELD;
    }
  while (1);
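
  /* While the heap is locked, other threads exchanging ROOT_PTR->offset
     get 0xffffffff and spin; the true root was captured into ROOT above
     and is written back by the releasing store at the end.  */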

  /* Walk the free chain.  */
  heapdesc chunk = root;
  heapdesc *prev_chunkptr = NULL;
  heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset);
  heapdesc onward_chain = *chunkptr;
  while (chunk.size != 0 && (uint32_t) size > chunk.size)
    {
      chunk = onward_chain;
      prev_chunkptr = chunkptr;
      chunkptr = (heapdesc *) (heap + chunk.offset);
      onward_chain = *chunkptr;
    }

  void *result = NULL;
  if (chunk.size != 0)
    {
      /* Allocation successful.  */
      result = chunkptr;

      /* Update the free chain.  */
      heapdesc stillfree = chunk;
      stillfree.offset += size;
      stillfree.size -= size;
      heapdesc *stillfreeptr = (heapdesc *) (heap + stillfree.offset);

      if (stillfree.size == 0)
        /* The whole chunk was used.  */
        stillfree = onward_chain;
      else
        /* The chunk was split, so restore the onward chain.  */
        *stillfreeptr = onward_chain;

      /* The previous free slot or root now points to stillfree.  */
      if (prev_chunkptr)
        *prev_chunkptr = stillfree;
      else
        root = stillfree;
    }

  /* Update the free chain root and release the lock.  */
  root_ptr->size = root.size;
  __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE);

  return result;
}

static void *
basic_alloc_calloc (char *heap, size_t size)
{
  /* Memory is allocated in N-byte granularity.  */
  size = ALIGN (size);

  uint64_t *result = basic_alloc_alloc (heap, size);
  if (result)
    /* Inline memset in which we know size is a multiple of 8.  */
    for (unsigned i = 0; i < (unsigned) size / 8; i++)
      result[i] = 0;

  return result;
}

static void
basic_alloc_free (char *heap, void *addr, size_t size)
{
  /* Memory is allocated in N-byte granularity.  */
  size = ALIGN (size);

  /* Acquire a lock on the low-latency heap.  */
  heapdesc root, *root_ptr = (heapdesc *) heap;
  do
    {
      root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff,
                                         MEMMODEL_ACQUIRE);
      if (root.offset != 0xffffffff)
        {
          root.size = root_ptr->size;
          break;
        }
      /* Spin.  */
      BASIC_ALLOC_YIELD;
    }
  while (1);

  /* Walk the free chain to find where to insert a new entry.  */
  heapdesc chunk = root, prev_chunk = {0};
  heapdesc *prev_chunkptr = NULL, *prevprev_chunkptr = NULL;
  heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset);
  heapdesc onward_chain = *chunkptr;
  while (chunk.size != 0 && addr > (void *) chunkptr)
    {
      prev_chunk = chunk;
      chunk = onward_chain;
      prevprev_chunkptr = prev_chunkptr;
      prev_chunkptr = chunkptr;
      chunkptr = (heapdesc *) (heap + chunk.offset);
      onward_chain = *chunkptr;
    }

  /* Create the new chunk descriptor.  */
  heapdesc newfreechunk;
  newfreechunk.offset = (uint32_t) ((uintptr_t) addr - (uintptr_t) heap);
  newfreechunk.size = (uint32_t) size;

  /* Coalesce adjacent free chunks.  */
  if (newfreechunk.offset + size == chunk.offset)
    {
      /* Free chunk follows.  */
      newfreechunk.size += chunk.size;
      chunk = onward_chain;
    }
  if (prev_chunkptr)
    {
      if (prev_chunk.offset + prev_chunk.size
          == newfreechunk.offset)
        {
          /* Free chunk precedes.  */
          newfreechunk.offset = prev_chunk.offset;
          newfreechunk.size += prev_chunk.size;
          addr = heap + prev_chunk.offset;
          prev_chunkptr = prevprev_chunkptr;
        }
    }

  /* Update the free chain in the new and previous chunks.  */
  *(heapdesc *) addr = chunk;
  if (prev_chunkptr)
    *prev_chunkptr = newfreechunk;
  else
    root = newfreechunk;

  /* Update the free chain root and release the lock.  */
  root_ptr->size = root.size;
  __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE);
}

static void *
basic_alloc_realloc (char *heap, void *addr, size_t oldsize,
                     size_t size)
{
  /* Memory is allocated in N-byte granularity.  */
  oldsize = ALIGN (oldsize);
  size = ALIGN (size);

  if (oldsize == size)
    return addr;

  /* Acquire a lock on the low-latency heap.  */
  heapdesc root, *root_ptr = (heapdesc *) heap;
  do
    {
      root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff,
                                         MEMMODEL_ACQUIRE);
      if (root.offset != 0xffffffff)
        {
          root.size = root_ptr->size;
          break;
        }
      /* Spin.  */
      BASIC_ALLOC_YIELD;
    }
  while (1);

  /* Walk the free chain.  */
  heapdesc chunk = root;
  heapdesc *prev_chunkptr = NULL;
  heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset);
  heapdesc onward_chain = *chunkptr;
  while (chunk.size != 0 && (void *) chunkptr < addr)
    {
      chunk = onward_chain;
      prev_chunkptr = chunkptr;
      chunkptr = (heapdesc *) (heap + chunk.offset);
      onward_chain = *chunkptr;
    }

  void *result = NULL;
  if (size < oldsize)
    {
      /* The new allocation is smaller than the old; we can always
         shrink an allocation in place.  */
      result = addr;

      heapdesc *nowfreeptr = (heapdesc *) (addr + size);

      /* Update the free chain.  */
      heapdesc nowfree;
      nowfree.offset = (char *) nowfreeptr - heap;
      nowfree.size = oldsize - size;

      if (nowfree.offset + nowfree.size == chunk.offset)
        {
          /* Coalesce following free chunk.  */
          nowfree.size += chunk.size;
          *nowfreeptr = onward_chain;
        }
      else
        *nowfreeptr = chunk;

      /* The previous free slot or root now points to nowfree.  */
      if (prev_chunkptr)
        *prev_chunkptr = nowfree;
      else
        root = nowfree;
    }
  else if (chunk.size != 0
           && (char *) addr + oldsize == (char *) chunkptr
           && chunk.size >= size - oldsize)
    {
      /* The new allocation is larger than the old, and we found a
         large enough free block right after the existing block,
         so we extend into that space.  */
      result = addr;

      uint32_t delta = size - oldsize;

      /* Update the free chain.  */
      heapdesc stillfree = chunk;
      stillfree.offset += delta;
      stillfree.size -= delta;
      heapdesc *stillfreeptr = (heapdesc *) (heap + stillfree.offset);

      if (stillfree.size == 0)
        /* The whole chunk was used.  */
        stillfree = onward_chain;
      else
        /* The chunk was split, so restore the onward chain.  */
        *stillfreeptr = onward_chain;

      /* The previous free slot or root now points to stillfree.  */
      if (prev_chunkptr)
        *prev_chunkptr = stillfree;
      else
        root = stillfree;
    }
  /* Else realloc in-place has failed and result remains NULL.  */

  /* Update the free chain root and release the lock.  */
  root_ptr->size = root.size;
  __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE);

  if (result == NULL)
    {
      /* The allocation could not be extended in place, so we simply
         allocate fresh memory and move the data.  If we can't allocate
         from low-latency memory then we leave the original allocation
         intact and return NULL.
         We could do a fall-back to main memory, but we don't know what
         the fall-back trait said to do.  */
      result = basic_alloc_alloc (heap, size);
      if (result != NULL)
        {
          /* Inline memcpy in which we know oldsize is a multiple of 8.  */
          uint64_t *from = addr, *to = result;
          for (unsigned i = 0; i < (unsigned) oldsize / 8; i++)
            to[i] = from[i];

          basic_alloc_free (heap, addr, oldsize);
        }
    }

  return result;
}

#undef ALIGN
#undef fn1
#undef fn
#undef basic_alloc_init
#undef basic_alloc_alloc
#undef basic_alloc_calloc
#undef basic_alloc_free
#undef basic_alloc_realloc