]>
Commit | Line | Data |
---|---|---|
f0ed8226 MSO |
1 | /* Alternative malloc implementation for multiple threads without |
2 | lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas | |
3 | ||
4 | Boost Software License - Version 1.0 - August 17th, 2003 | |
5 | ||
6 | Permission is hereby granted, free of charge, to any person or organization | |
7 | obtaining a copy of the software and accompanying documentation covered by | |
8 | this license (the "Software") to use, reproduce, display, distribute, | |
9 | execute, and transmit the Software, and to prepare derivative works of the | |
10 | Software, and to permit third-parties to whom the Software is furnished to | |
11 | do so, all subject to the following: | |
12 | ||
13 | The copyright notices in the Software and this entire statement, including | |
14 | the above license grant, this restriction and the following disclaimer, | |
15 | must be included in all copies of the Software, in whole or in part, and | |
16 | all derivative works of the Software, unless such copies or derivative | |
17 | works are solely in the form of machine-executable object code generated by | |
18 | a source language processor. | |
19 | ||
20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
22 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT | |
23 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE | |
24 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, | |
25 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
26 | DEALINGS IN THE SOFTWARE. | |
27 | */ | |
28 | ||
29 | #ifdef _MSC_VER | |
30 | /* Enable full aliasing on MSVC */ | |
31 | /*#pragma optimize("a", on)*/ | |
32 | #endif | |
33 | ||
4550c164 JK |
34 | #pragma GCC diagnostic ignored "-Wunused-parameter" |
35 | ||
f0ed8226 MSO |
36 | /*#define FULLSANITYCHECKS*/ |
37 | ||
38 | #include "nedmalloc.h" | |
a21077e7 | 39 | #if defined(WIN32) |
f0ed8226 MSO |
40 | #include <malloc.h> |
41 | #endif | |
42 | #define MSPACES 1 | |
43 | #define ONLY_MSPACES 1 | |
44 | #ifndef USE_LOCKS | |
45 | #define USE_LOCKS 1 | |
46 | #endif | |
47 | #define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */ | |
48 | #undef DEBUG /* dlmalloc wants DEBUG either 0 or 1 */ | |
49 | #ifdef _DEBUG | |
50 | #define DEBUG 1 | |
51 | #else | |
52 | #define DEBUG 0 | |
53 | #endif | |
54 | #ifdef NDEBUG /* Disable assert checking on release builds */ | |
55 | #undef DEBUG | |
56 | #endif | |
57 | /* The default of 64Kb means we spend too much time kernel-side */ | |
58 | #ifndef DEFAULT_GRANULARITY | |
59 | #define DEFAULT_GRANULARITY (1*1024*1024) | |
60 | #endif | |
61 | /*#define USE_SPIN_LOCKS 0*/ | |
62 | ||
63 | ||
64 | /*#define FORCEINLINE*/ | |
65 | #include "malloc.c.h" | |
66 | #ifdef NDEBUG /* Disable assert checking on release builds */ | |
67 | #undef DEBUG | |
68 | #endif | |
69 | ||
70 | /* The maximum concurrent threads in a pool possible */ | |
71 | #ifndef MAXTHREADSINPOOL | |
72 | #define MAXTHREADSINPOOL 16 | |
73 | #endif | |
74 | /* The maximum number of threadcaches which can be allocated */ | |
75 | #ifndef THREADCACHEMAXCACHES | |
76 | #define THREADCACHEMAXCACHES 256 | |
77 | #endif | |
78 | /* The maximum size to be allocated from the thread cache */ | |
79 | #ifndef THREADCACHEMAX | |
80 | #define THREADCACHEMAX 8192 | |
81 | #endif | |
82 | #if 0 | |
83 | /* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */ | |
84 | #define THREADCACHEMAXBINS ((13-4)*2) | |
85 | #else | |
86 | /* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */ | |
87 | #define THREADCACHEMAXBINS (13-4) | |
88 | #endif | |
89 | /* Point at which the free space in a thread cache is garbage collected */ | |
90 | #ifndef THREADCACHEMAXFREESPACE | |
91 | #define THREADCACHEMAXFREESPACE (512*1024) | |
92 | #endif | |
93 | ||
94 | ||
#ifdef WIN32
/* Thread-local-storage shim: maps one tiny API onto either Win32 TLS
   slots or POSIX pthread keys.  By convention (matching pthreads),
   TLSALLOC/TLSFREE/TLSSET evaluate non-zero on FAILURE, which is why
   the Win32 versions negate the BOOL results. */
#define TLSVAR			DWORD
#define TLSALLOC(k)	(*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
#define TLSFREE(k)		(!TlsFree(k))
#define TLSGET(k)		TlsGetValue(k)
#define TLSSET(k, a)	(!TlsSetValue(k, a))
#ifdef DEBUG
/* Debug wrapper for TlsGetValue: TlsGetValue returns 0 both for "slot
   holds 0" and for failure, so check GetLastError to catch real errors. */
static LPVOID ChkedTlsGetValue(DWORD idx)
{
	LPVOID ret=TlsGetValue(idx);
	assert(S_OK==GetLastError());
	return ret;
}
#undef TLSGET
#define TLSGET(k) ChkedTlsGetValue(k)
#endif
#else
#define TLSVAR			pthread_key_t
#define TLSALLOC(k)	pthread_key_create(k, 0)
#define TLSFREE(k)		pthread_key_delete(k)
#define TLSGET(k)		pthread_getspecific(k)
#define TLSSET(k, a)	pthread_setspecific(k, a)
#endif
118 | ||
119 | #if 0 | |
120 | /* Only enable if testing with valgrind. Causes misoperation */ | |
121 | #define mspace_malloc(p, s) malloc(s) | |
122 | #define mspace_realloc(p, m, s) realloc(m, s) | |
123 | #define mspace_calloc(p, n, s) calloc(n, s) | |
124 | #define mspace_free(p, m) free(m) | |
125 | #endif | |
126 | ||
127 | ||
128 | #if defined(__cplusplus) | |
129 | #if !defined(NO_NED_NAMESPACE) | |
130 | namespace nedalloc { | |
131 | #else | |
132 | extern "C" { | |
133 | #endif | |
134 | #endif | |
135 | ||
/* Returns the number of usable bytes in an allocation returned by any
   of the ned* allocators, or 0 when mem is 0.  Reads the dlmalloc chunk
   header directly, so mem must be a live (not yet freed) block. */
size_t nedblksize(void *mem) THROWSPEC
{
#if 0
	/* Only enable if testing with valgrind. Causes misoperation */
	return THREADCACHEMAX;
#else
	if(mem)
	{
		mchunkptr p=mem2chunk(mem);
		assert(cinuse(p));	/* If this fails, someone tried to free a block twice */
		if(cinuse(p))
			return chunksize(p)-overhead_for(p);
	}
	return 0;
#endif
}
152 | ||
/* Convenience wrappers: the plain ned* API simply forwards to the
   pool-taking nedp* API with pool 0, i.e. the lazily-initialised
   system pool (syspool). */
void nedsetvalue(void *v) THROWSPEC			{ nedpsetvalue(0, v); }
void * nedmalloc(size_t size) THROWSPEC		{ return nedpmalloc(0, size); }
void * nedcalloc(size_t no, size_t size) THROWSPEC	{ return nedpcalloc(0, no, size); }
void * nedrealloc(void *mem, size_t size) THROWSPEC	{ return nedprealloc(0, mem, size); }
void   nedfree(void *mem) THROWSPEC			{ nedpfree(0, mem); }
void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); }
#if !NO_MALLINFO
struct mallinfo nedmallinfo(void) THROWSPEC	{ return nedpmallinfo(0); }
#endif
int    nedmallopt(int parno, int value) THROWSPEC	{ return nedpmallopt(0, parno, value); }
int    nedmalloc_trim(size_t pad) THROWSPEC	{ return nedpmalloc_trim(0, pad); }
void   nedmalloc_stats(void) THROWSPEC			{ nedpmalloc_stats(0); }
size_t nedmalloc_footprint(void) THROWSPEC	{ return nedpmalloc_footprint(0); }
void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC	{ return nedpindependent_calloc(0, elemsno, elemsize, chunks); }
void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC	{ return nedpindependent_comalloc(0, elems, sizes, chunks); }
168 | ||
struct threadcacheblk_t;
typedef struct threadcacheblk_t threadcacheblk;
/* Header stored IN-PLACE inside each free block held by a thread cache;
   blocks in a bin form a doubly-linked list (newest at bins[idx*2],
   oldest at bins[idx*2+1]). */
struct threadcacheblk_t
{ /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
#ifdef FULLSANITYCHECKS
	unsigned int magic;
#endif
	unsigned int lastUsed, size;	/* lastUsed = tc->frees stamp at time of free */
	threadcacheblk *next, *prev;
};
/* Per-thread cache of recently freed blocks, bucketed by power-of-two
   size class so mallocs can be satisfied without taking any lock. */
typedef struct threadcache_t
{
#ifdef FULLSANITYCHECKS
	unsigned int magic1;
#endif
	int mymspace;			/* Last mspace entry this thread used */
	long threadid;
	unsigned int mallocs, frees, successes;	/* hit-rate statistics; frees also acts as a logical clock */
	size_t freeInCache;		/* How much free space is stored in this cache */
	threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];	/* [head, tail] pair per size class */
#ifdef FULLSANITYCHECKS
	unsigned int magic2;
#endif
} threadcache;
/* A pool: a set of mspaces shared by up to p->threads threads, plus the
   per-thread caches that sit in front of them. */
struct nedpool_t
{
	MLOCK_T mutex;
	void *uservalue;
	int threads;			/* Max entries in m to use */
	threadcache *caches[THREADCACHEMAXCACHES];
	TLSVAR mycache;			/* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
	mstate m[MAXTHREADSINPOOL+1];	/* mspace entries for this pool */
};
/* The implicit pool used when callers pass pool==0; lazily initialised. */
static nedpool syspool;
203 | ||
/* Maps an allocation size to a thread-cache bin index: effectively
   floor(log2(size)) - 4, computed as the top set bit of size>>4.
   Uses the fastest bit-scan available for the compiler, falling back
   to a branch-free SWAR popcount trick. */
static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC
{ /* 8=1000	16=10000	20=10100	24=11000	32=100000	48=110000	4096=1000000000000 */
	unsigned int topbit, size=(unsigned int)(_size>>4);
	/* 16=1		20=1	24=1	32=10	48=11	64=100	96=110	128=1000	4096=100000000 */

#if defined(__GNUC__)
	topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
#elif defined(_MSC_VER) && _MSC_VER>=1300
	{
		unsigned long bsrTopBit;

		_BitScanReverse(&bsrTopBit, size);

		topbit = bsrTopBit;
	}
#else
#if 0
	union {
		unsigned asInt[2];
		double asDouble;
	};
	int n;

	asDouble = (double)size + 0.5;
	topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
#else
	{
		/* Portable fallback: smear the top bit right, invert, popcount
		   the leading zeros, then topbit = 31 - clz. */
		unsigned int x=size;
		x = x | (x >> 1);
		x = x | (x >> 2);
		x = x | (x >> 4);
		x = x | (x >> 8);
		x = x | (x >>16);
		x = ~x;
		x = x - ((x >> 1) & 0x55555555);
		x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
		x = (x + (x >> 4)) & 0x0F0F0F0F;
		x = x + (x << 8);
		x = x + (x << 16);
		topbit=31 - (x >> 24);
	}
#endif
#endif
	return topbit;
}
249 | ||
250 | ||
#ifdef FULLSANITYCHECKS
/* Validates one [head, tail] bin pair: both ends set or both clear,
   magic markers intact, head has no prev and tail has no next. */
static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
{
	assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
	if(ptr[0] && ptr[1])
	{
		assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
		assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
		assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
		assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
		assert(!ptr[0]->prev);
		assert(!ptr[1]->next);
		if(ptr[0]==ptr[1])
		{
			assert(!ptr[0]->next);
			assert(!ptr[1]->prev);
		}
	}
}
/* Walks every bin of a thread cache, checking each block's magic and
   the forward/backward consistency of the linked lists. */
static void tcfullsanitycheck(threadcache *tc) THROWSPEC
{
	threadcacheblk **tcbptr=tc->bins;
	int n;
	for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
	{
		threadcacheblk *b, *ob=0;
		tcsanitycheck(tcbptr);
		for(b=tcbptr[0]; b; ob=b, b=b->next)
		{
			assert(*(unsigned int *) "NEDN"==b->magic);
			assert(!ob || ob->next==b);
			assert(!ob || b->prev==ob);
		}
	}
}
#endif
287 | ||
/* Releases cached blocks whose age (in units of frees performed by this
   thread) is >= age, returning them to their owning mspace via
   mspace_free.  age==0 empties the cache completely.  Works from the
   tail (oldest) end of each bin's list. */
static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC
{
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	if(tc->freeInCache)
	{
		threadcacheblk **tcbptr=tc->bins;
		int n;
		for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
		{
			threadcacheblk **tcb=tcbptr+1;		/* come from oldest end of list */
			/*tcsanitycheck(tcbptr);*/
			for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
			{
				threadcacheblk *f=*tcb;
				size_t blksize=f->size; /*nedblksize(f);*/
				assert(blksize<=nedblksize(f));
				assert(blksize);
#ifdef FULLSANITYCHECKS
				assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
#endif
				/* Unlink f from the tail; clear the head too if the bin empties */
				*tcb=(*tcb)->prev;
				if(*tcb)
					(*tcb)->next=0;
				else
					*tcbptr=0;
				tc->freeInCache-=blksize;
				assert((long) tc->freeInCache>=0);
				/* mspace 0 => let footers route the free to the right mspace */
				mspace_free(0, f);
				/*tcsanitycheck(tcbptr);*/
			}
		}
	}
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
}
326 | static void DestroyCaches(nedpool *p) THROWSPEC | |
327 | { | |
f0ed8226 MSO |
328 | { |
329 | threadcache *tc; | |
330 | int n; | |
331 | for(n=0; n<THREADCACHEMAXCACHES; n++) | |
332 | { | |
333 | if((tc=p->caches[n])) | |
334 | { | |
335 | tc->frees++; | |
336 | RemoveCacheEntries(p, tc, 0); | |
337 | assert(!tc->freeInCache); | |
338 | tc->mymspace=-1; | |
339 | tc->threadid=0; | |
340 | mspace_free(0, tc); | |
341 | p->caches[n]=0; | |
342 | } | |
343 | } | |
344 | } | |
345 | } | |
346 | ||
/* Allocates and registers a thread cache for the calling thread in pool
   p, stores its index+1 in the thread's TLS slot, and assigns the
   thread a starting mspace.  Returns 0 if the cache table is full or
   the allocation fails (caller then disables caching for the thread). */
static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC
{
	threadcache *tc=0;
	int n, end;
	ACQUIRE_LOCK(&p->mutex);
	/* Find the first free slot in the cache table */
	for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
	if(THREADCACHEMAXCACHES==n)
	{	/* List exhausted, so disable for this thread */
		RELEASE_LOCK(&p->mutex);
		return 0;
	}
	tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache));
	if(!tc)
	{
		RELEASE_LOCK(&p->mutex);
		return 0;
	}
#ifdef FULLSANITYCHECKS
	tc->magic1=*(unsigned int *)"NEDMALC1";
	tc->magic2=*(unsigned int *)"NEDMALC2";
#endif
	tc->threadid=(long)(size_t)CURRENT_THREAD;
	/* Spread threads across the currently existing mspaces */
	for(end=0; p->m[end]; end++);
	tc->mymspace=tc->threadid % end;
	RELEASE_LOCK(&p->mutex);
	/* TLS stores index+1 so that 0 can mean "unset" */
	if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
	return tc;
}
375 | ||
/* Attempts to satisfy an allocation from the calling thread's cache.
   Rounds *size up to the bin's size class (so the caller knows the real
   usable size), pops the newest block from the matching bin (or the
   next bin up), and returns it, or 0 on a cache miss. */
static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC
{
	void *ret=0;
	unsigned int bestsize;
	unsigned int idx=size2binidx(*size);
	size_t blksize=0;
	threadcacheblk *blk, **binsptr;
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	/* Calculate best fit bin size */
	bestsize=1<<(idx+4);
#if 0
	/* Finer grained bin fit */
	idx<<=1;
	if(*size>bestsize)
	{
		idx++;
		bestsize+=bestsize>>1;
	}
	if(*size>bestsize)
	{
		idx++;
		bestsize=1<<(4+(idx>>1));
	}
#else
	if(*size>bestsize)
	{
		idx++;
		bestsize<<=1;
	}
#endif
	assert(bestsize>=*size);
	if(*size<bestsize) *size=bestsize;	/* round request up to the size class */
	assert(*size<=THREADCACHEMAX);
	assert(idx<=THREADCACHEMAXBINS);
	binsptr=&tc->bins[idx*2];
	/* Try to match close, but move up a bin if necessary */
	blk=*binsptr;
	if(!blk || blk->size<*size)
	{	/* Bump it up a bin */
		if(idx<THREADCACHEMAXBINS)
		{
			idx++;
			binsptr+=2;
			blk=*binsptr;
		}
	}
	if(blk)
	{
		blksize=blk->size; /*nedblksize(blk);*/
		assert(nedblksize(blk)>=blksize);
		assert(blksize>=*size);
		/* Pop blk from the head of the bin's list */
		if(blk->next)
			blk->next->prev=0;
		*binsptr=blk->next;
		if(!*binsptr)
			binsptr[1]=0;	/* bin emptied: clear tail pointer too */
#ifdef FULLSANITYCHECKS
		blk->magic=0;
#endif
		assert(binsptr[0]!=blk && binsptr[1]!=blk);
		assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
		/*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/
		ret=(void *) blk;
	}
	++tc->mallocs;
	if(ret)
	{
		assert(blksize>=*size);
		++tc->successes;
		tc->freeInCache-=blksize;
		assert((long) tc->freeInCache>=0);
	}
#if defined(DEBUG) && 0
	if(!(tc->mallocs & 0xfff))
	{
		printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
			(float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
	}
#endif
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	return ret;
}
462 | static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC | |
463 | { | |
464 | unsigned int age=THREADCACHEMAXFREESPACE/8192; | |
465 | /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/ | |
466 | while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE) | |
467 | { | |
468 | RemoveCacheEntries(p, tc, age); | |
469 | /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/ | |
470 | age>>=1; | |
471 | } | |
472 | /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/ | |
473 | } | |
/* Places a freed block of known size into the calling thread's cache
   rather than returning it to its mspace.  The block is pushed onto the
   head (newest end) of its size-class bin; if the cache grows past
   THREADCACHEMAXFREESPACE, older entries are released. */
static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC
{
	unsigned int bestsize;
	unsigned int idx=size2binidx(size);
	threadcacheblk **binsptr, *tck=(threadcacheblk *) mem;
	assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
#ifdef DEBUG
	{	/* Make sure this is a valid memory block */
		mchunkptr p  = mem2chunk(mem);
		mstate fm = get_mstate_for(p);
		if (!ok_magic(fm)) {
			USAGE_ERROR_ACTION(fm, p);
			return;
		}
	}
#endif
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	/* Calculate best fit bin size */
	bestsize=1<<(idx+4);
#if 0
	/* Finer grained bin fit */
	idx<<=1;
	if(size>bestsize)
	{
		unsigned int biggerbestsize=bestsize+bestsize<<1;
		if(size>=biggerbestsize)
		{
			idx++;
			bestsize=biggerbestsize;
		}
	}
#endif
	if(bestsize!=size)	/* dlmalloc can round up, so we round down to preserve indexing */
		size=bestsize;
	binsptr=&tc->bins[idx*2];
	assert(idx<=THREADCACHEMAXBINS);
	if(tck==*binsptr)
	{	/* Head of the bin is this very block: a double free */
		fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", (void *)tck);
		abort();
	}
#ifdef FULLSANITYCHECKS
	tck->magic=*(unsigned int *) "NEDN";
#endif
	/* Stamp with the current logical time so RemoveCacheEntries can age it */
	tck->lastUsed=++tc->frees;
	tck->size=(unsigned int) size;
	/* Push onto the head of the bin's doubly-linked list */
	tck->next=*binsptr;
	tck->prev=0;
	if(tck->next)
		tck->next->prev=tck;
	else
		binsptr[1]=tck;		/* bin was empty: new block is also the tail */
	assert(!*binsptr || (*binsptr)->size==tck->size);
	*binsptr=tck;
	assert(tck==tc->bins[idx*2]);
	assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
	/*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
	tc->freeInCache+=size;
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
#if 1
	if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
		ReleaseFreeInCache(p, tc, mymspace);
#endif
}
542 | ||
543 | ||
544 | ||
545 | ||
/* One-time initialisation of a pool: creates its mutex, TLS key and
   first mspace.  Idempotent (returns early if p->threads is already
   set).  Returns 1 on success, 0 on failure — except for the system
   pool (threads<0), where failure aborts the process.  Serialised via
   dlmalloc's global lock. */
static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC
{	/* threads is -1 for system pool */
	ensure_initialization();
	ACQUIRE_MALLOC_GLOBAL_LOCK();
	if(p->threads) goto done;	/* someone else beat us to it */
	if(INITIAL_LOCK(&p->mutex)) goto err;
	if(TLSALLOC(&p->mycache)) goto err;
	if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
	p->m[0]->extp=p;	/* back-pointer so frees can find the pool */
	p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
done:
	RELEASE_MALLOC_GLOBAL_LOCK();
	return 1;
err:
	if(threads<0)
		abort();			/* If you can't allocate for system pool, we're screwed */
	/* Unwind whatever was set up before the failure */
	DestroyCaches(p);
	if(p->m[0])
	{
		destroy_mspace(p->m[0]);
		p->m[0]=0;
	}
	if(p->mycache)
	{
		if(TLSFREE(p->mycache)) abort();
		p->mycache=0;
	}
	RELEASE_MALLOC_GLOBAL_LOCK();
	return 0;
}
/* Called when the thread's last-used mspace is contended.  Scans all
   existing mspaces for one whose lock can be taken immediately; failing
   that, creates a new mspace (up to p->threads of them); failing that,
   blocks on the last-used mspace.  Always returns a LOCKED mspace and
   records the choice in *lastUsed / the thread cache / TLS. */
static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC
{ /* Gets called when thread's last used mspace is in use. The strategy
     is to run through the list of all available mspaces looking for an
     unlocked one and if we fail, we create a new one so long as we don't
     exceed p->threads */
	int n, end;
	/* First pass: mspaces after the last-used one */
	for(n=end=*lastUsed+1; p->m[n]; end=++n)
	{
		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
	}
	/* Second pass: wrap around to the ones before it */
	for(n=0; n<*lastUsed && p->m[n]; n++)
	{
		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
	}
	if(end<p->threads)
	{
		mstate temp;
		/* Create the new mspace BEFORE taking the pool lock */
		if(!(temp=(mstate) create_mspace(size, 1)))
			goto badexit;
		/* Now we're ready to modify the lists, we lock */
		ACQUIRE_LOCK(&p->mutex);
		while(p->m[end] && end<p->threads)
			end++;	/* another thread may have appended in the meantime */
		if(end>=p->threads)
		{	/* Drat, must destroy it now */
			RELEASE_LOCK(&p->mutex);
			destroy_mspace((mspace) temp);
			goto badexit;
		}
		/* We really want to make sure this goes into memory now but we
		   have to be careful of breaking aliasing rules, so write it twice */
		{
			volatile struct malloc_state **_m=(volatile struct malloc_state **) &p->m[end];
			*_m=(p->m[end]=temp);
		}
		ACQUIRE_LOCK(&p->m[end]->mutex);
		/*printf("Created mspace idx %d\n", end);*/
		RELEASE_LOCK(&p->mutex);
		n=end;
		goto found;
	}
	/* Let it lock on the last one it used */
badexit:
	ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
	return p->m[*lastUsed];
found:
	*lastUsed=n;
	if(tc)
		tc->mymspace=n;
	else
	{	/* No cache: remember the mspace directly in TLS as -(n+1) */
		if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
	}
	return p->m[n];
}
631 | ||
632 | nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC | |
633 | { | |
634 | nedpool *ret; | |
635 | if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0; | |
636 | if(!InitPool(ret, capacity, threads)) | |
637 | { | |
638 | nedpfree(0, ret); | |
639 | return 0; | |
640 | } | |
641 | return ret; | |
642 | } | |
643 | void neddestroypool(nedpool *p) THROWSPEC | |
644 | { | |
645 | int n; | |
646 | ACQUIRE_LOCK(&p->mutex); | |
647 | DestroyCaches(p); | |
648 | for(n=0; p->m[n]; n++) | |
649 | { | |
650 | destroy_mspace(p->m[n]); | |
651 | p->m[n]=0; | |
652 | } | |
653 | RELEASE_LOCK(&p->mutex); | |
654 | if(TLSFREE(p->mycache)) abort(); | |
655 | nedpfree(0, p); | |
656 | } | |
657 | ||
658 | void nedpsetvalue(nedpool *p, void *v) THROWSPEC | |
659 | { | |
660 | if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } | |
661 | p->uservalue=v; | |
662 | } | |
/* Given a block allocated by this allocator, returns the user value of
   the pool it came from (and stores the pool in *p when p is non-0).
   Returns 0 if mem does not look like a valid in-use ned* chunk.  The
   battery of checks probes dlmalloc's chunk invariants to reject
   foreign pointers as safely as possible. */
void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
{
	nedpool *np=0;
	mchunkptr mcp=mem2chunk(mem);
	mstate fm;
	if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0;
	if(!cinuse(mcp)) return 0;
	if(!next_pinuse(mcp)) return 0;
	if(!is_mmapped(mcp) && !pinuse(mcp))
	{
		if(next_chunk(prev_chunk(mcp))!=mcp) return 0;
	}
	fm=get_mstate_for(mcp);		/* footer points back at the owning mspace */
	if(!ok_magic(fm)) return 0;
	if(!ok_address(fm, mcp)) return 0;
	if(!fm->extp) return 0;		/* mspace not owned by a nedpool */
	np=(nedpool *) fm->extp;
	if(p) *p=np;
	return np->uservalue;
}
683 | ||
/* Permanently disables the thread cache for the CALLING thread in pool
   p (system pool when p is 0): flushes and frees any existing cache and
   stores a negative sentinel in TLS so future allocations go straight
   to an mspace. */
void neddisablethreadcache(nedpool *p) THROWSPEC
{
	int mycache;
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	mycache=(int)(size_t) TLSGET(p->mycache);
	if(!mycache)
	{	/* Set to mspace 0 */
		if(TLSSET(p->mycache, (void *)-1)) abort();
	}
	else if(mycache>0)
	{	/* Set to last used mspace */
		threadcache *tc=p->caches[mycache-1];
#if defined(DEBUG)
		printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
			100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
#endif
		/* Remember the thread's mspace as a negative TLS sentinel */
		if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
		tc->frees++;
		RemoveCacheEntries(p, tc, 0);	/* flush everything back to the mspaces */
		assert(!tc->freeInCache);
		tc->mymspace=-1;
		tc->threadid=0;
		mspace_free(0, p->caches[mycache-1]);
		p->caches[mycache-1]=0;
	}
}
714 | ||
/* Runs `action` with a locked mspace bound to local name m, then
   unlocks it.  Used by all the nedp* allocation entry points. */
#define GETMSPACE(m,p,tc,ms,s,action)           \
  do                                            \
  {                                             \
    mstate m = GetMSpace((p),(tc),(ms),(s));    \
    action;                                     \
    RELEASE_LOCK(&m->mutex);                    \
  } while (0)

/* Returns a locked mspace for the thread: tries the preferred one
   without blocking, otherwise falls back to FindMSpace (which may pick
   another mspace or create a new one, and updates mymspace). */
static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC
{ /* Returns a locked and ready for use mspace */
	mstate m=p->m[mymspace];
	assert(m);
	if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);\
	/*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
	return m;
}
731 | static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC | |
732 | { | |
733 | int mycache; | |
734 | if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk); | |
735 | if(!*p) | |
736 | { | |
737 | *p=&syspool; | |
738 | if(!syspool.threads) InitPool(&syspool, 0, -1); | |
739 | } | |
740 | mycache=(int)(size_t) TLSGET((*p)->mycache); | |
741 | if(mycache>0) | |
742 | { | |
743 | *tc=(*p)->caches[mycache-1]; | |
744 | *mymspace=(*tc)->mymspace; | |
745 | } | |
746 | else if(!mycache) | |
747 | { | |
748 | *tc=AllocCache(*p); | |
749 | if(!*tc) | |
750 | { /* Disable */ | |
751 | if(TLSSET((*p)->mycache, (void *)-1)) abort(); | |
752 | *mymspace=0; | |
753 | } | |
754 | else | |
755 | *mymspace=(*tc)->mymspace; | |
756 | } | |
757 | else | |
758 | { | |
759 | *tc=0; | |
760 | *mymspace=-mycache-1; | |
761 | } | |
762 | assert(*mymspace>=0); | |
763 | assert((long)(size_t)CURRENT_THREAD==(*tc)->threadid); | |
764 | #ifdef FULLSANITYCHECKS | |
765 | if(*tc) | |
766 | { | |
767 | if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2) | |
768 | { | |
769 | abort(); | |
770 | } | |
771 | } | |
772 | #endif | |
773 | } | |
774 | ||
/* Pool-aware malloc: tries the calling thread's lock-free cache first
   for small requests, then falls back to a locked mspace. */
void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
{
	void *ret=0;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
	if(tc && size<=THREADCACHEMAX)
	{	/* Use the thread cache */
		ret=threadcache_malloc(p, tc, &size);
	}
#endif
	if(!ret)
	{	/* Use this thread's mspace */
		GETMSPACE(m, p, tc, mymspace, size,
			  ret=mspace_malloc(m, size));
	}
	return ret;
}
794 | void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC | |
795 | { | |
796 | size_t rsize=size*no; | |
797 | void *ret=0; | |
798 | threadcache *tc; | |
799 | int mymspace; | |
800 | GetThreadCache(&p, &tc, &mymspace, &rsize); | |
801 | #if THREADCACHEMAX | |
802 | if(tc && rsize<=THREADCACHEMAX) | |
803 | { /* Use the thread cache */ | |
804 | if((ret=threadcache_malloc(p, tc, &rsize))) | |
805 | memset(ret, 0, rsize); | |
806 | } | |
807 | #endif | |
808 | if(!ret) | |
809 | { /* Use this thread's mspace */ | |
810 | GETMSPACE(m, p, tc, mymspace, rsize, | |
811 | ret=mspace_calloc(m, 1, rsize)); | |
812 | } | |
813 | return ret; | |
814 | } | |
/* Pool-aware realloc.  For small targets it may allocate from the
   thread cache, copy, and recycle the old block; otherwise it defers to
   mspace_realloc, which (thanks to footers) reallocs inside the mspace
   that originally served the block. */
void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
{
	void *ret=0;
	threadcache *tc;
	int mymspace;
	if(!mem) return nedpmalloc(p, size);	/* realloc(0, n) == malloc(n) */
	GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
	if(tc && size && size<=THREADCACHEMAX)
	{	/* Use the thread cache */
		size_t memsize=nedblksize(mem);
		assert(memsize);
		if((ret=threadcache_malloc(p, tc, &size)))
		{
			memcpy(ret, mem, memsize<size ? memsize : size);
			if(memsize<=THREADCACHEMAX)
				threadcache_free(p, tc, mymspace, mem, memsize);
			else
				mspace_free(0, mem);	/* too big for the cache: free via footer */
		}
	}
#endif
	if(!ret)
	{	/* Reallocs always happen in the mspace they happened in, so skip
		   locking the preferred mspace for this thread */
		ret=mspace_realloc(0, mem, size);
	}
	return ret;
}
844 | void nedpfree(nedpool *p, void *mem) THROWSPEC | |
845 | { /* Frees always happen in the mspace they happened in, so skip | |
846 | locking the preferred mspace for this thread */ | |
847 | threadcache *tc; | |
848 | int mymspace; | |
849 | size_t memsize; | |
850 | assert(mem); | |
851 | GetThreadCache(&p, &tc, &mymspace, 0); | |
852 | #if THREADCACHEMAX | |
853 | memsize=nedblksize(mem); | |
854 | assert(memsize); | |
855 | if(mem && tc && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD)) | |
856 | threadcache_free(p, tc, mymspace, mem, memsize); | |
857 | else | |
858 | #endif | |
859 | mspace_free(0, mem); | |
860 | } | |
861 | void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC | |
862 | { | |
863 | void *ret; | |
864 | threadcache *tc; | |
865 | int mymspace; | |
866 | GetThreadCache(&p, &tc, &mymspace, &bytes); | |
867 | { /* Use this thread's mspace */ | |
868 | GETMSPACE(m, p, tc, mymspace, bytes, | |
869 | ret=mspace_memalign(m, alignment, bytes)); | |
870 | } | |
871 | return ret; | |
872 | } | |
873 | #if !NO_MALLINFO | |
874 | struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC | |
875 | { | |
876 | int n; | |
877 | struct mallinfo ret={0}; | |
878 | if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } | |
879 | for(n=0; p->m[n]; n++) | |
880 | { | |
881 | struct mallinfo t=mspace_mallinfo(p->m[n]); | |
882 | ret.arena+=t.arena; | |
883 | ret.ordblks+=t.ordblks; | |
884 | ret.hblkhd+=t.hblkhd; | |
885 | ret.usmblks+=t.usmblks; | |
886 | ret.uordblks+=t.uordblks; | |
887 | ret.fordblks+=t.fordblks; | |
888 | ret.keepcost+=t.keepcost; | |
889 | } | |
890 | return ret; | |
891 | } | |
892 | #endif | |
/* Adjusts a dlmalloc tunable (e.g. trim threshold).  The pool argument
   is accepted for API symmetry but unused: mallopt settings are global
   to dlmalloc, not per-pool. */
int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
{
	return mspace_mallopt(parno, value);
}
897 | int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC | |
898 | { | |
899 | int n, ret=0; | |
900 | if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } | |
901 | for(n=0; p->m[n]; n++) | |
902 | { | |
903 | ret+=mspace_trim(p->m[n], pad); | |
904 | } | |
905 | return ret; | |
906 | } | |
907 | void nedpmalloc_stats(nedpool *p) THROWSPEC | |
908 | { | |
909 | int n; | |
910 | if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } | |
911 | for(n=0; p->m[n]; n++) | |
912 | { | |
913 | mspace_malloc_stats(p->m[n]); | |
914 | } | |
915 | } | |
916 | size_t nedpmalloc_footprint(nedpool *p) THROWSPEC | |
917 | { | |
918 | size_t ret=0; | |
919 | int n; | |
920 | if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } | |
921 | for(n=0; p->m[n]; n++) | |
922 | { | |
923 | ret+=mspace_footprint(p->m[n]); | |
924 | } | |
925 | return ret; | |
926 | } | |
/* Pool-aware independent_calloc: allocates elemsno separate zeroed
   blocks of elemsize bytes each in one locked mspace operation.
   elemsize is raised to sizeof(threadcacheblk) by GetThreadCache so the
   blocks remain cacheable when later freed. */
void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
{
	void **ret;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &elemsize);
	GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
		  ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
	return ret;
}
/* Pool-aware independent_comalloc: allocates elems blocks with
   individually specified sizes in one locked mspace operation.  Each
   requested size is clamped up to sizeof(threadcacheblk) (into a
   stack-allocated copy, leaving the caller's sizes array untouched) so
   every block can later live in a thread cache. */
void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
{
	void **ret;
	threadcache *tc;
	int mymspace;
	/* NOTE(review): alloca never reports failure on most platforms, so
	   the null check below is effectively dead; very large `elems` would
	   overflow the stack instead — callers should keep elems modest. */
	size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
	if(!adjustedsizes) return 0;
	for(i=0; i<elems; i++)
		adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
	GetThreadCache(&p, &tc, &mymspace, 0);
	GETMSPACE(m, p, tc, mymspace, 0,
		  ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
	return ret;
}
951 | ||
f0ed8226 MSO |
952 | #if defined(__cplusplus) |
953 | } | |
954 | #endif |