/* Alternative malloc implementation for multiple threads without
lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/

#ifdef _MSC_VER
/* Enable full aliasing on MSVC */
/*#pragma optimize("a", on)*/
#endif

/*#define FULLSANITYCHECKS*/

#include "nedmalloc.h"
#if defined(WIN32)
#include <malloc.h>
#endif
#define MSPACES 1
#define ONLY_MSPACES 1
#ifndef USE_LOCKS
#define USE_LOCKS 1
#endif
#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */
#undef DEBUG      /* dlmalloc wants DEBUG either 0 or 1 */
#ifdef _DEBUG
#define DEBUG 1
#else
#define DEBUG 0
#endif
#ifdef NDEBUG     /* Disable assert checking on release builds */
#undef DEBUG
#endif
/* The default granularity of 64KB means we spend too much time kernel-side */
#ifndef DEFAULT_GRANULARITY
#define DEFAULT_GRANULARITY (1*1024*1024)
#endif
/*#define USE_SPIN_LOCKS 0*/


/*#define FORCEINLINE*/
#include "malloc.c.h"
#ifdef NDEBUG     /* Disable assert checking on release builds */
#undef DEBUG
#endif

/* The maximum number of threads a pool can serve concurrently */
#ifndef MAXTHREADSINPOOL
#define MAXTHREADSINPOOL 16
#endif
/* The maximum number of threadcaches which can be allocated */
#ifndef THREADCACHEMAXCACHES
#define THREADCACHEMAXCACHES 256
#endif
/* The maximum size to be allocated from the thread cache */
#ifndef THREADCACHEMAX
#define THREADCACHEMAX 8192
#endif
#if 0
/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
#define THREADCACHEMAXBINS ((13-4)*2)
#else
/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
#define THREADCACHEMAXBINS (13-4)
#endif
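/* Worked numbers for the defaults above (illustrative): THREADCACHEMAX=8192
   has its top bit at position 13 (1<<13==8192), so THREADCACHEMAXBINS is
   13-4==9 and the bins array in the threadcache below holds (9+1)==10
   head/tail pointer pairs, one per power-of-two block size from 16 up to
   8192 bytes. */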
/* Point at which the free space in a thread cache is garbage collected */
#ifndef THREADCACHEMAXFREESPACE
#define THREADCACHEMAXFREESPACE (512*1024)
#endif


#ifdef WIN32
#define TLSVAR DWORD
#define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
#define TLSFREE(k) (!TlsFree(k))
#define TLSGET(k) TlsGetValue(k)
#define TLSSET(k, a) (!TlsSetValue(k, a))
#ifdef DEBUG
static LPVOID ChkedTlsGetValue(DWORD idx)
{
  LPVOID ret=TlsGetValue(idx);
  assert(S_OK==GetLastError());
  return ret;
}
#undef TLSGET
#define TLSGET(k) ChkedTlsGetValue(k)
#endif
#else
#define TLSVAR pthread_key_t
#define TLSALLOC(k) pthread_key_create(k, 0)
#define TLSFREE(k) pthread_key_delete(k)
#define TLSGET(k) pthread_getspecific(k)
#define TLSSET(k, a) pthread_setspecific(k, a)
#endif
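/* Note the convention these macros normalise to: TLSALLOC, TLSFREE and
   TLSSET evaluate non-zero on failure on both Windows and pthreads, which
   is why callers below treat any non-zero result as fatal, e.g.
   if(TLSSET(p->mycache, ...)) abort(); */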
116 | ||
117 | #if 0 | |
118 | /* Only enable if testing with valgrind. Causes misoperation */ | |
119 | #define mspace_malloc(p, s) malloc(s) | |
120 | #define mspace_realloc(p, m, s) realloc(m, s) | |
121 | #define mspace_calloc(p, n, s) calloc(n, s) | |
122 | #define mspace_free(p, m) free(m) | |
123 | #endif | |
124 | ||
125 | ||
#if defined(__cplusplus)
#if !defined(NO_NED_NAMESPACE)
namespace nedalloc {
#else
extern "C" {
#endif
#endif

size_t nedblksize(void *mem) THROWSPEC
{
#if 0
  /* Only enable if testing with valgrind. Causes misoperation */
  return THREADCACHEMAX;
#else
  if(mem)
  {
    mchunkptr p=mem2chunk(mem);
    assert(cinuse(p)); /* If this fails, someone tried to free a block twice */
    if(cinuse(p))
      return chunksize(p)-overhead_for(p);
  }
  return 0;
#endif
}

void nedsetvalue(void *v) THROWSPEC { nedpsetvalue(0, v); }
void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc(0, size); }
void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc(0, no, size); }
void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc(0, mem, size); }
void nedfree(void *mem) THROWSPEC { nedpfree(0, mem); }
void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); }
#if !NO_MALLINFO
struct mallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo(0); }
#endif
int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt(0, parno, value); }
int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim(0, pad); }
void nedmalloc_stats(void) THROWSPEC { nedpmalloc_stats(0); }
size_t nedmalloc_footprint(void) THROWSPEC { return nedpmalloc_footprint(0); }
void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc(0, elemsno, elemsize, chunks); }
void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc(0, elems, sizes, chunks); }
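/* A minimal usage sketch of the wrappers above (illustrative only, not part
   of the build): nedmalloc and friends are drop-in replacements that operate
   on the shared system pool:

       void *a=nedmalloc(100);
       void *b=nedcalloc(10, sizeof(int));
       a=nedrealloc(a, 256);
       nedfree(a);
       nedfree(b);
*/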
166 | ||
167 | struct threadcacheblk_t; | |
168 | typedef struct threadcacheblk_t threadcacheblk; | |
169 | struct threadcacheblk_t | |
170 | { /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */ | |
171 | #ifdef FULLSANITYCHECKS | |
172 | unsigned int magic; | |
173 | #endif | |
174 | unsigned int lastUsed, size; | |
175 | threadcacheblk *next, *prev; | |
176 | }; | |
177 | typedef struct threadcache_t | |
178 | { | |
179 | #ifdef FULLSANITYCHECKS | |
180 | unsigned int magic1; | |
181 | #endif | |
182 | int mymspace; /* Last mspace entry this thread used */ | |
183 | long threadid; | |
184 | unsigned int mallocs, frees, successes; | |
185 | size_t freeInCache; /* How much free space is stored in this cache */ | |
186 | threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2]; | |
187 | #ifdef FULLSANITYCHECKS | |
188 | unsigned int magic2; | |
189 | #endif | |
190 | } threadcache; | |
191 | struct nedpool_t | |
192 | { | |
193 | MLOCK_T mutex; | |
194 | void *uservalue; | |
195 | int threads; /* Max entries in m to use */ | |
196 | threadcache *caches[THREADCACHEMAXCACHES]; | |
197 | TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */ | |
198 | mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */ | |
199 | }; | |
static nedpool syspool;

static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC
{ /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */
  unsigned int topbit, size=(unsigned int)(_size>>4);
  /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */

#if defined(__GNUC__)
  topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
#elif defined(_MSC_VER) && _MSC_VER>=1300
  {
    unsigned long bsrTopBit;

    _BitScanReverse(&bsrTopBit, size);

    topbit = bsrTopBit;
  }
#else
#if 0
  union {
    unsigned asInt[2];
    double asDouble;
  };
  int n;

  asDouble = (double)size + 0.5;
  topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
#else
  { /* Portable fallback: smear the top bit downwards, invert, then popcount
       the bits above it to recover the top bit's position */
    unsigned int x=size;
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >>16);
    x = ~x;
    x = x - ((x >> 1) & 0x55555555);
    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
    x = (x + (x >> 4)) & 0x0F0F0F0F;
    x = x + (x << 8);
    x = x + (x << 16);
    topbit = 31 - (x >> 24);
  }
#endif
#endif
  return topbit;
}
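/* Worked example (illustrative): a request of 100 bytes gives _size>>4==6
   (binary 110), whose top set bit is at position 2, so size2binidx() returns
   idx 2. threadcache_malloc() below then computes bestsize=1<<(2+4)==64;
   since 100>64 it bumps to idx 3 and bestsize 128, so the request is served
   from (and rounded up to) the 128 byte bin. */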
247 | ||
248 | ||
249 | #ifdef FULLSANITYCHECKS | |
250 | static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC | |
251 | { | |
252 | assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1])); | |
253 | if(ptr[0] && ptr[1]) | |
254 | { | |
255 | assert(nedblksize(ptr[0])>=sizeof(threadcacheblk)); | |
256 | assert(nedblksize(ptr[1])>=sizeof(threadcacheblk)); | |
257 | assert(*(unsigned int *) "NEDN"==ptr[0]->magic); | |
258 | assert(*(unsigned int *) "NEDN"==ptr[1]->magic); | |
259 | assert(!ptr[0]->prev); | |
260 | assert(!ptr[1]->next); | |
261 | if(ptr[0]==ptr[1]) | |
262 | { | |
263 | assert(!ptr[0]->next); | |
264 | assert(!ptr[1]->prev); | |
265 | } | |
266 | } | |
267 | } | |
268 | static void tcfullsanitycheck(threadcache *tc) THROWSPEC | |
269 | { | |
270 | threadcacheblk **tcbptr=tc->bins; | |
271 | int n; | |
272 | for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) | |
273 | { | |
274 | threadcacheblk *b, *ob=0; | |
275 | tcsanitycheck(tcbptr); | |
276 | for(b=tcbptr[0]; b; ob=b, b=b->next) | |
277 | { | |
278 | assert(*(unsigned int *) "NEDN"==b->magic); | |
279 | assert(!ob || ob->next==b); | |
280 | assert(!ob || b->prev==ob); | |
281 | } | |
282 | } | |
283 | } | |
284 | #endif | |
285 | ||
286 | static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC | |
287 | { | |
288 | #ifdef FULLSANITYCHECKS | |
289 | tcfullsanitycheck(tc); | |
290 | #endif | |
291 | if(tc->freeInCache) | |
292 | { | |
293 | threadcacheblk **tcbptr=tc->bins; | |
294 | int n; | |
295 | for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) | |
296 | { | |
297 | threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */ | |
298 | /*tcsanitycheck(tcbptr);*/ | |
299 | for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; ) | |
300 | { | |
301 | threadcacheblk *f=*tcb; | |
302 | size_t blksize=f->size; /*nedblksize(f);*/ | |
303 | assert(blksize<=nedblksize(f)); | |
304 | assert(blksize); | |
305 | #ifdef FULLSANITYCHECKS | |
306 | assert(*(unsigned int *) "NEDN"==(*tcb)->magic); | |
307 | #endif | |
308 | *tcb=(*tcb)->prev; | |
309 | if(*tcb) | |
310 | (*tcb)->next=0; | |
311 | else | |
312 | *tcbptr=0; | |
313 | tc->freeInCache-=blksize; | |
314 | assert((long) tc->freeInCache>=0); | |
315 | mspace_free(0, f); | |
316 | /*tcsanitycheck(tcbptr);*/ | |
317 | } | |
318 | } | |
319 | } | |
320 | #ifdef FULLSANITYCHECKS | |
321 | tcfullsanitycheck(tc); | |
322 | #endif | |
323 | } | |
static void DestroyCaches(nedpool *p) THROWSPEC
{
  if(p->caches)
  {
    threadcache *tc;
    int n;
    for(n=0; n<THREADCACHEMAXCACHES; n++)
    {
      if((tc=p->caches[n]))
      {
        tc->frees++;
        RemoveCacheEntries(p, tc, 0);
        assert(!tc->freeInCache);
        tc->mymspace=-1;
        tc->threadid=0;
        mspace_free(0, tc);
        p->caches[n]=0;
      }
    }
  }
}
345 | ||
346 | static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC | |
347 | { | |
348 | threadcache *tc=0; | |
349 | int n, end; | |
350 | ACQUIRE_LOCK(&p->mutex); | |
351 | for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++); | |
352 | if(THREADCACHEMAXCACHES==n) | |
353 | { /* List exhausted, so disable for this thread */ | |
354 | RELEASE_LOCK(&p->mutex); | |
355 | return 0; | |
356 | } | |
357 | tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache)); | |
358 | if(!tc) | |
359 | { | |
360 | RELEASE_LOCK(&p->mutex); | |
361 | return 0; | |
362 | } | |
363 | #ifdef FULLSANITYCHECKS | |
364 | tc->magic1=*(unsigned int *)"NEDMALC1"; | |
365 | tc->magic2=*(unsigned int *)"NEDMALC2"; | |
366 | #endif | |
367 | tc->threadid=(long)(size_t)CURRENT_THREAD; | |
368 | for(end=0; p->m[end]; end++); | |
369 | tc->mymspace=tc->threadid % end; | |
370 | RELEASE_LOCK(&p->mutex); | |
371 | if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort(); | |
372 | return tc; | |
373 | } | |
374 | ||
375 | static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC | |
376 | { | |
377 | void *ret=0; | |
378 | unsigned int bestsize; | |
379 | unsigned int idx=size2binidx(*size); | |
380 | size_t blksize=0; | |
381 | threadcacheblk *blk, **binsptr; | |
382 | #ifdef FULLSANITYCHECKS | |
383 | tcfullsanitycheck(tc); | |
384 | #endif | |
385 | /* Calculate best fit bin size */ | |
386 | bestsize=1<<(idx+4); | |
387 | #if 0 | |
388 | /* Finer grained bin fit */ | |
389 | idx<<=1; | |
390 | if(*size>bestsize) | |
391 | { | |
392 | idx++; | |
393 | bestsize+=bestsize>>1; | |
394 | } | |
395 | if(*size>bestsize) | |
396 | { | |
397 | idx++; | |
398 | bestsize=1<<(4+(idx>>1)); | |
399 | } | |
400 | #else | |
401 | if(*size>bestsize) | |
402 | { | |
403 | idx++; | |
404 | bestsize<<=1; | |
405 | } | |
406 | #endif | |
407 | assert(bestsize>=*size); | |
408 | if(*size<bestsize) *size=bestsize; | |
409 | assert(*size<=THREADCACHEMAX); | |
410 | assert(idx<=THREADCACHEMAXBINS); | |
411 | binsptr=&tc->bins[idx*2]; | |
412 | /* Try to match close, but move up a bin if necessary */ | |
413 | blk=*binsptr; | |
414 | if(!blk || blk->size<*size) | |
415 | { /* Bump it up a bin */ | |
416 | if(idx<THREADCACHEMAXBINS) | |
417 | { | |
418 | idx++; | |
419 | binsptr+=2; | |
420 | blk=*binsptr; | |
421 | } | |
422 | } | |
423 | if(blk) | |
424 | { | |
425 | blksize=blk->size; /*nedblksize(blk);*/ | |
426 | assert(nedblksize(blk)>=blksize); | |
427 | assert(blksize>=*size); | |
428 | if(blk->next) | |
429 | blk->next->prev=0; | |
430 | *binsptr=blk->next; | |
431 | if(!*binsptr) | |
432 | binsptr[1]=0; | |
433 | #ifdef FULLSANITYCHECKS | |
434 | blk->magic=0; | |
435 | #endif | |
436 | assert(binsptr[0]!=blk && binsptr[1]!=blk); | |
437 | assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD); | |
438 | /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/ | |
439 | ret=(void *) blk; | |
440 | } | |
441 | ++tc->mallocs; | |
442 | if(ret) | |
443 | { | |
444 | assert(blksize>=*size); | |
445 | ++tc->successes; | |
446 | tc->freeInCache-=blksize; | |
447 | assert((long) tc->freeInCache>=0); | |
448 | } | |
449 | #if defined(DEBUG) && 0 | |
450 | if(!(tc->mallocs & 0xfff)) | |
451 | { | |
452 | printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs, | |
453 | (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache); | |
454 | } | |
455 | #endif | |
456 | #ifdef FULLSANITYCHECKS | |
457 | tcfullsanitycheck(tc); | |
458 | #endif | |
459 | return ret; | |
460 | } | |
static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC
{
  unsigned int age=THREADCACHEMAXFREESPACE/8192; /* 64 with the default 512KB limit */
  /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
  while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
  { /* Evict progressively younger entries until back under the limit */
    RemoveCacheEntries(p, tc, age);
    /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
    age>>=1;
  }
  /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
}
static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC
{
  unsigned int bestsize;
  unsigned int idx=size2binidx(size);
  threadcacheblk **binsptr, *tck=(threadcacheblk *) mem;
  assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
#ifdef DEBUG
  { /* Make sure this is a valid memory block */
    mchunkptr p = mem2chunk(mem);
    mstate fm = get_mstate_for(p);
    if (!ok_magic(fm)) {
      USAGE_ERROR_ACTION(fm, p);
      return;
    }
  }
#endif
#ifdef FULLSANITYCHECKS
  tcfullsanitycheck(tc);
#endif
  /* Calculate best fit bin size */
  bestsize=1<<(idx+4);
#if 0
  /* Finer grained bin fit */
  idx<<=1;
  if(size>bestsize)
  {
    unsigned int biggerbestsize=bestsize+(bestsize>>1); /* 1.5x, matching the finer grained fit in threadcache_malloc() */
    if(size>=biggerbestsize)
    {
      idx++;
      bestsize=biggerbestsize;
    }
  }
#endif
  if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */
    size=bestsize;
  binsptr=&tc->bins[idx*2];
  assert(idx<=THREADCACHEMAXBINS);
  if(tck==*binsptr)
  {
    fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck);
    abort();
  }
#ifdef FULLSANITYCHECKS
  tck->magic=*(unsigned int *) "NEDN";
#endif
  tck->lastUsed=++tc->frees;
  tck->size=(unsigned int) size;
  tck->next=*binsptr;
  tck->prev=0;
  if(tck->next)
    tck->next->prev=tck;
  else
    binsptr[1]=tck;
  assert(!*binsptr || (*binsptr)->size==tck->size);
  *binsptr=tck;
  assert(tck==tc->bins[idx*2]);
  assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
  /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
  tc->freeInCache+=size;
#ifdef FULLSANITYCHECKS
  tcfullsanitycheck(tc);
#endif
#if 1
  if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
    ReleaseFreeInCache(p, tc, mymspace);
#endif
}
541 | ||
542 | ||
543 | ||
544 | ||
545 | static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC | |
546 | { /* threads is -1 for system pool */ | |
547 | ensure_initialization(); | |
548 | ACQUIRE_MALLOC_GLOBAL_LOCK(); | |
549 | if(p->threads) goto done; | |
550 | if(INITIAL_LOCK(&p->mutex)) goto err; | |
551 | if(TLSALLOC(&p->mycache)) goto err; | |
552 | if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err; | |
553 | p->m[0]->extp=p; | |
554 | p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads; | |
555 | done: | |
556 | RELEASE_MALLOC_GLOBAL_LOCK(); | |
557 | return 1; | |
558 | err: | |
559 | if(threads<0) | |
560 | abort(); /* If you can't allocate for system pool, we're screwed */ | |
561 | DestroyCaches(p); | |
562 | if(p->m[0]) | |
563 | { | |
564 | destroy_mspace(p->m[0]); | |
565 | p->m[0]=0; | |
566 | } | |
567 | if(p->mycache) | |
568 | { | |
569 | if(TLSFREE(p->mycache)) abort(); | |
570 | p->mycache=0; | |
571 | } | |
572 | RELEASE_MALLOC_GLOBAL_LOCK(); | |
573 | return 0; | |
574 | } | |
static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC
{ /* Gets called when the thread's last used mspace is in use. The strategy
     is to run through the list of all available mspaces looking for an
     unlocked one and if we fail, we create a new one so long as we don't
     exceed p->threads */
  int n, end;
  for(n=end=*lastUsed+1; p->m[n]; end=++n)
  {
    if(TRY_LOCK(&p->m[n]->mutex)) goto found;
  }
  for(n=0; n<*lastUsed && p->m[n]; n++)
  {
    if(TRY_LOCK(&p->m[n]->mutex)) goto found;
  }
  if(end<p->threads)
  {
    mstate temp;
    if(!(temp=(mstate) create_mspace(size, 1)))
      goto badexit;
    /* Now we're ready to modify the lists, we lock */
    ACQUIRE_LOCK(&p->mutex);
    while(p->m[end] && end<p->threads)
      end++;
    if(end>=p->threads)
    { /* Drat, must destroy it now */
      RELEASE_LOCK(&p->mutex);
      destroy_mspace((mspace) temp);
      goto badexit;
    }
    /* We really want to make sure this goes into memory now but we
       have to be careful of breaking aliasing rules, so write it twice */
    {
      volatile struct malloc_state **_m=(volatile struct malloc_state **) &p->m[end];
      *_m=(p->m[end]=temp);
    }
    ACQUIRE_LOCK(&p->m[end]->mutex);
    /*printf("Created mspace idx %d\n", end);*/
    RELEASE_LOCK(&p->mutex);
    n=end;
    goto found;
  }
  /* Let it lock on the last one it used */
badexit:
  ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
  return p->m[*lastUsed];
found:
  *lastUsed=n;
  if(tc)
    tc->mymspace=n;
  else
  {
    if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
  }
  return p->m[n];
}
630 | ||
631 | nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC | |
632 | { | |
633 | nedpool *ret; | |
634 | if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0; | |
635 | if(!InitPool(ret, capacity, threads)) | |
636 | { | |
637 | nedpfree(0, ret); | |
638 | return 0; | |
639 | } | |
640 | return ret; | |
641 | } | |
642 | void neddestroypool(nedpool *p) THROWSPEC | |
643 | { | |
644 | int n; | |
645 | ACQUIRE_LOCK(&p->mutex); | |
646 | DestroyCaches(p); | |
647 | for(n=0; p->m[n]; n++) | |
648 | { | |
649 | destroy_mspace(p->m[n]); | |
650 | p->m[n]=0; | |
651 | } | |
652 | RELEASE_LOCK(&p->mutex); | |
653 | if(TLSFREE(p->mycache)) abort(); | |
654 | nedpfree(0, p); | |
655 | } | |
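/* Pool lifecycle sketch (illustrative only, not part of the build): a
   private pool sized for two threads, used and then torn down explicitly
   instead of going through the shared system pool:

       nedpool *pool=nedcreatepool(1024*1024, 2);
       void *mem=nedpmalloc(pool, 4096);
       nedpfree(pool, mem);
       neddestroypool(pool);
*/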
656 | ||
657 | void nedpsetvalue(nedpool *p, void *v) THROWSPEC | |
658 | { | |
659 | if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } | |
660 | p->uservalue=v; | |
661 | } | |
662 | void *nedgetvalue(nedpool **p, void *mem) THROWSPEC | |
663 | { | |
664 | nedpool *np=0; | |
665 | mchunkptr mcp=mem2chunk(mem); | |
666 | mstate fm; | |
667 | if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0; | |
668 | if(!cinuse(mcp)) return 0; | |
669 | if(!next_pinuse(mcp)) return 0; | |
670 | if(!is_mmapped(mcp) && !pinuse(mcp)) | |
671 | { | |
672 | if(next_chunk(prev_chunk(mcp))!=mcp) return 0; | |
673 | } | |
674 | fm=get_mstate_for(mcp); | |
675 | if(!ok_magic(fm)) return 0; | |
676 | if(!ok_address(fm, mcp)) return 0; | |
677 | if(!fm->extp) return 0; | |
678 | np=(nedpool *) fm->extp; | |
679 | if(p) *p=np; | |
680 | return np->uservalue; | |
681 | } | |
682 | ||
683 | void neddisablethreadcache(nedpool *p) THROWSPEC | |
684 | { | |
685 | int mycache; | |
686 | if(!p) | |
687 | { | |
688 | p=&syspool; | |
689 | if(!syspool.threads) InitPool(&syspool, 0, -1); | |
690 | } | |
691 | mycache=(int)(size_t) TLSGET(p->mycache); | |
692 | if(!mycache) | |
693 | { /* Set to mspace 0 */ | |
694 | if(TLSSET(p->mycache, (void *)-1)) abort(); | |
695 | } | |
696 | else if(mycache>0) | |
697 | { /* Set to last used mspace */ | |
698 | threadcache *tc=p->caches[mycache-1]; | |
699 | #if defined(DEBUG) | |
700 | printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n", | |
701 | 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs); | |
702 | #endif | |
703 | if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort(); | |
704 | tc->frees++; | |
705 | RemoveCacheEntries(p, tc, 0); | |
706 | assert(!tc->freeInCache); | |
707 | tc->mymspace=-1; | |
708 | tc->threadid=0; | |
709 | mspace_free(0, p->caches[mycache-1]); | |
710 | p->caches[mycache-1]=0; | |
711 | } | |
712 | } | |
713 | ||
714 | #define GETMSPACE(m,p,tc,ms,s,action) \ | |
715 | do \ | |
716 | { \ | |
717 | mstate m = GetMSpace((p),(tc),(ms),(s)); \ | |
718 | action; \ | |
719 | RELEASE_LOCK(&m->mutex); \ | |
720 | } while (0) | |
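/* GETMSPACE binds a locked mspace to the name passed as its first argument,
   runs the action, then unlocks - exactly as the allocation entry points
   below use it:

       GETMSPACE(m, p, tc, mymspace, size,
                 ret=mspace_malloc(m, size));
*/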
721 | ||
722 | static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC | |
723 | { /* Returns a locked and ready for use mspace */ | |
724 | mstate m=p->m[mymspace]; | |
725 | assert(m); | |
726 | if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);\ | |
727 | /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/ | |
728 | return m; | |
729 | } | |
730 | static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC | |
731 | { | |
732 | int mycache; | |
733 | if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk); | |
734 | if(!*p) | |
735 | { | |
736 | *p=&syspool; | |
737 | if(!syspool.threads) InitPool(&syspool, 0, -1); | |
738 | } | |
739 | mycache=(int)(size_t) TLSGET((*p)->mycache); | |
740 | if(mycache>0) | |
741 | { | |
742 | *tc=(*p)->caches[mycache-1]; | |
743 | *mymspace=(*tc)->mymspace; | |
744 | } | |
745 | else if(!mycache) | |
746 | { | |
747 | *tc=AllocCache(*p); | |
748 | if(!*tc) | |
749 | { /* Disable */ | |
750 | if(TLSSET((*p)->mycache, (void *)-1)) abort(); | |
751 | *mymspace=0; | |
752 | } | |
753 | else | |
754 | *mymspace=(*tc)->mymspace; | |
755 | } | |
756 | else | |
757 | { | |
758 | *tc=0; | |
759 | *mymspace=-mycache-1; | |
760 | } | |
761 | assert(*mymspace>=0); | |
762 | assert((long)(size_t)CURRENT_THREAD==(*tc)->threadid); | |
763 | #ifdef FULLSANITYCHECKS | |
764 | if(*tc) | |
765 | { | |
766 | if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2) | |
767 | { | |
768 | abort(); | |
769 | } | |
770 | } | |
771 | #endif | |
772 | } | |
773 | ||
774 | void * nedpmalloc(nedpool *p, size_t size) THROWSPEC | |
775 | { | |
776 | void *ret=0; | |
777 | threadcache *tc; | |
778 | int mymspace; | |
779 | GetThreadCache(&p, &tc, &mymspace, &size); | |
780 | #if THREADCACHEMAX | |
781 | if(tc && size<=THREADCACHEMAX) | |
782 | { /* Use the thread cache */ | |
783 | ret=threadcache_malloc(p, tc, &size); | |
784 | } | |
785 | #endif | |
786 | if(!ret) | |
787 | { /* Use this thread's mspace */ | |
788 | GETMSPACE(m, p, tc, mymspace, size, | |
789 | ret=mspace_malloc(m, size)); | |
790 | } | |
791 | return ret; | |
792 | } | |
void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
{
  size_t rsize=size*no;
  void *ret=0;
  threadcache *tc;
  int mymspace;
  GetThreadCache(&p, &tc, &mymspace, &rsize);
#if THREADCACHEMAX
  if(tc && rsize<=THREADCACHEMAX)
  { /* Use the thread cache */
    if((ret=threadcache_malloc(p, tc, &rsize)))
      memset(ret, 0, rsize);
  }
#endif
  if(!ret)
  { /* Use this thread's mspace */
    GETMSPACE(m, p, tc, mymspace, rsize,
              ret=mspace_calloc(m, 1, rsize));
  }
  return ret;
}
void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
{
  void *ret=0;
  threadcache *tc;
  int mymspace;
  if(!mem) return nedpmalloc(p, size);
  GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
  if(tc && size && size<=THREADCACHEMAX)
  { /* Use the thread cache */
    size_t memsize=nedblksize(mem);
    assert(memsize);
    if((ret=threadcache_malloc(p, tc, &size)))
    {
      memcpy(ret, mem, memsize<size ? memsize : size);
      if(memsize<=THREADCACHEMAX)
        threadcache_free(p, tc, mymspace, mem, memsize);
      else
        mspace_free(0, mem);
    }
  }
#endif
  if(!ret)
  { /* Reallocs always happen in the mspace they happened in, so skip
       locking the preferred mspace for this thread */
    ret=mspace_realloc(0, mem, size);
  }
  return ret;
}
void nedpfree(nedpool *p, void *mem) THROWSPEC
{ /* Frees always happen in the mspace they happened in, so skip
     locking the preferred mspace for this thread */
  threadcache *tc;
  int mymspace;
  size_t memsize;
  assert(mem);
  GetThreadCache(&p, &tc, &mymspace, 0);
#if THREADCACHEMAX
  memsize=nedblksize(mem);
  assert(memsize);
  if(mem && tc && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
    threadcache_free(p, tc, mymspace, mem, memsize);
  else
#endif
    mspace_free(0, mem);
}
void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
{
  void *ret;
  threadcache *tc;
  int mymspace;
  GetThreadCache(&p, &tc, &mymspace, &bytes);
  { /* Use this thread's mspace */
    GETMSPACE(m, p, tc, mymspace, bytes,
              ret=mspace_memalign(m, alignment, bytes));
  }
  return ret;
}
#if !NO_MALLINFO
struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC
{
  int n;
  struct mallinfo ret={0};
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  for(n=0; p->m[n]; n++)
  {
    struct mallinfo t=mspace_mallinfo(p->m[n]);
    ret.arena+=t.arena;
    ret.ordblks+=t.ordblks;
    ret.hblkhd+=t.hblkhd;
    ret.usmblks+=t.usmblks;
    ret.uordblks+=t.uordblks;
    ret.fordblks+=t.fordblks;
    ret.keepcost+=t.keepcost;
  }
  return ret;
}
#endif
int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
{
  return mspace_mallopt(parno, value);
}
int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
{
  int n, ret=0;
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  for(n=0; p->m[n]; n++)
  {
    ret+=mspace_trim(p->m[n], pad);
  }
  return ret;
}
void nedpmalloc_stats(nedpool *p) THROWSPEC
{
  int n;
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  for(n=0; p->m[n]; n++)
  {
    mspace_malloc_stats(p->m[n]);
  }
}
size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
{
  size_t ret=0;
  int n;
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  for(n=0; p->m[n]; n++)
  {
    ret+=mspace_footprint(p->m[n]);
  }
  return ret;
}
void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
{
  void **ret;
  threadcache *tc;
  int mymspace;
  GetThreadCache(&p, &tc, &mymspace, &elemsize);
  GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
            ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
  return ret;
}
void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
{
  void **ret;
  threadcache *tc;
  int mymspace;
  size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
  if(!adjustedsizes) return 0;
  for(i=0; i<elems; i++)
    adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
  GetThreadCache(&p, &tc, &mymspace, 0);
  GETMSPACE(m, p, tc, mymspace, 0,
            ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
  return ret;
}
950 | ||
f0ed8226 MSO |
951 | #if defined(__cplusplus) |
952 | } | |
953 | #endif |