From: DJ Delorie Date: Tue, 8 Nov 2016 23:41:49 +0000 (-0500) Subject: Merge branch 'master' into dj/malloc X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4f460c8944fa682376dfe63ceb39ac4a5a031232;p=thirdparty%2Fglibc.git Merge branch 'master' into dj/malloc --- 4f460c8944fa682376dfe63ceb39ac4a5a031232 diff --cc malloc/Makefile index 761a976fa33,b8efcd68bcc..f34c2a75bab --- a/malloc/Makefile +++ b/malloc/Makefile @@@ -41,9 -49,18 +49,18 @@@ install-lib := libmcheck. non-lib.a := libmcheck.a # Additional library. -extra-libs = libmemusage +extra-libs = libmemusage libmtracectl extra-libs-others = $(extra-libs) + # Helper objects for some tests. + extra-tests-objs += \ + tst-interpose-aux-nothread.o \ + tst-interpose-aux-thread.o \ + + test-extras = \ + tst-interpose-aux-nothread \ + tst-interpose-aux-thread \ + libmemusage-routines = memusage libmemusage-inhibit-o = $(filter-out .os,$(object-suffixes)) diff --cc malloc/malloc.c index 1fa9487f188,584edbf059e..bd8a1d4bb88 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@@ -273,24 -273,6 +273,25 @@@ #define MALLOC_DEBUG 0 #endif - #define USE_TCACHE 1 - ++#ifndef USE_TCACHE ++#define USE_TCACHE 0 ++#endif +#if USE_TCACHE +/* we want 64 entries */ +#define MAX_TCACHE_SIZE (MALLOC_ALIGNMENT * 63) +#define TCACHE_IDX ((MAX_TCACHE_SIZE / MALLOC_ALIGNMENT) + 1) +#define size2tidx(bytes) (((bytes) + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT) + +/* Rounds up, so... + idx 0 bytes 0 + idx 1 bytes 1..8 + idx 2 bytes 9..16 + etc +*/ + +#define TCACHE_FILL_COUNT 7 +#endif + #ifdef NDEBUG # define assert(expr) ((void) 0) #else @@@ -1045,387 -1009,6 +1046,387 @@@ static void* realloc_check(void* oldm static void* memalign_check(size_t alignment, size_t bytes, const void *caller); +/* ------------------ TRACE support ------------------ */ +#define USE_MTRACE 1 +#if USE_MTRACE +#include "mtrace.h" + +typedef struct __malloc_trace_map_entry_s { + int ref_count; + __malloc_trace_buffer_ptr window; +} __malloc_trace_map_entry; + +/* 16 Tb max file size, 64 Mb per window */ +#define TRACE_MAPPING_SIZE 67108864 +#define TRACE_N_PER_MAPPING (TRACE_MAPPING_SIZE / sizeof (struct __malloc_trace_buffer_s)) +#define TRACE_N_MAPPINGS 262144 +#define TRACE_MAX_COUNT (TRACE_N_PER_MAPPING * TRACE_N_MAPPINGS) + +/* Index into __malloc_trace_buffer[] */ +#define TRACE_COUNT_TO_MAPPING_NUM(count) ((count) / TRACE_N_PER_MAPPING) +/* Index info __malloc_trace_buffer[n][] */ +#define TRACE_COUNT_TO_MAPPING_IDX(count) ((count) % TRACE_N_PER_MAPPING) + +/* Global mutex for the trace buffer tree itself. */ - mutex_t __malloc_trace_mutex; ++libc_lock_define_initialized (static, __malloc_trace_mutex); + +/* Global counter, "full" when equal to TRACE_MAX_COUNT. Points to + the next available slot, so POST-INCREMENT it. */ +volatile size_t __malloc_trace_count = 0; + +/* Array of TRACE_N_MAPPINGS pointers to potentially mapped trace buffers. */ +volatile __malloc_trace_map_entry *__malloc_trace_buffer = NULL; +/* The file we're mapping them to. */ +char * __malloc_trace_filename = NULL; + +/* Global trace enable flag. Default off. + If global trace enable is 1 then tracing is carried out for all + threads. Otherwise no threads trace calls. */ +volatile int __malloc_trace_enabled = 0; + +/* Per-thread trace enable flag. Default on. + If thread trace enable is 1 then tracing for the thread behaves as expected + per the global trace enabled value. 
+ If thread trace enable is 0 then __MTB_TRACE_ENTRY and __MTB_TRACE_SET + do nothing, only __MTB_TRACE_PATH sets path bits i.e. no new traces are + created, the existing trace is used to store path bits. + The purpose of this is to allow the implementation to nest public API + calls, track paths, without creating multiple nested trace events. */ +__thread int __malloc_thread_trace_enabled = 1; + +static __thread int __malloc_trace_last_num = -1; +static __thread __malloc_trace_buffer_ptr trace_ptr; +static __thread struct __malloc_trace_buffer_s temporary_trace_record; + +static inline pid_t +__gettid (void) +{ + struct pthread *pd = THREAD_SELF; + pid_t selftid = THREAD_GETMEM (pd, tid); + if (selftid == 0) + { + /* This system call is not supposed to fail. */ +#ifdef INTERNAL_SYSCALL + INTERNAL_SYSCALL_DECL (err); + selftid = INTERNAL_SYSCALL (gettid, err, 0); +#else + selftid = INLINE_SYSCALL (gettid, 0); +#endif + THREAD_SETMEM (pd, tid, selftid); + } + + return selftid; +} + +static void +__mtb_trace_entry (uint32_t type, size_t size, void *ptr1) +{ + trace_ptr = &temporary_trace_record; + + trace_ptr->thread = __gettid (); + trace_ptr->type = type; + trace_ptr->path_thread_cache = 0; + trace_ptr->path_cpu_cache = 0; + trace_ptr->path_cpu_cache2 = 0; + trace_ptr->path_sbrk = 0; + trace_ptr->path_mmap = 0; + trace_ptr->path_munmap = 0; + trace_ptr->path_m_f_realloc = 0; + trace_ptr->path_hook = 0; + trace_ptr->path_unsorted_add = 0; + trace_ptr->path_unsorted_remove = 0; + trace_ptr->path_unsorted_empty = 0; + trace_ptr->path_fastbin_add = 0; + trace_ptr->path_fastbin_remove = 0; + trace_ptr->path_malloc_consolidate = 0; + trace_ptr->path = 0; + trace_ptr->ptr1 = ptr1; + trace_ptr->ptr2 = 0; + trace_ptr->size = size; + trace_ptr->size2 = 0; + trace_ptr->size3 = 0; +} + +/* Note: "record" the verb, not "record" the noun. This call records + the accumulated trace data into the trace buffer, and should be + called when the caller "owns" the pointers being recorded, to avoid + trace inversion. */ +static void +__mtb_trace_record (void) +{ + size_t my_trace_count; + size_t old_trace_count; + int my_num; + __malloc_trace_buffer_ptr new_trace_ptr; + + /* START T: Log trace event. */ + alg_t1: + /* T1. Perform a load-acq of the global trace offset. */ + my_trace_count = atomic_load_acquire (&__malloc_trace_count); + + /* T2. If the window number is different from the current + thread-local window number, proceed with algorithm W below. */ + my_num = TRACE_COUNT_TO_MAPPING_NUM (my_trace_count); + if (my_num != __malloc_trace_last_num) + { + long new_window; + int new_ref_count; + + /* START W: Switch window. */ + + /* W1. Acquire the global window lock. */ - (void) mutex_lock (&__malloc_trace_mutex); ++ __libc_lock_lock (__malloc_trace_mutex); + + /* W2. If the thread-local window number is not -1, decrement the reference + counter for the current thread window. */ + if (__malloc_trace_last_num != -1) + { + int old_window = __malloc_trace_last_num; + int old_ref_count = catomic_exchange_and_add (&__malloc_trace_buffer[old_window].ref_count, -1); + /* W3. If that reference counter reached 0, unmap the window. */ + if (old_ref_count == 1) + { + __munmap (__malloc_trace_buffer[old_window].window, TRACE_MAPPING_SIZE); + __malloc_trace_buffer[old_window].window = NULL; + } + } + + /* W4. Perform a load-relaxed of the global trace offset. */ + my_trace_count = atomic_load_relaxed (&__malloc_trace_count); + + /* W5. Increment the reference counter of the corresponding window. 
*/ + new_window = TRACE_COUNT_TO_MAPPING_NUM (my_trace_count); + new_ref_count = catomic_exchange_and_add (&__malloc_trace_buffer[new_window].ref_count, 1); + + /* W6. If the incremented reference counter is 1, perform algorithm F. */ + if (new_ref_count == 0) + { + /* START F: Map window from file. */ + + /* Note: There are security issues wrt opening a file by + name many times. We know this, and the risk is low (if + you have root access, there are better ways to wreak + havoc). We choose this design so that there isn't an + open file handle which may interefere with, or be + corrupted by, the running application. */ + + /* F1. Open the trace file. */ + int trace_fd = __open (__malloc_trace_filename, O_RDWR|O_CREAT, 0666); + if (trace_fd < 0) + { + /* FIXME: Better handling of errors? */ + __libc_message (0, "Can't open trace buffer file %s\n", __malloc_trace_filename); + atomic_store_release (&__malloc_trace_enabled, 0); - (void) mutex_unlock (&__malloc_trace_mutex); ++ __libc_lock_unlock (__malloc_trace_mutex); + return; + } + + /* F2. Extend the file length so that it covers the end of the current + window (using ftruncate, needed to avoid SIGBUS). */ + __ftruncate (trace_fd, (new_window + 1) * TRACE_MAPPING_SIZE); + + /* F3. Map the window from the file offset corresponding to + the current window. */ + void *ptr = + __mmap (NULL, TRACE_MAPPING_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + trace_fd, new_window * TRACE_MAPPING_SIZE); + if (ptr == NULL) + { + /* FIXME: Better handling of errors? */ + __libc_message (0, "Can't map trace_buffer file %s\n", __malloc_trace_filename); + atomic_store_release (&__malloc_trace_enabled, 0); - (void) mutex_unlock (&__malloc_trace_mutex); ++ __libc_lock_unlock (__malloc_trace_mutex); + return; + } + + /* F4. Update the mapping pointer in the active window array. */ + __malloc_trace_buffer[new_window].window = ptr; + + /* F5. Close the file. */ + __close (trace_fd); + + /* F6. Continue with step W7. */ + /* END F */ + } + + /* W7. Assign the window number to the thread-local window number, + switching the thread window. */ + __malloc_trace_last_num = new_window; + + /* W8. Release the global window lock. */ - (void) mutex_unlock (&__malloc_trace_mutex); ++ __libc_lock_unlock (__malloc_trace_mutex); + + /* W9. Continue at T1. */ + goto alg_t1; + + /* END W */ + } + + /* T3. CAS-acqrel the incremented global trace offset. If CAS + fails, go back to T1. */ + old_trace_count = catomic_exchange_and_add (&__malloc_trace_count, 1); + /* See if someone else incremented it while we weren't looking. */ + if (old_trace_count != my_trace_count) + goto alg_t1; + + /* T4. Write the trace data. */ + /* At this point, __malloc_trace_buffer[my_num] is valid because we + DIDN'T go through algorithm W, and it's reference counted for us, + and my_trace_count points to our record. */ + new_trace_ptr = __malloc_trace_buffer[my_num].window + TRACE_COUNT_TO_MAPPING_IDX (my_trace_count); + + /* At this point, we move trace data from our temporary record + (where we've been recording, among other things, path data) to + the trace buffer. Future trace data for this call will get + recorded directly to the trace buffer. */ + *new_trace_ptr = *trace_ptr; + trace_ptr = new_trace_ptr; +} + +/* Initialize the trace buffer and backing file. The file is + overwritten if it already exists. 
*/ +void +__malloc_trace_init (char *filename) +{ + int pagesize = __sysconf(_SC_PAGE_SIZE); + int main_length = TRACE_N_MAPPINGS * sizeof (__malloc_trace_buffer[0]); + int total_length = (main_length + strlen(filename) + 1 + pagesize-1) & (~(pagesize-1)); + char *mapping; + + mapping = __mmap (NULL, total_length, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mapping == NULL) + return; + + strcpy (mapping + main_length, filename); + __malloc_trace_filename = mapping + main_length; + + __malloc_trace_buffer = (__malloc_trace_map_entry *) mapping; + - mutex_init (&__malloc_trace_mutex); ++ __libc_lock_init (__malloc_trace_mutex); + __malloc_trace_count = 0; + + __mtb_trace_entry (__MTB_TYPE_MAGIC, sizeof(void *), (void *)0x1234); + atomic_store_release (&__malloc_trace_enabled, 1); + /* This will reset __malloc_trace_enabled if it fails. */ + __mtb_trace_record (); +} + +/* All remaining functions return current count of trace records. */ + +/* Pause - but don't stop - tracing. */ +size_t __malloc_trace_pause (void) +{ + atomic_store_release (&__malloc_trace_enabled, 0); + return atomic_load_relaxed (&__malloc_trace_count); +} + +/* Resume tracing where it left off when paused. */ +size_t __malloc_trace_unpause (void) +{ + if (__malloc_trace_buffer != NULL) + atomic_store_release (&__malloc_trace_enabled, 1); + return atomic_load_relaxed (&__malloc_trace_count); +} + +/* Stop tracing and clean up all the trace buffer mappings. */ +size_t __malloc_trace_stop (void) +{ + atomic_store_release (&__malloc_trace_enabled, 0); + /* FIXME: we can't actually release everything until all threads + have finished accessing the buffer, but we have no way of doing + that... */ + + /* For convenience, reduce the file size to only what's needed, else + the minimum file size we'll see if 64 Mb. */ + int trace_fd = __open (__malloc_trace_filename, O_RDWR|O_CREAT, 0666); + if (trace_fd >= 0) + { + __ftruncate (trace_fd, __malloc_trace_count * sizeof (struct __malloc_trace_buffer_s)); + __close (trace_fd); + } + + return atomic_load_relaxed (&__malloc_trace_count); +} + +/* Sync all buffer data to file (typically a no-op on Linux). */ +size_t __malloc_trace_sync (void) +{ + return atomic_load_relaxed (&__malloc_trace_count); +} + +/* CONCURRENCY NOTES: The load acquire here synchronizes with the store release + from __malloc_trace_init to ensure that all threads see the initialization + done by the first thread that calls __malloc_trace_init. The load acquire + also synchronizes with the store releases in __mtb_trace_entry to ensure + that all error cleanup is visible. Lastly it synchronizes with the store + releases from __malloc_trace_pause, __malloc_trace_unpause, and + __malloc_trace_top to ensure that all external changes are visible to the + current thread. */ + +/* Note: ENTRY is for function entry, and starts a per-thread record. + RECORD migrates that record into the common trace buffer. Timing + of the RECORD is critical to getting a valid trace record; it + should only be called when the function owns the pointers being + recorded. I.e. malloc should RECORD after obtaining a pointer, + free should RECORD before free'ing it. */ + +/* Be careful that __MTB_TRACE_RECORD is not called inside your own + ENABLE/DISABLE pair (this applies to your own call frame, not a + nested call). 
*/ + +#define __MTB_TRACE_ENTRY(type, size, ptr1) \ + if (__glibc_unlikely (atomic_load_acquire (&__malloc_trace_enabled)) \ + && __glibc_unlikely (__malloc_thread_trace_enabled)) \ + __mtb_trace_entry (__MTB_TYPE_##type,size,ptr1); +#define __MTB_TRACE_RECORD() \ + if (__glibc_unlikely (atomic_load_acquire (&__malloc_trace_enabled)) \ + && __glibc_unlikely (__malloc_thread_trace_enabled)) \ + __mtb_trace_record (); + +/* Ignore __malloc_thread_trace_enabled and set path bits. This allows us to + track the path of a call without additional traces. For example realloc + can call malloc and free without making new trace, but we record the paths + taken in malloc and free. */ +#define __MTB_TRACE_PATH(mpath) \ + if (__glibc_unlikely (trace_ptr != NULL)) \ + trace_ptr->path_##mpath = 1; + +#define __MTB_TRACE_SET(var,value) \ + if (__glibc_unlikely (__malloc_thread_trace_enabled) \ + && __glibc_unlikely (trace_ptr != NULL)) \ + trace_ptr->var = value; + +/* Allow __MTB_TRACE_ENTRY to create new trace entries. */ +#define __MTB_THREAD_TRACE_ENABLE() \ + ({ \ + __malloc_thread_trace_enabled = 1; \ + }) + +/* Disallow __MTB_TRACE_ENTRY from creating new trace + entries. Use of __MTB_TRACE_SET becomes a NOOP, but + __MTB_TRACE_PATH still sets the unique path bit in + the trace (all path bits are unique). */ +#define __MTB_THREAD_TRACE_DISABLE() \ + ({ \ + __malloc_thread_trace_enabled = 0; \ + }) + +#else +void __malloc_trace_init (char *filename) {} +size_t __malloc_trace_pause (void) { return 0; } +size_t __malloc_trace_unpause (void) { return 0; } +size_t __malloc_trace_stop (void) { return 0; } +size_t __malloc_trace_sync (void) { return 0; } + +#define __MTB_TRACE_ENTRY(type,size,ptr1) +#define __MTB_TRACE_RECORD() +#define __MTB_TRACE_PATH(mpath) +#define __MTB_TRACE_SET(var,value) +#define __MTB_THREAD_TRACE_ENABLE() +#define __MTB_THREAD_TRACE_DISNABLE() +#endif + /* ------------------ MMAP support ------------------ */ @@@ -2139,21 -1749,8 +2174,19 @@@ static struct malloc_par mp_ .trim_threshold = DEFAULT_TRIM_THRESHOLD, #define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8)) .arena_test = NARENAS_FROM_NCORES (1) +#if USE_TCACHE + , + .tcache_count = TCACHE_FILL_COUNT, + .tcache_max = TCACHE_IDX-1 +#endif }; - +/* Non public mallopt parameters. */ - #define M_ARENA_TEST -7 - #define M_ARENA_MAX -8 ++#if USE_TCACHE +#define M_TCACHE_COUNT -9 +#define M_TCACHE_MAX -10 - ++#endif + /* Maximum size of memory handled in fastbins. */ static INTERNAL_SIZE_T global_max_fast; @@@ -3305,44 -2874,6 +3335,44 @@@ mremap_chunk (mchunkptr p, size_t new_s /*------------------------ Public wrappers. 
--------------------------------*/ +#if USE_TCACHE + +typedef struct TCacheEntry { + struct TCacheEntry *next; +} TCacheEntry; + +typedef struct TCache { + struct TCache *prev, *next; + char initted; /* 0 = uninitted, 1 = normal, anything else = shutting down */ + char counts[TCACHE_IDX]; + TCacheEntry *entries[TCACHE_IDX]; +} TCache; + +static TCache *tcache_list = NULL; - static mutex_t tcache_mutex = _LIBC_LOCK_INITIALIZER; ++__libc_lock_define_initialized (static, tcache_mutex); + +static __thread TCache tcache = {0,0,0,{0},{0}}; + +static void __attribute__ ((section ("__libc_thread_freeres_fn"))) +tcache_thread_freeres (void) +{ + if (tcache.initted == 1) + { - (void) mutex_lock (&tcache_mutex); ++ libc_lock_lock (tcache_mutex); + tcache.initted = 2; + if (tcache.next) + tcache.next->prev = tcache.prev; + if (tcache.prev) + tcache.prev->next = tcache.next; + else + tcache_list = tcache.next; - (void) mutex_unlock (&tcache_mutex); ++ libc_lock_unlock (tcache_mutex); + } +} +text_set_element (__libc_thread_subfreeres, tcache_thread_freeres); + +#endif + void * __libc_malloc (size_t bytes) { @@@ -3354,143 -2883,8 +3384,143 @@@ void *(*hook) (size_t, const void *) = atomic_forced_read (__malloc_hook); if (__builtin_expect (hook != NULL, 0)) - return (*hook)(bytes, RETURN_ADDRESS (0)); + { + __MTB_TRACE_PATH (hook); + __MTB_THREAD_TRACE_DISABLE (); + victim = (*hook)(bytes, RETURN_ADDRESS (0)); + __MTB_THREAD_TRACE_ENABLE (); + __MTB_TRACE_RECORD (); + if (victim != NULL) + __MTB_TRACE_SET (size3, chunksize (mem2chunk (victim))); + return victim; + } + +#if USE_TCACHE + /* int_free also calls request2size, be careful to not pad twice. */ + size_t tbytes = request2size(bytes); + size_t tc_idx = size2tidx (tbytes); + + if (tcache.initted == 0) + { + tcache.initted = 1; - (void) mutex_lock (&tcache_mutex); ++ __libc_lock_lock (tcache_mutex); + tcache.next = tcache_list; + if (tcache.next) + tcache.next->prev = &tcache; + tcache_list = &tcache; - (void) mutex_unlock (&tcache_mutex); ++ __libc_lock_unlock (tcache_mutex); + } + + if (tc_idx < mp_.tcache_max + && tc_idx < TCACHE_IDX /* to appease gcc */ + && tcache.entries[tc_idx] != NULL + && tcache.initted == 1) + { + TCacheEntry *e = tcache.entries[tc_idx]; + tcache.entries[tc_idx] = e->next; + tcache.counts[tc_idx] --; + __MTB_TRACE_RECORD (); + __MTB_TRACE_PATH (thread_cache); + __MTB_TRACE_SET (ptr2, e); + __MTB_TRACE_SET (size3, tbytes); + return (void *) e; + } +#endif + +#if 0 && USE_TCACHE + /* This is fast but causes internal fragmentation, as it always + pulls large chunks but puts small chunks, leading to a large + backlog of small chunks. */ + if (tc_idx < mp_.tcache_max + && tcache.initted == 1) + { + void *ent; + size_t tc_bytes = tc_idx * MALLOC_ALIGNMENT; + size_t tc_ibytes; + size_t total_bytes; + int i; + + assert (tc_bytes >= tbytes); + + if (tc_bytes < 2 * SIZE_SZ) + tc_bytes = 2 * SIZE_SZ; + tc_ibytes = tc_bytes + 2*SIZE_SZ; + + total_bytes = tc_bytes + tc_ibytes * mp_.tcache_count; + + __MTB_TRACE_PATH (thread_cache); + __MTB_TRACE_PATH (cpu_cache); + arena_get (ar_ptr, total_bytes); + + if (ar_ptr) + { + ent = _int_malloc (ar_ptr, total_bytes); + /* Retry with another arena only if we were able to find a usable arena + before. 
*/ + if (!ent && ar_ptr != NULL) + { + __MTB_TRACE_PATH (cpu_cache2); + LIBC_PROBE (memory_malloc_retry, 1, total_bytes); + ar_ptr = arena_get_retry (ar_ptr, total_bytes); + ent = _int_malloc (ar_ptr, total_bytes); + //_m_printf("tc2: av %p sz %lx rv %p\n", ar_ptr, total_bytes, ent); + } + + if (ent) + { + mchunkptr m = mem2chunk (ent); + TCacheEntry *e; + int flags = m->size & SIZE_BITS; + size_t old_size = m->size & ~SIZE_BITS; + size_t extra = old_size - total_bytes - 2*SIZE_SZ; + +#if 0 + tid = syscall(__NR_gettid); + _m_printf("%04x tc: av %p sz %5lx.%5lx.%2d rv %p %16lx %16lx %d\n", + tid, ar_ptr, m->size, total_bytes, (int)extra, ent, (int64_t)m->prev_size, (int64_t)m->size, bytes); +#endif + if (flags & IS_MMAPPED) + { + write (2, "\033[31mMMAPPED CACHE BLOCK\033[0m\n", 29); + } + + m->size = tc_ibytes | flags; + flags |= PREV_INUSE; + + for (i = 0; i < mp_.tcache_count; i++) + { + m = (mchunkptr) (ent + i * tc_ibytes + tc_bytes); + e = (TCacheEntry *) (ent + i * tc_ibytes + tc_ibytes); + + // _m_printf("%04x \t%p %d\n", tid, m, tc_ibytes); + /* Not needed because the previous chunk is "in use". */ + m->size = tc_ibytes | flags; + e->next = tcache.entries[tc_idx]; + tcache.entries[tc_idx] = e; + tcache.counts[tc_idx] ++; + } + m->size = (tc_ibytes + extra) | flags; + /* Not needed because our last chunk is "in use". */ + /*m = (mchunkptr) (ent + total_bytes); + m->prev_size = tc_ibytes + extra;*/ + } + + /* This must go after the above code to ensure that other + threads see our changes, even though we're sending this chunk + up to the app. */ + if (ar_ptr != NULL) - (void) mutex_unlock (&ar_ptr->mutex); ++ __libc_lock_unlock (ar_ptr->mutex); + + __MTB_TRACE_RECORD (); + __MTB_TRACE_SET(ptr2, ent); + __MTB_TRACE_SET (size3, chunksize (mem2chunk (ent))); + return ent; + } + } +#endif + + __MTB_TRACE_PATH (cpu_cache); arena_get (ar_ptr, bytes); victim = _int_malloc (ar_ptr, bytes); @@@ -3724,13 -3040,11 +3754,13 @@@ __libc_realloc (void *oldmem, size_t by return newmem; } - (void) mutex_lock (&ar_ptr->mutex); + __libc_lock_lock (ar_ptr->mutex); + /* We expect _int_realloc() to call MTB_TRACE_RECORD for us, if it + returns non-NULL. */ newp = _int_realloc (ar_ptr, oldp, oldsize, nb); - (void) mutex_unlock (&ar_ptr->mutex); + __libc_lock_unlock (ar_ptr->mutex); assert (!newp || chunk_is_mmapped (mem2chunk (newp)) || ar_ptr == arena_for_chunk (mem2chunk (newp))); @@@ -3987,10 -3247,8 +4017,10 @@@ __libc_calloc (size_t n, size_t elem_si } if (av != NULL) - (void) mutex_unlock (&av->mutex); + __libc_lock_unlock (av->mutex); + __MTB_TRACE_RECORD (); + /* Allocation failed even after a retry. */ if (mem == 0) return 0; @@@ -4267,43 -3423,8 +4297,43 @@@ _int_malloc (mstate av, size_t bytes bck->fd = bin; if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; + set_non_main_arena (victim); check_malloced_chunk (av, victim, nb); +#if USE_TCACHE + /* While we're here, if we see other chunk of the same size, + stash them in the tcache. */ + size_t tc_idx = size2tidx (nb-SIZE_SZ); + if (tc_idx < mp_.tcache_max) + { + mchunkptr tc_victim; + int found = 0; + + /* While bin not empty and tcache not full, copy chunks over. 
*/ + while (tcache.counts[tc_idx] < mp_.tcache_count + && (tc_victim = last(bin)) != bin) + { + if (tc_victim != 0) + { + bck = tc_victim->bk; + set_inuse_bit_at_offset (tc_victim, nb); + if (av != &main_arena) - tc_victim->size |= NON_MAIN_ARENA; ++ set_non_main_arena (tc_victim); + bin->bk = bck; + bck->fd = bin; + + TCacheEntry *e = (TCacheEntry *) chunk2mem(tc_victim); + e->next = tcache.entries[tc_idx]; + tcache.entries[tc_idx] = e; + tcache.counts[tc_idx] ++; + found ++; + //_m_printf("snarf chunk %p %lx %p %lx\n", tc_victim, nb, + // chunk_at_offset(tc_victim, nb), chunk_at_offset(tc_victim, nb)->size); + } + } + //_m_printf("%d chunks found in smallbin\n", found); + } +#endif + //_m_printf("%d: return %p\n", __LINE__, victim); void *p = chunk2mem (victim); alloc_perturb (p, bytes); return p; @@@ -4362,10 -3468,10 +4392,11 @@@ int iters = 0; while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av)) { + __MTB_TRACE_PATH(unsorted_remove); bck = victim->bk; - if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0) - || __builtin_expect (victim->size > av->system_mem, 0)) + if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0) + || __builtin_expect (chunksize_nomask (victim) + > av->system_mem, 0)) malloc_printerr (check_action, "malloc(): memory corruption", chunk2mem (victim), av); size = chunksize (victim); @@@ -4428,27 -3522,8 +4459,26 @@@ { set_inuse_bit_at_offset (victim, size); if (av != &main_arena) - victim->size |= NON_MAIN_ARENA; + set_non_main_arena (victim); + +#if USE_TCACHE + /* Fill cache first, return to user only if cache fills. + We may return one of these chunks later. */ + if (tcache_nb + && tcache.counts[tc_idx] < mp_.tcache_count) + { + TCacheEntry *e = (TCacheEntry *) chunk2mem(victim); + e->next = tcache.entries[tc_idx]; + tcache.entries[tc_idx] = e; + tcache.counts[tc_idx] ++; + return_cached = 1; + continue; + } + else + { +#endif - check_malloced_chunk (av, victim, nb); + //_m_printf("%d: return %p\n", __LINE__, victim); void *p = chunk2mem (victim); alloc_perturb (p, bytes); return p; @@@ -5153,9 -4170,8 +5189,9 @@@ static void malloc_consolidate(mstate a check_inuse_chunk(av, p); nextp = p->fd; + __MTB_TRACE_PATH(fastbin_remove); /* Slightly streamlined version of consolidation code in free() */ - size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA); + size = chunksize (p); nextchunk = chunk_at_offset(p, size); nextsize = chunksize(nextchunk); @@@ -5235,10 -4250,8 +5271,10 @@@ _int_realloc(mstate av, mchunkptr oldp const char *errstr = NULL; + /* We must call __MTB_TRACE_RECORD if we return non-NULL. 
*/ + /* oldmem size */ - if (__builtin_expect (oldp->size <= 2 * SIZE_SZ, 0) + if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0) || __builtin_expect (oldsize >= av->system_mem, 0)) { errstr = "realloc(): invalid old size"; @@@ -5831,33 -4902,10 +5929,30 @@@ __libc_mallopt (int param_number, int v case M_ARENA_MAX: if (value > 0) - { - LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max); - mp_.arena_max = value; - } + do_set_arena_test (value); break; +#if USE_TCACHE + case M_TCACHE_COUNT: + if (value >= 0) + { + LIBC_PROBE (memory_mallopt_tcache_count, 2, value, mp_.tcache_count); + mp_.tcache_count = value; + } + break; + case M_TCACHE_MAX: + if (value >= 0) + { + value = size2tidx (value); + if (value < TCACHE_IDX) + { + LIBC_PROBE (memory_mallopt_tcache_max, 2, value, mp_.tcache_max); + mp_.tcache_max = value; + } + } + break; +#endif } - (void) mutex_unlock (&av->mutex); + __libc_lock_unlock (av->mutex); return res; } libc_hidden_def (__libc_mallopt)
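
The per-thread cache added above keys everything off a size-class index: size2tidx() rounds the request up to a whole number of MALLOC_ALIGNMENT units, each of the TCACHE_IDX classes keeps a singly linked LIFO list in TCache.entries[], and TCACHE_FILL_COUNT (7) caps how many chunks a class may hold before further frees fall through to the arena. The standalone sketch below models only that mapping and the push/pop discipline; it assumes MALLOC_ALIGNMENT is 8 (matching the patch's own "idx 1 bytes 1..8, idx 2 bytes 9..16" comment), and size_to_idx, cache_push and cache_pop are illustrative stand-ins, not symbols from the patch.

/* Standalone model of the per-thread cache: ALIGNMENT, TC_IDX and
   FILL_COUNT mirror MALLOC_ALIGNMENT, TCACHE_IDX and TCACHE_FILL_COUNT;
   the helper names are illustrative only.  */
#include <stddef.h>
#include <stdio.h>

#define ALIGNMENT    8                       /* MALLOC_ALIGNMENT assumed here */
#define MAX_TC_SIZE  (ALIGNMENT * 63)        /* "we want 64 entries" */
#define TC_IDX       ((MAX_TC_SIZE / ALIGNMENT) + 1)
#define FILL_COUNT   7

/* Same rounding as size2tidx(): 0 -> 0, 1..8 -> 1, 9..16 -> 2, ...  */
static size_t
size_to_idx (size_t bytes)
{
  return (bytes + ALIGNMENT - 1) / ALIGNMENT;
}

/* One singly linked LIFO list per size class, like TCache.entries[].  */
struct entry { struct entry *next; };

static struct entry *entries[TC_IDX];
static char counts[TC_IDX];

/* Cache a freed block of class IDX, but only while the bin is not full;
   a full bin means the block would go back to the arena instead.  */
static int
cache_push (size_t idx, struct entry *e)
{
  if (idx >= TC_IDX || counts[idx] >= FILL_COUNT)
    return 0;
  e->next = entries[idx];
  entries[idx] = e;
  counts[idx]++;
  return 1;
}

/* Pop the most recently cached block of class IDX, or NULL if empty.  */
static void *
cache_pop (size_t idx)
{
  if (idx >= TC_IDX || entries[idx] == NULL)
    return NULL;
  struct entry *e = entries[idx];
  entries[idx] = e->next;
  counts[idx]--;
  return e;
}

int
main (void)
{
  static struct entry blocks[2];
  size_t idx = size_to_idx (24);
  cache_push (idx, &blocks[0]);
  cache_push (idx, &blocks[1]);
  /* LIFO: the block pushed last comes back first.  */
  printf ("bin %zu: popped %p then %p (%d bins, at most %d chunks each)\n",
          idx, cache_pop (idx), cache_pop (idx), TC_IDX, FILL_COUNT);
  return 0;
}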
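
The trace buffer treats its backing file as TRACE_N_MAPPINGS windows of TRACE_MAPPING_SIZE (64 MiB) each: TRACE_COUNT_TO_MAPPING_NUM() selects the window for a record index, TRACE_COUNT_TO_MAPPING_IDX() the slot within it, and algorithm F extends the file with ftruncate() to cover the end of the window before mmap'ing only that window at its file offset, so that stores through the mapping cannot SIGBUS. The sketch below reproduces just that arithmetic and mapping step with a stand-in record type; it omits the reference counting, the lock and the atomics, and the file name "trace.bin" is made up.

/* Sketch of mapping one 64 MiB window of a trace file, following the
   window/offset arithmetic used by the patch.  "struct record" stands in
   for struct __malloc_trace_buffer_s; error handling is minimal.  */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

#define MAPPING_SIZE  67108864                  /* 64 MiB per window */

struct record { uint64_t type, size, ptr; };    /* stand-in record */

#define N_PER_MAPPING (MAPPING_SIZE / sizeof (struct record))

int
main (void)
{
  size_t count  = 3000000;                      /* some global record index */
  size_t window = count / N_PER_MAPPING;        /* TRACE_COUNT_TO_MAPPING_NUM */
  size_t slot   = count % N_PER_MAPPING;        /* TRACE_COUNT_TO_MAPPING_IDX */

  int fd = open ("trace.bin", O_RDWR | O_CREAT, 0666);
  if (fd < 0)
    return 1;

  /* Extend the file past the end of this window first (sparse on Linux),
     so that writes through the mapping do not fault.  */
  if (ftruncate (fd, (off_t) (window + 1) * MAPPING_SIZE) != 0)
    return 1;

  struct record *win = mmap (NULL, MAPPING_SIZE, PROT_READ | PROT_WRITE,
                             MAP_SHARED, fd, (off_t) window * MAPPING_SIZE);
  close (fd);                                   /* the mapping stays valid */
  if (win == MAP_FAILED)
    return 1;

  win[slot] = (struct record) { 1, 64, 0x1234 };  /* write one record */
  printf ("record %zu -> window %zu, slot %zu\n", count, window, slot);

  munmap (win, MAPPING_SIZE);
  return 0;
}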
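
With USE_TCACHE enabled, the new behaviour can be tuned through the non-public mallopt() parameters this patch defines: M_TCACHE_COUNT (-9) caps the number of cached chunks per size class, and M_TCACHE_MAX (-10) caps the request size that is cached (the byte value is run through size2tidx() before being stored). A minimal usage sketch, assuming a glibc built from this branch; the parameter numbers are copied from the patch and are not part of the public API.

#include <malloc.h>

/* Non-public parameter numbers from the patch; only meaningful when
   glibc is built with USE_TCACHE.  */
#define M_TCACHE_COUNT  -9
#define M_TCACHE_MAX    -10

int
main (void)
{
  mallopt (M_TCACHE_COUNT, 4);    /* cache at most 4 chunks per class */
  mallopt (M_TCACHE_MAX, 256);    /* only cache requests up to 256 bytes */
  return 0;
}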