#define MALLOC_DEBUG 0
#endif
- #define USE_TCACHE 1
-
++#ifndef USE_TCACHE
++#define USE_TCACHE 0
++#endif
+#if USE_TCACHE
+/* we want 64 entries */
+#define MAX_TCACHE_SIZE (MALLOC_ALIGNMENT * 63)
+#define TCACHE_IDX ((MAX_TCACHE_SIZE / MALLOC_ALIGNMENT) + 1)
+#define size2tidx(bytes) (((bytes) + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+
+/* Rounds up, so...
+ idx 0 bytes 0
+ idx 1 bytes 1..8
+ idx 2 bytes 9..16
+ etc
+*/
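+
+/* For instance, assuming MALLOC_ALIGNMENT == 8 as in the table above,
+   size2tidx (24) == (24 + 8 - 1) / 8 == 3.  */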
+
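+/* Upper bound on the number of chunks cached per size class
+   (the default for mp_.tcache_count).  */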
+#define TCACHE_FILL_COUNT 7
+#endif
+
#ifdef NDEBUG
# define assert(expr) ((void) 0)
#else
static void* memalign_check(size_t alignment, size_t bytes,
const void *caller);
- mutex_t __malloc_trace_mutex;
+/* ------------------ TRACE support ------------------ */
+#define USE_MTRACE 1
+#if USE_MTRACE
+#include "mtrace.h"
+
+typedef struct __malloc_trace_map_entry_s {
+ int ref_count;
+ __malloc_trace_buffer_ptr window;
+} __malloc_trace_map_entry;
+
+/* 16 Tb max file size, 64 Mb per window */
+#define TRACE_MAPPING_SIZE 67108864
+#define TRACE_N_PER_MAPPING (TRACE_MAPPING_SIZE / sizeof (struct __malloc_trace_buffer_s))
+#define TRACE_N_MAPPINGS 262144
+#define TRACE_MAX_COUNT (TRACE_N_PER_MAPPING * TRACE_N_MAPPINGS)
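+/* Sanity check on the numbers above: 2^26 bytes per window times 2^18
+   windows is 2^44 bytes, i.e. at most 16 TiB of trace file.  */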
+
+/* Index into __malloc_trace_buffer[] */
+#define TRACE_COUNT_TO_MAPPING_NUM(count) ((count) / TRACE_N_PER_MAPPING)
+/* Index into __malloc_trace_buffer[n][] */
+#define TRACE_COUNT_TO_MAPPING_IDX(count) ((count) % TRACE_N_PER_MAPPING)
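+/* Worked example: if, purely for illustration, a trace record were
+   64 bytes, each window would hold 2^20 records, so trace count 3000000
+   would land in window 2 at index 902848 within that window.  */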
+
+/* Global mutex for the trace buffer tree itself. */
- (void) mutex_lock (&__malloc_trace_mutex);
++__libc_lock_define_initialized (static, __malloc_trace_mutex);
+
+/* Global counter, "full" when equal to TRACE_MAX_COUNT. Points to
+ the next available slot, so POST-INCREMENT it. */
+volatile size_t __malloc_trace_count = 0;
+
+/* Array of TRACE_N_MAPPINGS pointers to potentially mapped trace buffers. */
+volatile __malloc_trace_map_entry *__malloc_trace_buffer = NULL;
+/* The file we're mapping them to. */
+char * __malloc_trace_filename = NULL;
+
+/* Global trace enable flag. Default off.
+ If global trace enable is 1 then tracing is carried out for all
+ threads. Otherwise no threads trace calls. */
+volatile int __malloc_trace_enabled = 0;
+
+/* Per-thread trace enable flag. Default on.
+ If thread trace enable is 1 then tracing for the thread behaves as expected
+ per the global trace enabled value.
+ If thread trace enable is 0 then __MTB_TRACE_ENTRY and __MTB_TRACE_SET
+ do nothing, only __MTB_TRACE_PATH sets path bits i.e. no new traces are
+ created, the existing trace is used to store path bits.
+   The purpose of this is to allow the implementation to nest public API
+   calls and track paths without creating multiple nested trace events. */
+__thread int __malloc_thread_trace_enabled = 1;
+
+static __thread int __malloc_trace_last_num = -1;
+static __thread __malloc_trace_buffer_ptr trace_ptr;
+static __thread struct __malloc_trace_buffer_s temporary_trace_record;
+
+static inline pid_t
+__gettid (void)
+{
+ struct pthread *pd = THREAD_SELF;
+ pid_t selftid = THREAD_GETMEM (pd, tid);
+ if (selftid == 0)
+ {
+ /* This system call is not supposed to fail. */
+#ifdef INTERNAL_SYSCALL
+ INTERNAL_SYSCALL_DECL (err);
+ selftid = INTERNAL_SYSCALL (gettid, err, 0);
+#else
+ selftid = INLINE_SYSCALL (gettid, 0);
+#endif
+ THREAD_SETMEM (pd, tid, selftid);
+ }
+
+ return selftid;
+}
+
+static void
+__mtb_trace_entry (uint32_t type, size_t size, void *ptr1)
+{
+ trace_ptr = &temporary_trace_record;
+
+ trace_ptr->thread = __gettid ();
+ trace_ptr->type = type;
+ trace_ptr->path_thread_cache = 0;
+ trace_ptr->path_cpu_cache = 0;
+ trace_ptr->path_cpu_cache2 = 0;
+ trace_ptr->path_sbrk = 0;
+ trace_ptr->path_mmap = 0;
+ trace_ptr->path_munmap = 0;
+ trace_ptr->path_m_f_realloc = 0;
+ trace_ptr->path_hook = 0;
+ trace_ptr->path_unsorted_add = 0;
+ trace_ptr->path_unsorted_remove = 0;
+ trace_ptr->path_unsorted_empty = 0;
+ trace_ptr->path_fastbin_add = 0;
+ trace_ptr->path_fastbin_remove = 0;
+ trace_ptr->path_malloc_consolidate = 0;
+ trace_ptr->path = 0;
+ trace_ptr->ptr1 = ptr1;
+ trace_ptr->ptr2 = 0;
+ trace_ptr->size = size;
+ trace_ptr->size2 = 0;
+ trace_ptr->size3 = 0;
+}
+
+/* Note: "record" the verb, not "record" the noun. This call records
+ the accumulated trace data into the trace buffer, and should be
+ called when the caller "owns" the pointers being recorded, to avoid
+ trace inversion. */
+static void
+__mtb_trace_record (void)
+{
+ size_t my_trace_count;
+ size_t old_trace_count;
+ int my_num;
+ __malloc_trace_buffer_ptr new_trace_ptr;
+
+ /* START T: Log trace event. */
+ alg_t1:
+ /* T1. Perform a load-acq of the global trace offset. */
+ my_trace_count = atomic_load_acquire (&__malloc_trace_count);
+
+ /* T2. If the window number is different from the current
+ thread-local window number, proceed with algorithm W below. */
+ my_num = TRACE_COUNT_TO_MAPPING_NUM (my_trace_count);
+ if (my_num != __malloc_trace_last_num)
+ {
+ long new_window;
+ int new_ref_count;
+
+ /* START W: Switch window. */
+
+ /* W1. Acquire the global window lock. */
- (void) mutex_unlock (&__malloc_trace_mutex);
++ __libc_lock_lock (__malloc_trace_mutex);
+
+ /* W2. If the thread-local window number is not -1, decrement the reference
+ counter for the current thread window. */
+ if (__malloc_trace_last_num != -1)
+ {
+ int old_window = __malloc_trace_last_num;
+ int old_ref_count = catomic_exchange_and_add (&__malloc_trace_buffer[old_window].ref_count, -1);
+ /* W3. If that reference counter reached 0, unmap the window. */
+ if (old_ref_count == 1)
+ {
+ __munmap (__malloc_trace_buffer[old_window].window, TRACE_MAPPING_SIZE);
+ __malloc_trace_buffer[old_window].window = NULL;
+ }
+ }
+
+ /* W4. Perform a load-relaxed of the global trace offset. */
+ my_trace_count = atomic_load_relaxed (&__malloc_trace_count);
+
+ /* W5. Increment the reference counter of the corresponding window. */
+ new_window = TRACE_COUNT_TO_MAPPING_NUM (my_trace_count);
+ new_ref_count = catomic_exchange_and_add (&__malloc_trace_buffer[new_window].ref_count, 1);
+
+ /* W6. If the incremented reference counter is 1, perform algorithm F. */
+ if (new_ref_count == 0)
+ {
+ /* START F: Map window from file. */
+
+ /* Note: There are security issues wrt opening a file by
+ name many times. We know this, and the risk is low (if
+ you have root access, there are better ways to wreak
+ havoc). We choose this design so that there isn't an
+            open file handle which may interfere with, or be
+ corrupted by, the running application. */
+
+ /* F1. Open the trace file. */
+ int trace_fd = __open (__malloc_trace_filename, O_RDWR|O_CREAT, 0666);
+ if (trace_fd < 0)
+ {
+ /* FIXME: Better handling of errors? */
+ __libc_message (0, "Can't open trace buffer file %s\n", __malloc_trace_filename);
+ atomic_store_release (&__malloc_trace_enabled, 0);
- (void) mutex_unlock (&__malloc_trace_mutex);
++ __libc_lock_unlock (__malloc_trace_mutex);
+ return;
+ }
+
+ /* F2. Extend the file length so that it covers the end of the current
+ window (using ftruncate, needed to avoid SIGBUS). */
+ __ftruncate (trace_fd, (new_window + 1) * TRACE_MAPPING_SIZE);
+
+ /* F3. Map the window from the file offset corresponding to
+ the current window. */
+ void *ptr =
+ __mmap (NULL, TRACE_MAPPING_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+ trace_fd, new_window * TRACE_MAPPING_SIZE);
+      if (ptr == MAP_FAILED)
+ {
+ /* FIXME: Better handling of errors? */
+ __libc_message (0, "Can't map trace_buffer file %s\n", __malloc_trace_filename);
+ atomic_store_release (&__malloc_trace_enabled, 0);
- (void) mutex_unlock (&__malloc_trace_mutex);
++ __libc_lock_unlock (__malloc_trace_mutex);
+ return;
+ }
+
+ /* F4. Update the mapping pointer in the active window array. */
+ __malloc_trace_buffer[new_window].window = ptr;
+
+ /* F5. Close the file. */
+ __close (trace_fd);
+
+ /* F6. Continue with step W7. */
+ /* END F */
+ }
+
+ /* W7. Assign the window number to the thread-local window number,
+ switching the thread window. */
+ __malloc_trace_last_num = new_window;
+
+ /* W8. Release the global window lock. */
- mutex_init (&__malloc_trace_mutex);
++ __libc_lock_unlock (__malloc_trace_mutex);
+
+ /* W9. Continue at T1. */
+ goto alg_t1;
+
+ /* END W */
+ }
+
+ /* T3. CAS-acqrel the incremented global trace offset. If CAS
+ fails, go back to T1. */
+ old_trace_count = catomic_exchange_and_add (&__malloc_trace_count, 1);
+ /* See if someone else incremented it while we weren't looking. */
+ if (old_trace_count != my_trace_count)
+ goto alg_t1;
+
+ /* T4. Write the trace data. */
+ /* At this point, __malloc_trace_buffer[my_num] is valid because we
+ DIDN'T go through algorithm W, and it's reference counted for us,
+ and my_trace_count points to our record. */
+ new_trace_ptr = __malloc_trace_buffer[my_num].window + TRACE_COUNT_TO_MAPPING_IDX (my_trace_count);
+
+ /* At this point, we move trace data from our temporary record
+ (where we've been recording, among other things, path data) to
+ the trace buffer. Future trace data for this call will get
+ recorded directly to the trace buffer. */
+ *new_trace_ptr = *trace_ptr;
+ trace_ptr = new_trace_ptr;
+}
+
+/* Initialize the trace buffer and backing file. The file is
+ overwritten if it already exists. */
+void
+__malloc_trace_init (char *filename)
+{
+ int pagesize = __sysconf(_SC_PAGE_SIZE);
+ int main_length = TRACE_N_MAPPINGS * sizeof (__malloc_trace_buffer[0]);
+ int total_length = (main_length + strlen(filename) + 1 + pagesize-1) & (~(pagesize-1));
+ char *mapping;
+
+ mapping = __mmap (NULL, total_length, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mapping == MAP_FAILED)
+ return;
+
+ strcpy (mapping + main_length, filename);
+ __malloc_trace_filename = mapping + main_length;
+
+ __malloc_trace_buffer = (__malloc_trace_map_entry *) mapping;
+
++ __libc_lock_init (__malloc_trace_mutex);
+ __malloc_trace_count = 0;
+
+ __mtb_trace_entry (__MTB_TYPE_MAGIC, sizeof(void *), (void *)0x1234);
+ atomic_store_release (&__malloc_trace_enabled, 1);
+ /* This will reset __malloc_trace_enabled if it fails. */
+ __mtb_trace_record ();
+}
+
+/* All remaining functions return the current count of trace records. */
+
+/* Pause - but don't stop - tracing. */
+size_t __malloc_trace_pause (void)
+{
+ atomic_store_release (&__malloc_trace_enabled, 0);
+ return atomic_load_relaxed (&__malloc_trace_count);
+}
+
+/* Resume tracing where it left off when paused. */
+size_t __malloc_trace_unpause (void)
+{
+ if (__malloc_trace_buffer != NULL)
+ atomic_store_release (&__malloc_trace_enabled, 1);
+ return atomic_load_relaxed (&__malloc_trace_count);
+}
+
+/* Stop tracing and clean up all the trace buffer mappings. */
+size_t __malloc_trace_stop (void)
+{
+ atomic_store_release (&__malloc_trace_enabled, 0);
+ /* FIXME: we can't actually release everything until all threads
+ have finished accessing the buffer, but we have no way of doing
+ that... */
+
+  /* For convenience, reduce the file size to only what's needed;
+     otherwise the minimum file size we'd see is 64 MB.  */
+ int trace_fd = __open (__malloc_trace_filename, O_RDWR|O_CREAT, 0666);
+ if (trace_fd >= 0)
+ {
+ __ftruncate (trace_fd, __malloc_trace_count * sizeof (struct __malloc_trace_buffer_s));
+ __close (trace_fd);
+ }
+
+ return atomic_load_relaxed (&__malloc_trace_count);
+}
+
+/* Sync all buffer data to file (typically a no-op on Linux). */
+size_t __malloc_trace_sync (void)
+{
+ return atomic_load_relaxed (&__malloc_trace_count);
+}
+
+/* CONCURRENCY NOTES: The load acquire here synchronizes with the store release
+ from __malloc_trace_init to ensure that all threads see the initialization
+ done by the first thread that calls __malloc_trace_init. The load acquire
+   also synchronizes with the store releases in __mtb_trace_record to ensure
+ that all error cleanup is visible. Lastly it synchronizes with the store
+ releases from __malloc_trace_pause, __malloc_trace_unpause, and
+   __malloc_trace_stop to ensure that all external changes are visible to the
+ current thread. */
+
+/* Note: ENTRY is for function entry, and starts a per-thread record.
+ RECORD migrates that record into the common trace buffer. Timing
+ of the RECORD is critical to getting a valid trace record; it
+ should only be called when the function owns the pointers being
+ recorded. I.e. malloc should RECORD after obtaining a pointer,
+ free should RECORD before free'ing it. */
+
+/* Be careful that __MTB_TRACE_RECORD is not called inside your own
+ ENABLE/DISABLE pair (this applies to your own call frame, not a
+ nested call). */
+
+#define __MTB_TRACE_ENTRY(type, size, ptr1) \
+ if (__glibc_unlikely (atomic_load_acquire (&__malloc_trace_enabled)) \
+ && __glibc_unlikely (__malloc_thread_trace_enabled)) \
+ __mtb_trace_entry (__MTB_TYPE_##type,size,ptr1);
+#define __MTB_TRACE_RECORD() \
+ if (__glibc_unlikely (atomic_load_acquire (&__malloc_trace_enabled)) \
+ && __glibc_unlikely (__malloc_thread_trace_enabled)) \
+ __mtb_trace_record ();
+
+/* Ignore __malloc_thread_trace_enabled and set path bits. This allows us to
+   track the path of a call without additional trace records.  For example,
+   realloc can call malloc and free without creating new trace records, but
+   we still record the paths taken inside malloc and free. */
+#define __MTB_TRACE_PATH(mpath) \
+ if (__glibc_unlikely (trace_ptr != NULL)) \
+ trace_ptr->path_##mpath = 1;
+
+#define __MTB_TRACE_SET(var,value) \
+ if (__glibc_unlikely (__malloc_thread_trace_enabled) \
+ && __glibc_unlikely (trace_ptr != NULL)) \
+ trace_ptr->var = value;
+
+/* Allow __MTB_TRACE_ENTRY to create new trace entries. */
+#define __MTB_THREAD_TRACE_ENABLE() \
+ ({ \
+ __malloc_thread_trace_enabled = 1; \
+ })
+
+/* Disallow __MTB_TRACE_ENTRY from creating new trace
+ entries. Use of __MTB_TRACE_SET becomes a NOOP, but
+ __MTB_TRACE_PATH still sets the unique path bit in
+ the trace (all path bits are unique). */
+#define __MTB_THREAD_TRACE_DISABLE() \
+ ({ \
+ __malloc_thread_trace_enabled = 0; \
+ })
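+
+/* A rough usage sketch of the above macros (the real call sites are the
+   __libc_* wrappers further down; a MALLOC record type is assumed to be
+   defined in mtrace.h):
+
+     __MTB_TRACE_ENTRY (MALLOC, bytes, NULL);    start a per-thread record
+     victim = _int_malloc (ar_ptr, bytes);       internals may set path bits
+     __MTB_TRACE_RECORD ();                      commit it to the trace buffer
+     __MTB_TRACE_SET (ptr2, victim);             fill in result fields  */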
+
+#else
+void __malloc_trace_init (char *filename) {}
+size_t __malloc_trace_pause (void) { return 0; }
+size_t __malloc_trace_unpause (void) { return 0; }
+size_t __malloc_trace_stop (void) { return 0; }
+size_t __malloc_trace_sync (void) { return 0; }
+
+#define __MTB_TRACE_ENTRY(type,size,ptr1)
+#define __MTB_TRACE_RECORD()
+#define __MTB_TRACE_PATH(mpath)
+#define __MTB_TRACE_SET(var,value)
+#define __MTB_THREAD_TRACE_ENABLE()
+#define __MTB_THREAD_TRACE_DISABLE()
+#endif
+
/* ------------------ MMAP support ------------------ */
.trim_threshold = DEFAULT_TRIM_THRESHOLD,
#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
.arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+ ,
+ .tcache_count = TCACHE_FILL_COUNT,
+ .tcache_max = TCACHE_IDX-1
+#endif
};
-
+/* Non public mallopt parameters. */
- #define M_ARENA_TEST -7
- #define M_ARENA_MAX -8
++#if USE_TCACHE
+#define M_TCACHE_COUNT -9
+#define M_TCACHE_MAX -10
-
++#endif
+
/* Maximum size of memory handled in fastbins. */
static INTERNAL_SIZE_T global_max_fast;
/*------------------------ Public wrappers. --------------------------------*/
- static mutex_t tcache_mutex = _LIBC_LOCK_INITIALIZER;
+#if USE_TCACHE
+
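+/* Free chunks held in the thread cache are linked through their user
+   data (chunk2mem) area, one list per size class.  */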
+typedef struct TCacheEntry {
+ struct TCacheEntry *next;
+} TCacheEntry;
+
+typedef struct TCache {
+ struct TCache *prev, *next;
+ char initted; /* 0 = uninitted, 1 = normal, anything else = shutting down */
+ char counts[TCACHE_IDX];
+ TCacheEntry *entries[TCACHE_IDX];
+} TCache;
+
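+/* One TCache per thread; live instances are chained on tcache_list so
+   that tcache_thread_freeres can unlink an exiting thread's cache.  */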
+static TCache *tcache_list = NULL;
- (void) mutex_lock (&tcache_mutex);
++__libc_lock_define_initialized (static, tcache_mutex);
+
+static __thread TCache tcache = {0,0,0,{0},{0}};
+
+static void __attribute__ ((section ("__libc_thread_freeres_fn")))
+tcache_thread_freeres (void)
+{
+ if (tcache.initted == 1)
+ {
- (void) mutex_unlock (&tcache_mutex);
++      __libc_lock_lock (tcache_mutex);
+ tcache.initted = 2;
+ if (tcache.next)
+ tcache.next->prev = tcache.prev;
+ if (tcache.prev)
+ tcache.prev->next = tcache.next;
+ else
+ tcache_list = tcache.next;
++      __libc_lock_unlock (tcache_mutex);
+ }
+}
+text_set_element (__libc_thread_subfreeres, tcache_thread_freeres);
+
+#endif
+
void *
__libc_malloc (size_t bytes)
{
void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
- return (*hook)(bytes, RETURN_ADDRESS (0));
+ {
+ __MTB_TRACE_PATH (hook);
+ __MTB_THREAD_TRACE_DISABLE ();
+ victim = (*hook)(bytes, RETURN_ADDRESS (0));
+ __MTB_THREAD_TRACE_ENABLE ();
+ __MTB_TRACE_RECORD ();
+ if (victim != NULL)
+ __MTB_TRACE_SET (size3, chunksize (mem2chunk (victim)));
+ return victim;
+ }
+
+#if USE_TCACHE
+  /* _int_free also calls request2size, so be careful not to pad twice.  */
+  size_t tbytes = request2size (bytes);
+ size_t tc_idx = size2tidx (tbytes);
+
+ if (tcache.initted == 0)
+ {
+ tcache.initted = 1;
- (void) mutex_lock (&tcache_mutex);
++ __libc_lock_lock (tcache_mutex);
+ tcache.next = tcache_list;
+ if (tcache.next)
+ tcache.next->prev = &tcache;
+ tcache_list = &tcache;
- (void) mutex_unlock (&tcache_mutex);
++ __libc_lock_unlock (tcache_mutex);
+ }
+
+ if (tc_idx < mp_.tcache_max
+ && tc_idx < TCACHE_IDX /* to appease gcc */
+ && tcache.entries[tc_idx] != NULL
+ && tcache.initted == 1)
+ {
+ TCacheEntry *e = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e->next;
+ tcache.counts[tc_idx] --;
+ __MTB_TRACE_RECORD ();
+ __MTB_TRACE_PATH (thread_cache);
+ __MTB_TRACE_SET (ptr2, e);
+ __MTB_TRACE_SET (size3, tbytes);
+ return (void *) e;
+ }
+#endif
+
+#if 0 && USE_TCACHE
+ /* This is fast but causes internal fragmentation, as it always
+ pulls large chunks but puts small chunks, leading to a large
+ backlog of small chunks. */
+ if (tc_idx < mp_.tcache_max
+ && tcache.initted == 1)
+ {
+ void *ent;
+ size_t tc_bytes = tc_idx * MALLOC_ALIGNMENT;
+ size_t tc_ibytes;
+ size_t total_bytes;
+ int i;
+
+ assert (tc_bytes >= tbytes);
+
+ if (tc_bytes < 2 * SIZE_SZ)
+ tc_bytes = 2 * SIZE_SZ;
+ tc_ibytes = tc_bytes + 2*SIZE_SZ;
+
+ total_bytes = tc_bytes + tc_ibytes * mp_.tcache_count;
+
+ __MTB_TRACE_PATH (thread_cache);
+ __MTB_TRACE_PATH (cpu_cache);
- (void) mutex_unlock (&ar_ptr->mutex);
+ arena_get (ar_ptr, total_bytes);
+
+ if (ar_ptr)
+ {
+ ent = _int_malloc (ar_ptr, total_bytes);
+ /* Retry with another arena only if we were able to find a usable arena
+ before. */
+ if (!ent && ar_ptr != NULL)
+ {
+ __MTB_TRACE_PATH (cpu_cache2);
+ LIBC_PROBE (memory_malloc_retry, 1, total_bytes);
+ ar_ptr = arena_get_retry (ar_ptr, total_bytes);
+ ent = _int_malloc (ar_ptr, total_bytes);
+ //_m_printf("tc2: av %p sz %lx rv %p\n", ar_ptr, total_bytes, ent);
+ }
+
+ if (ent)
+ {
+ mchunkptr m = mem2chunk (ent);
+ TCacheEntry *e;
+ int flags = m->size & SIZE_BITS;
+ size_t old_size = m->size & ~SIZE_BITS;
+ size_t extra = old_size - total_bytes - 2*SIZE_SZ;
+
+#if 0
+ tid = syscall(__NR_gettid);
+ _m_printf("%04x tc: av %p sz %5lx.%5lx.%2d rv %p %16lx %16lx %d\n",
+ tid, ar_ptr, m->size, total_bytes, (int)extra, ent, (int64_t)m->prev_size, (int64_t)m->size, bytes);
+#endif
+ if (flags & IS_MMAPPED)
+ {
+ write (2, "\033[31mMMAPPED CACHE BLOCK\033[0m\n", 29);
+ }
+
+ m->size = tc_ibytes | flags;
+ flags |= PREV_INUSE;
+
+ for (i = 0; i < mp_.tcache_count; i++)
+ {
+ m = (mchunkptr) (ent + i * tc_ibytes + tc_bytes);
+ e = (TCacheEntry *) (ent + i * tc_ibytes + tc_ibytes);
+
+ // _m_printf("%04x \t%p %d\n", tid, m, tc_ibytes);
+ /* Not needed because the previous chunk is "in use". */
+ m->size = tc_ibytes | flags;
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ tcache.counts[tc_idx] ++;
+ }
+ m->size = (tc_ibytes + extra) | flags;
+ /* Not needed because our last chunk is "in use". */
+ /*m = (mchunkptr) (ent + total_bytes);
+ m->prev_size = tc_ibytes + extra;*/
+ }
+
+ /* This must go after the above code to ensure that other
+ threads see our changes, even though we're sending this chunk
+ up to the app. */
+ if (ar_ptr != NULL)
++ __libc_lock_unlock (ar_ptr->mutex);
+
+ __MTB_TRACE_RECORD ();
+ __MTB_TRACE_SET(ptr2, ent);
+ __MTB_TRACE_SET (size3, chunksize (mem2chunk (ent)));
+ return ent;
+ }
+ }
+#endif
+
+ __MTB_TRACE_PATH (cpu_cache);
arena_get (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
return newmem;
}
- (void) mutex_lock (&ar_ptr->mutex);
+ __libc_lock_lock (ar_ptr->mutex);
+ /* We expect _int_realloc() to call MTB_TRACE_RECORD for us, if it
+ returns non-NULL. */
newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
- (void) mutex_unlock (&ar_ptr->mutex);
+ __libc_lock_unlock (ar_ptr->mutex);
assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
ar_ptr == arena_for_chunk (mem2chunk (newp)));
}
if (av != NULL)
- (void) mutex_unlock (&av->mutex);
+ __libc_lock_unlock (av->mutex);
+ __MTB_TRACE_RECORD ();
+
/* Allocation failed even after a retry. */
if (mem == 0)
return 0;
bck->fd = bin;
if (av != &main_arena)
- victim->size |= NON_MAIN_ARENA;
+ set_non_main_arena (victim);
check_malloced_chunk (av, victim, nb);
- tc_victim->size |= NON_MAIN_ARENA;
+#if USE_TCACHE
+  /* While we're here, if we see other chunks of the same size,
+     stash them in the tcache.  */
+ size_t tc_idx = size2tidx (nb-SIZE_SZ);
+ if (tc_idx < mp_.tcache_max)
+ {
+ mchunkptr tc_victim;
+ int found = 0;
+
+ /* While bin not empty and tcache not full, copy chunks over. */
+ while (tcache.counts[tc_idx] < mp_.tcache_count
+ && (tc_victim = last(bin)) != bin)
+ {
+ if (tc_victim != 0)
+ {
+ bck = tc_victim->bk;
+ set_inuse_bit_at_offset (tc_victim, nb);
+ if (av != &main_arena)
++ set_non_main_arena (tc_victim);
+ bin->bk = bck;
+ bck->fd = bin;
+
+ TCacheEntry *e = (TCacheEntry *) chunk2mem(tc_victim);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ tcache.counts[tc_idx] ++;
+ found ++;
+ //_m_printf("snarf chunk %p %lx %p %lx\n", tc_victim, nb,
+ // chunk_at_offset(tc_victim, nb), chunk_at_offset(tc_victim, nb)->size);
+ }
+ }
+ //_m_printf("%d chunks found in smallbin\n", found);
+ }
+#endif
+ //_m_printf("%d: return %p\n", __LINE__, victim);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
int iters = 0;
while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
{
+ __MTB_TRACE_PATH(unsorted_remove);
bck = victim->bk;
- if (__builtin_expect (victim->size <= 2 * SIZE_SZ, 0)
- || __builtin_expect (victim->size > av->system_mem, 0))
+ if (__builtin_expect (chunksize_nomask (victim) <= 2 * SIZE_SZ, 0)
+ || __builtin_expect (chunksize_nomask (victim)
+ > av->system_mem, 0))
malloc_printerr (check_action, "malloc(): memory corruption",
chunk2mem (victim), av);
size = chunksize (victim);
{
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
- victim->size |= NON_MAIN_ARENA;
+ set_non_main_arena (victim);
+
+#if USE_TCACHE
+ /* Fill cache first, return to user only if cache fills.
+ We may return one of these chunks later. */
+ if (tcache_nb
+ && tcache.counts[tc_idx] < mp_.tcache_count)
+ {
+ TCacheEntry *e = (TCacheEntry *) chunk2mem(victim);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ tcache.counts[tc_idx] ++;
+ return_cached = 1;
+ continue;
+ }
+ else
+ {
+#endif
-
check_malloced_chunk (av, victim, nb);
+ //_m_printf("%d: return %p\n", __LINE__, victim);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
check_inuse_chunk(av, p);
nextp = p->fd;
+ __MTB_TRACE_PATH(fastbin_remove);
/* Slightly streamlined version of consolidation code in free() */
- size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
+ size = chunksize (p);
nextchunk = chunk_at_offset(p, size);
nextsize = chunksize(nextchunk);
const char *errstr = NULL;
+ /* We must call __MTB_TRACE_RECORD if we return non-NULL. */
+
/* oldmem size */
- if (__builtin_expect (oldp->size <= 2 * SIZE_SZ, 0)
+ if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
|| __builtin_expect (oldsize >= av->system_mem, 0))
{
errstr = "realloc(): invalid old size";
case M_ARENA_MAX:
if (value > 0)
- {
- LIBC_PROBE (memory_mallopt_arena_max, 2, value, mp_.arena_max);
- mp_.arena_max = value;
- }
+        do_set_arena_max (value);
break;
+#if USE_TCACHE
+ case M_TCACHE_COUNT:
+ if (value >= 0)
+ {
+ LIBC_PROBE (memory_mallopt_tcache_count, 2, value, mp_.tcache_count);
+ mp_.tcache_count = value;
+ }
+ break;
+ case M_TCACHE_MAX:
+ if (value >= 0)
+ {
+ value = size2tidx (value);
+ if (value < TCACHE_IDX)
+ {
+ LIBC_PROBE (memory_mallopt_tcache_max, 2, value, mp_.tcache_max);
+ mp_.tcache_max = value;
+ }
+ }
+ break;
+#endif
}
- (void) mutex_unlock (&av->mutex);
+ __libc_lock_unlock (av->mutex);
return res;
}
libc_hidden_def (__libc_mallopt)