From 7328eadb40411e561e6cce905dba331a1739c63a Mon Sep 17 00:00:00 2001 From: DJ Delorie Date: Fri, 3 Jun 2016 15:26:48 -0400 Subject: [PATCH] Build fixes for in-tree and 32/64-bit Expand the comments in mtrace-ctl.c to better explain how to use this tracing controller. The new docs assume the SO is built and installed. Build fixed for trace_run.c Additional build pedantry to let trace_run.c be built with more warnings/errors turned on. Build/install trace_run and trace2dat trace2dat takes dump files from mtrace-ctl.so and turns them into mmap'able data files for trace_run, which "plays back" the logged calls. 32-bit compatibility Redesign tcache macros to account for differences between 64 and 32 bit systems. --- malloc/Makefile | 14 ++++++- malloc/malloc.c | 17 ++++----- malloc/mtrace-ctl.c | 25 ++++++++++--- malloc/mtrace.h | 6 +-- malloc/trace_run.c | 90 +++++++++++++++++++++++++++++---------------- 5 files changed, 101 insertions(+), 51 deletions(-) diff --git a/malloc/Makefile b/malloc/Makefile index 2626a39031d..30248e47daf 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -50,6 +50,13 @@ libmemusage-inhibit-o = $(filter-out .os,$(object-suffixes)) libmtracectl-routines = mtrace-ctl libmtracectl-inhibit-o = $(filter-out .os,$(object-suffixes)) +others: $(objpfx)trace_run +install-bin = trace_run +install-bin-script = trace2dat + +$(objpfx)trace_run: $(objpfx)trace_run.o + $(LINK.o) -o $@ $(objpfx)trace_run.o -lpthread + $(objpfx)tst-malloc-backtrace: $(shared-thread-library) $(objpfx)tst-malloc-thread-exit: $(shared-thread-library) $(objpfx)tst-malloc-thread-fail: $(shared-thread-library) @@ -63,7 +70,7 @@ aux := set-freeres thread-freeres # The Perl script to analyze the output of the mtrace functions. 
ifneq ($(PERL),no) -install-bin-script = mtrace +install-bin-script += mtrace generated += mtrace # The Perl script will print addresses and to do this nicely we must know @@ -82,7 +89,7 @@ ifneq ($(cross-compiling),yes) # If the gd library is available we build the `memusagestat' program. ifneq ($(LIBGD),no) others: $(objpfx)memusage -install-bin = memusagestat +install-bin += memusagestat install-bin-script += memusage generated += memusagestat memusage extra-objs += memusagestat.o @@ -147,6 +154,9 @@ tst-malloc-usable-ENV = MALLOC_CHECK_=3 sLIBdir := $(shell echo $(slibdir) | sed 's,lib\(\|64\)$$,\\\\$$LIB,') +$(objpfx)trace2dat: trace2dat + cp $^ $@ && chmod +x $@ + $(objpfx)mtrace: mtrace.pl rm -f $@.new sed -e 's|@PERL@|$(PERL)|' -e 's|@XXX@|$(address-width)|' \ diff --git a/malloc/malloc.c b/malloc/malloc.c index c020fa2b986..57aca089fa9 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -1103,7 +1103,7 @@ volatile size_t __malloc_trace_buffer_head = 0; static __thread __malloc_trace_buffer_ptr trace_ptr; static void -__mtb_trace_entry (uint32_t type, int64_t size, void *ptr1) +__mtb_trace_entry (uint32_t type, size_t size, void *ptr1) { size_t head1; @@ -1122,7 +1122,7 @@ __mtb_trace_entry (uint32_t type, int64_t size, void *ptr1) trace_ptr->path_m_f_realloc = 0; trace_ptr->path = 0; trace_ptr->size = size; - trace_ptr->ptr1 = (uint64_t) ptr1; + trace_ptr->ptr1 = ptr1; trace_ptr->ptr2 = 0; } @@ -1158,7 +1158,7 @@ __malloc_get_trace_buffer (size_t *bufcount, size_t *bufhead) #define __MTB_TRACE_SET(var,value) \ if (__builtin_expect (trace_ptr != NULL, 1)) \ - trace_ptr->var = (uint64_t) value; + trace_ptr->var = value; #else #define __MTB_TRACE_ENTRY(type,size,ptr1) @@ -3000,11 +3000,10 @@ mremap_chunk (mchunkptr p, size_t new_size) #define USE_TCACHE 1 #if USE_TCACHE -#define TCACHE_SHIFT 4 /* we want 64 entries */ -#define MAX_TCACHE_SIZE (1024 - (1 << TCACHE_SHIFT)) -#define TCACHE_IDX ((MAX_TCACHE_SIZE >> TCACHE_SHIFT)+1) -#define size2tidx(bytes) 
(((bytes)+(1<<TCACHE_SHIFT)-1)>>TCACHE_SHIFT) +#define MAX_TCACHE_SIZE (MALLOC_ALIGNMENT * 63) +#define TCACHE_IDX ((MAX_TCACHE_SIZE / MALLOC_ALIGNMENT) + 1) +#define size2tidx(bytes) (((bytes) + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT) /* Rounds up, so... idx 0 bytes 0 @@ -3105,7 +3104,7 @@ __libc_malloc (size_t bytes) && tcache.initted == 1) { void *ent; - size_t tc_bytes = tc_idx << TCACHE_SHIFT; + size_t tc_bytes = tc_idx * MALLOC_ALIGNMENT; size_t tc_ibytes; size_t total_bytes; int i; @@ -4414,7 +4413,7 @@ _int_free (mstate av, mchunkptr p, int have_lock) #if USE_TCACHE { - int tc_idx = size2tidx (size - SIZE_SZ*2); + int tc_idx = size2tidx (size - SIZE_SZ); if (size < MAX_TCACHE_SIZE && tcache.counts[tc_idx] < TCACHE_FILL_COUNT diff --git a/malloc/mtrace-ctl.c b/malloc/mtrace-ctl.c index 949098a2712..e1446ac1df9 100644 --- a/malloc/mtrace-ctl.c +++ b/malloc/mtrace-ctl.c @@ -5,13 +5,26 @@ #include #include -/* Build like this: +/* This module is a stand-alone control program for malloc's internal + trace buffer. It is intended to be preloaded like this: - gcc -shared -fpic mtrace-ctl.c -o /tmp/mtrace-ctl.so ../../glibc.build/libc.so + LD_PRELOAD=/usr/lib/libmtracectl.so ./myprog - Invoke like this: + This module uses the following environment variables: - LD_PRELOAD=/tmp/mtrace-ctl.so ./myprog + MTRACE_CTL_COUNT - how many records to store (default: 1000). Each + record is 32 bytes, and the entire buffer is mmap'd at once. If + the buffer isn't big enough, it will overwrite early records with + newer ones. The total number of trace records is reported in the + output file so that a larger buffer may be allocated on future runs. + + MTRACE_CTL_FILE - the output file name (default: + /tmp/mtrace-$$.out). Note that the default is per-pid but there is + no way to specify a per-pid pattern via this environment variable. + + The output file will contain a header that says how many trace + records were seen (which is usually more or less than the trace + buffer size). 
The trace buffer is then dumped one entry per line. */ @@ -106,9 +119,9 @@ djend(void) t->path_munmap ? 'U' : '-', t->path_m_f_realloc ? 'R' : '-', t->path_hook ? 'H' : '-', - (long long unsigned int) t->ptr1, + (long long unsigned int) (size_t) t->ptr1, (long long unsigned int) t->size, - (long long unsigned int) t->ptr2); + (long long unsigned int) (size_t) t->ptr2); break; } } diff --git a/malloc/mtrace.h b/malloc/mtrace.h index 8dea72c472d..db3cbe51a68 100644 --- a/malloc/mtrace.h +++ b/malloc/mtrace.h @@ -36,9 +36,9 @@ struct __malloc_trace_buffer_s { uint32_t path_hook:1; /* A hook was used to complete the request */ uint32_t path:16; /* remaining bits */ - uint64_t ptr1; - uint64_t ptr2; - uint64_t size; + void *ptr1; + void *ptr2; + size_t size; }; typedef struct __malloc_trace_buffer_s *__malloc_trace_buffer_ptr; diff --git a/malloc/trace_run.c b/malloc/trace_run.c index 52d548738ec..e34ad90c01c 100644 --- a/malloc/trace_run.c +++ b/malloc/trace_run.c @@ -25,7 +25,11 @@ #define C_NTHREADS 10 #define C_START_THREAD 11 -static __inline__ int64_t rdtsc_s(void) +#ifdef x86_64 + +#define ticks_t int64_t + +static __inline__ ticks_t rdtsc_s(void) { unsigned a, d; asm volatile("cpuid" ::: "%rax", "%rbx", "%rcx", "%rdx"); @@ -33,7 +37,7 @@ static __inline__ int64_t rdtsc_s(void) return ((unsigned long)a) | (((unsigned long)d) << 32); } -static __inline__ int64_t rdtsc_e(void) +static __inline__ ticks_t rdtsc_e(void) { unsigned a, d; asm volatile("rdtscp" : "=a" (a), "=d" (d)); @@ -41,9 +45,31 @@ static __inline__ int64_t rdtsc_e(void) return ((unsigned long)a) | (((unsigned long)d) << 32); } -static int64_t diff_timeval (struct timeval e, struct timeval s) +#else + +#define ticks_t int32_t + +static __inline__ ticks_t rdtsc_s(void) +{ + unsigned a, d; + asm volatile("cpuid" ::: "%ax", "%bx", "%cx", "%dx"); + asm volatile("rdtsc" : "=a" (a), "=d" (d)); + return ((unsigned long)a) | (((unsigned long)d) << 16); +} + +static __inline__ ticks_t rdtsc_e(void) +{ + 
unsigned a, d; + asm volatile("rdtscp" : "=a" (a), "=d" (d)); + asm volatile("cpuid" ::: "%ax", "%bx", "%cx", "%dx"); + return ((unsigned long)a) | (((unsigned long)d) << 16); +} + +#endif + +static ticks_t diff_timeval (struct timeval e, struct timeval s) { - int64_t usec; + ticks_t usec; if (e.tv_usec < s.tv_usec) usec = (e.tv_usec + 1000000 - s.tv_usec) + (e.tv_sec-1 - s.tv_sec)*1000000; else @@ -65,7 +91,7 @@ pthread_mutex_t cmutex = PTHREAD_MUTEX_INITIALIZER; static char cbuf[NCBUF][30]; static int ci = 0; -char *comma(int64_t x) +char *comma(ticks_t x) { char buf[30], *bs, *bd; int l, i, idx; @@ -77,7 +103,7 @@ char *comma(int64_t x) bs = buf; bd = cbuf[idx]; - sprintf(buf, "%lld", x); + sprintf(buf, "%lld", (long long int)x); l = strlen(buf); i = l; while (*bs) @@ -101,21 +127,23 @@ static unsigned char *data; static size_t n_data; static pthread_mutex_t stat_mutex = PTHREAD_MUTEX_INITIALIZER; -int64_t malloc_time = 0, malloc_count = 0; -int64_t calloc_time = 0, calloc_count = 0; -int64_t realloc_time = 0, realloc_count = 0; -int64_t free_time = 0, free_count = 0; +ticks_t malloc_time = 0, malloc_count = 0; +ticks_t calloc_time = 0, calloc_count = 0; +ticks_t realloc_time = 0, realloc_count = 0; +ticks_t free_time = 0, free_count = 0; pthread_mutex_t stop_mutex = PTHREAD_MUTEX_INITIALIZER; int threads_done = 0; //#define dprintf printf -#define dprintf(...) 1 +#define dprintf(...) (void)1 //#define mprintf printf -#define mprintf(...) 1 +//#define MDEBUG 1 +#define mprintf(...) 
(void)1 #define myabort() my_abort_2(me, __LINE__) +void my_abort_2 (pthread_t me, int line) { fprintf(stderr, "Abort thread %d at line %d\n", (int)me, line); @@ -127,16 +155,10 @@ wmem (volatile void *ptr, int count) { char *p = (char *)ptr; int i; - size_t sz; if (!p) return; - // sz = *((size_t *)ptr-1) & ~7; - // fprintf(stderr, "wmem: %p size %x csize %x\n", ptr, - // count, sz); - // if (sz < 4*sizeof(size_t)) - // abort(); for (i=0; i %s)\n", comma(res_end.ru_maxrss - res_start.ru_maxrss), comma(res_start.ru_maxrss), comma(res_end.ru_maxrss)); @@ -473,12 +499,14 @@ main(int argc, char **argv) } #endif +#if 0 /* This will fail (crash) for system glibc but that's OK. */ __malloc_scan_chunks(malloc_scan_callback); malloc_info (0, stdout); +#endif -#if 1 +#if 0 /* ...or report them as used. */ for (idx=0; idx