libmtracectl-routines = mtrace-ctl
libmtracectl-inhibit-o = $(filter-out .os,$(object-suffixes))
-others: $(objpfx)trace_run
-install-bin = trace_run
-install-bin-script = trace2dat
+others: $(objpfx)trace_run $(objpfx)trace2wl
+install-bin = trace_run trace2wl
$(objpfx)trace_run: $(objpfx)trace_run.o
$(LINK.o) -o $@ $(objpfx)trace_run.o -lpthread
+ifeq (${CXX},)
+CXX = g++
+endif
+
+$(objpfx)trace2wl: $(objpfx)trace2wl.o
+ $(CXX) -g -std=gnu++11 -o $@ $(objpfx)trace2wl.o
+
+$(objpfx)trace2wl.o: trace2wl.cc
+ $(CXX) $(CXXFLAGS) -g -o $@ -c $< $(compile-mkdep-flags) -std=gnu++11
+
$(objpfx)tst-malloc-backtrace: $(shared-thread-library)
$(objpfx)tst-malloc-thread-exit: $(shared-thread-library)
$(objpfx)tst-malloc-thread-fail: $(shared-thread-library)
sLIBdir := $(shell echo $(slibdir) | sed 's,lib\(\|64\)$$,\\\\$$LIB,')
-$(objpfx)trace2dat: trace2dat
- cp $^ $@ && chmod +x $@
-
$(objpfx)mtrace: mtrace.pl
rm -f $@.new
sed -e 's|@PERL@|$(PERL)|' -e 's|@XXX@|$(address-width)|' \
<pre>
$ <b>ls -l /tmp/mtrace-*</b>
--rw-r--r--. 1 root root 12422 Jun 2 20:53 mtrace-1188.out
+-rw-r--r--. 1 root root 12422 Jun 2 20:53 mtrace.out.1188
</pre>
-<p>Each generated file is a plain ASCII text file, with some headers
- followed by one line per trace record entry. The syntax is not
- "official" but intended to be machine-readable, and some scripts are
- included in the COPR repo to process the generated files.</p>
-
-<pre>
-$ <b>head -1 /tmp/mtrace-1188.out</b>
-158 out of 1000 events captured
-</pre>
-
-<p>If first number is more than the second number, then the trace only
-includes the <em>last</em> however-many records. You can specify a
-larger buffer via envirionment variables, like this:</p>
-
-<pre>
-$ <b>MTRACE_CTL_COUNT=100000 LD_PRELOAD=/lib64/libmtracectl.so ls</b>
-</pre>
-
-(again, or /lib/ for 32-bit machines)
+<p>Each generated file is a binary file, specific to the architecture,
+ with one record per trace record entry. Some programs are included
+ in the COPR repo to process the generated files. Please make sure
+ you process these files on the same architecture as they were
+ generated on.</p>
<h2>Sending Us Trace Files</h2>
<pre>
$ <b>cd /tmp</b>
-$ <b>gzip -9 mtrace-1188.out</b>
-$ <b>mv mtrace-1188.out.gz f24-ls-fred.mtrace.gz</b> (or whatever name fits :)
+$ <b>gzip -9 mtrace.out.1188</b>
+$ <b>mv mtrace.out.1188.gz f24-ls-fred.mtrace.gz</b> (or whatever name fits :)
</pre>
<p>Then mail <tt>f24-ls-fred.mtrace.gz</tt> to dj@redhat.com (or
benchmark suite, for example.</p>
<pre>
-trace2dat <em>outfile</em> [<em>infile ...</em>]
+trace2wl <em>outfile</em> [<em>infile ...</em>]
</pre>
If an infile is not provided, input is read from stdin.
<pre>
-$ trace2dat /tmp/ls.wl /tmp/mtrace-22172.out
+$ trace2wl /tmp/ls.wl /tmp/mtrace.out.22172
</pre>
The resulting file is a "workload" - a data file that tells the
__libc_scratch_buffer_set_array_size;
# malloc trace hooks for mtrace-ctl
- __malloc_set_trace_buffer;
- __malloc_get_trace_buffer;
+ __malloc_trace_init;
+ __malloc_trace_pause;
+ __malloc_trace_unpause;
+ __malloc_trace_stop;
+ __malloc_trace_sync;
__malloc_scan_chunks;
}
#if USE_MTRACE
#include "mtrace.h"
-volatile __malloc_trace_buffer_ptr __malloc_trace_buffer = NULL;
-volatile size_t __malloc_trace_buffer_size = 0;
-volatile size_t __malloc_trace_buffer_head = 0;
+typedef struct __malloc_trace_map_entry_s {
+ int ref_count;
+ __malloc_trace_buffer_ptr window;
+} __malloc_trace_map_entry;
+/* 16 TB max file size, 64 MB per window */
+#define TRACE_MAPPING_SIZE 67108864
+#define TRACE_N_PER_MAPPING (TRACE_MAPPING_SIZE / sizeof (struct __malloc_trace_buffer_s))
+#define TRACE_N_MAPPINGS 262144
+#define TRACE_MAX_COUNT (TRACE_N_PER_MAPPING * TRACE_N_MAPPINGS)
+
+/* Index into __malloc_trace_buffer[] */
+#define TRACE_COUNT_TO_MAPPING_NUM(count) ((count) / TRACE_N_PER_MAPPING)
+/* Index into __malloc_trace_buffer[n][] */
+#define TRACE_COUNT_TO_MAPPING_IDX(count) ((count) % TRACE_N_PER_MAPPING)
+
+/* Global mutex for the trace buffer tree itself. */
+mutex_t __malloc_trace_mutex;
+
+/* Global counter, "full" when equal to TRACE_MAX_COUNT. Points to
+ the next available slot, so POST-INCREMENT it. */
+volatile size_t __malloc_trace_count = 0;
+
+/* Array of TRACE_N_MAPPINGS pointers to potentially mapped trace buffers. */
+volatile __malloc_trace_map_entry *__malloc_trace_buffer = NULL;
+/* The file we're mapping them to. */
+char * __malloc_trace_filename = NULL;
+
+volatile int __malloc_trace_enabled = 0;
+
+static __thread int __malloc_trace_last_num = -1;
static __thread __malloc_trace_buffer_ptr trace_ptr;
static inline pid_t
static void
__mtb_trace_entry (uint32_t type, size_t size, void *ptr1)
{
- size_t head1;
+ size_t my_trace_count;
+ size_t old_trace_count;
+ int my_num;
+
+ /* START T: Log trace event. */
+ alg_t1:
+ /* T1. Perform a load-acq of the global trace offset. */
+ my_trace_count = atomic_load_acquire (&__malloc_trace_count);
+
+ /* T2. If the window number is different from the current
+ thread-local window number, proceed with algorithm W below. */
+ my_num = TRACE_COUNT_TO_MAPPING_NUM (my_trace_count);
+ if (my_num != __malloc_trace_last_num)
+ {
+ int new_window;
+ int new_ref_count;
+
+ /* START W: Switch window. */
+
+ /* W1. Acquire the global window lock. */
+ (void) mutex_lock (&__malloc_trace_mutex);
+
+      /* W2. If the thread-local window number is not -1, decrement the reference
+         counter for the current thread window.
+         NOTE(review): __malloc_trace_last_num already holds a window
+         number (it is assigned new_window in step W7), yet
+         TRACE_COUNT_TO_MAPPING_NUM is applied to it again below, which
+         maps window N to N/TRACE_N_PER_MAPPING (usually 0) -- confirm
+         whether the macro call should be dropped here.  */
+ if (__malloc_trace_last_num != -1)
+ {
+ int old_window = TRACE_COUNT_TO_MAPPING_NUM (__malloc_trace_last_num);
+ int old_ref_count = catomic_exchange_and_add (&__malloc_trace_buffer[old_window].ref_count, -1);
+ /* W3. If that reference counter reached 0, unmap the window. */
+ if (old_ref_count == 1)
+ {
+ munmap (__malloc_trace_buffer[old_window].window, TRACE_MAPPING_SIZE);
+ __malloc_trace_buffer[old_window].window = NULL;
+ }
+ }
+
+ /* W4. Perform a load-relaxed of the global trace offset. */
+ my_trace_count = atomic_load_relaxed (&__malloc_trace_count);
+
+ /* W5. Increment the reference counter of the corresponding window. */
+ new_window = TRACE_COUNT_TO_MAPPING_NUM (my_trace_count);
+ new_ref_count = catomic_exchange_and_add (&__malloc_trace_buffer[new_window].ref_count, 1);
+
+ /* W6. If the incremented reference counter is 1, perform algorithm F. */
+ if (new_ref_count == 0)
+ {
+ /* START F: Map window from file. */
+
+ /* Note: There are security issues wrt opening a file by
+ name many times. We know this, and the risk is low (if
+ you have root access, there are better ways to wreak
+ havoc). We choose this design so that there isn't an
+ open file handle which may interefere with, or be
+ corrupted by, the running application. */
+
+ /* F1. Open the trace file. */
+ int trace_fd = open (__malloc_trace_filename, O_RDWR|O_CREAT, 0666);
+ if (trace_fd < 0)
+ {
+ /* FIXME: Better handling of errors? */
+ _m_printf("Can't open trace_buffer file %s\n", __malloc_trace_filename);
+ __malloc_trace_enabled = 0;
+ return;
+ }
+
+ /* F2. Extend the file length so that it covers the end of the current
+ window (using ftruncate, needed to avoid SIGBUS). */
+ ftruncate (trace_fd, (new_window + 1) * TRACE_MAPPING_SIZE);
+
+ /* F3. Map the window from the file offset corresponding to
+ the current window. */
+ void *ptr =
+ mmap (NULL, TRACE_MAPPING_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+ trace_fd, new_window * TRACE_MAPPING_SIZE);
+ if (ptr == NULL)
+ {
+ /* FIXME: Better handling of errors? */
+ _m_printf("Can't map trace_buffer file %s\n", __malloc_trace_filename);
+ __malloc_trace_enabled = 0;
+ return;
+ }
+
+ /* F4. Update the mapping pointer in the active window array. */
+ __malloc_trace_buffer[new_window].window = ptr;
+
+ /* F5. Close the file. */
+ close (trace_fd);
+
+ /* F6. Continue with step W7. */
+ /* END F */
+ }
+
+ /* W7. Assign the window number to the thread-local window number,
+ switching the thread window. */
+ __malloc_trace_last_num = new_window;
- head1 = catomic_exchange_and_add (&__malloc_trace_buffer_head, 1);
+ /* W8. Release the global window lock. */
+ (void) mutex_unlock (&__malloc_trace_mutex);
- trace_ptr = __malloc_trace_buffer + (head1 % __malloc_trace_buffer_size);
+ /* W9. Continue at T1. */
+ goto alg_t1;
+
+ /* END W */
+ }
+
+ /* T3. CAS-acqrel the incremented global trace offset. If CAS
+ fails, go back to T1. */
+ old_trace_count = catomic_exchange_and_add (&__malloc_trace_count, 1);
+ /* See if someone else incremented it while we weren't looking. */
+ if (old_trace_count != my_trace_count)
+ goto alg_t1;
+
+ /* T4. Write the trace data. */
+ /* At this point, __malloc_trace_buffer[my_num] is valid because we
+ DIDN'T go through algorithm W, and it's reference counted for us,
+ and my_trace_count points to our record. */
+ trace_ptr = __malloc_trace_buffer[my_num].window + TRACE_COUNT_TO_MAPPING_IDX (my_trace_count);
trace_ptr->thread = __gettid ();
trace_ptr->type = type;
trace_ptr->ptr2 = 0;
}
-int
-__malloc_set_trace_buffer (void *bufptr, size_t bufsize)
+/* Initialize the trace buffer and backing file. The file is
+ overwritten if it already exists. */
+void
+__malloc_trace_init (char *filename)
{
- __malloc_trace_buffer = 0;
- __malloc_trace_buffer_size = bufsize / sizeof(struct __malloc_trace_buffer_s);
- __malloc_trace_buffer_head = 0;
- __malloc_trace_buffer = (__malloc_trace_buffer_ptr) bufptr;
- return sizeof(struct __malloc_trace_buffer_s);
+ int pagesize = sysconf(_SC_PAGE_SIZE);
+ int main_length = TRACE_N_MAPPINGS * sizeof (__malloc_trace_buffer[0]);
+ int total_length = (main_length + strlen(filename) + 1 + pagesize-1) & (~(pagesize-1));
+ char *mapping;
+
+ mapping = mmap (NULL, total_length, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mapping == NULL)
+ return;
+
+ strcpy (mapping + main_length, filename);
+ __malloc_trace_filename = mapping + main_length;
+
+ __malloc_trace_buffer = (__malloc_trace_map_entry *) mapping;
+
+ mutex_init (&__malloc_trace_mutex);
+ __malloc_trace_count = 0;
+
+ __mtb_trace_entry (__MTB_TYPE_MAGIC, sizeof(void *), (void *)0x1234);
+
+ atomic_store_release (&__malloc_trace_enabled, 1);
}
-void *
-__malloc_get_trace_buffer (size_t *bufcount, size_t *bufhead)
+/* All remaining functions return current count of trace records. */
+
+/* Pause - but don't stop - tracing. */
+size_t __malloc_trace_pause (void)
{
- if (bufcount)
- *bufcount = __malloc_trace_buffer_size;
- if (bufhead)
- *bufhead = __malloc_trace_buffer_head;
- return __malloc_trace_buffer;
+ atomic_store_release (&__malloc_trace_enabled, 0);
+ return atomic_load_relaxed (&__malloc_trace_count);
}
+/* Resume tracing where it left off when paused. */
+size_t __malloc_trace_unpause (void)
+{
+ if (__malloc_trace_buffer != NULL)
+ atomic_store_release (&__malloc_trace_enabled, 1);
+ return atomic_load_relaxed (&__malloc_trace_count);
+}
+
+/* Stop tracing and clean up all the trace buffer mappings. */
+size_t __malloc_trace_stop (void)
+{
+ atomic_store_release (&__malloc_trace_enabled, 0);
+ /* FIXME: we can't actually release everything until all threads
+ have finished accessing the buffer, but we have no way of doing
+ that... */
+
+ /* For convenience, reduce the file size to only what's needed, else
+     the minimum file size we'll see is 64 MB. */
+ int trace_fd = open (__malloc_trace_filename, O_RDWR|O_CREAT, 0666);
+ if (trace_fd >= 0)
+ {
+ ftruncate (trace_fd, __malloc_trace_count * sizeof (struct __malloc_trace_buffer_s));
+ close (trace_fd);
+ }
+
+ return atomic_load_relaxed (&__malloc_trace_count);
+}
+
+/* Sync all buffer data to file (typically a no-op on Linux). */
+size_t __malloc_trace_sync (void)
+{
+ return atomic_load_relaxed (&__malloc_trace_count);
+}
+
+
#define __MTB_TRACE_ENTRY(type,size,ptr1) \
- if (__builtin_expect (__malloc_trace_buffer != NULL, 0)) \
+ if (__builtin_expect (__malloc_trace_enabled, 0)) \
__mtb_trace_entry (__MTB_TYPE_##type,size,ptr1); \
else \
trace_ptr = 0;
trace_ptr->var = value;
#else
+void __malloc_trace_init (char *filename) {}
+size_t __malloc_trace_pause (void) { return 0; }
+size_t __malloc_trace_unpause (void) { return 0; }
+size_t __malloc_trace_stop (void) { return 0; }
+size_t __malloc_trace_sync (void) { return 0; }
+
#define __MTB_TRACE_ENTRY(type,size,ptr1)
#define __MTB_TRACE_PATH(mpath)
#define __MTB_TRACE_SET(var,value)
tc_bytes = 2 * SIZE_SZ;
tc_ibytes = tc_bytes + 2*SIZE_SZ;
- total_bytes = tc_bytes + tc_ibytes * TCACHE_FILL_COUNT
+ total_bytes = tc_bytes + tc_ibytes * TCACHE_FILL_COUNT;
__MTB_TRACE_PATH (thread_cache);
__MTB_TRACE_PATH (cpu_cache);
/* realloc of null is supposed to be same as malloc */
if (oldmem == 0)
- return __libc_malloc (bytes);
+ {
+ newp = __libc_malloc (bytes);
+ __MTB_TRACE_SET (ptr2, newp);
+ return newp;
+ }
/* chunk corresponding to oldmem */
const mchunkptr oldp = mem2chunk (oldmem);
#if HAVE_MREMAP
newp = mremap_chunk (oldp, nb);
if (newp)
- return chunk2mem (newp);
+ {
+ __MTB_TRACE_SET (ptr2, chunk2mem (newp));
+ return chunk2mem (newp);
+ }
#endif
/* Note the extra SIZE_SZ overhead. */
if (oldsize - SIZE_SZ >= nb)
- return oldmem; /* do nothing */
+ {
+ __MTB_TRACE_SET (ptr2, oldmem);
+ return oldmem; /* do nothing */
+ }
__MTB_TRACE_PATH (m_f_realloc);
memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
munmap_chunk (oldp);
+ __MTB_TRACE_SET (ptr2, newmem);
return newmem;
}
stash them in the tcache. */
if (nb-SIZE_SZ < MAX_TCACHE_SIZE)
{
- int tc_idx = size2tidx (bytes);
+ int tc_idx = size2tidx (nb-SIZE_SZ);
mchunkptr tc_victim;
int found = 0;
{
int tc_idx = size2tidx (size - SIZE_SZ);
- if (size < MAX_TCACHE_SIZE
+ if (size - SIZE_SZ < MAX_TCACHE_SIZE
&& tcache.counts[tc_idx] < TCACHE_FILL_COUNT
&& tcache.initted == 1)
{
}
weak_alias (__malloc_info, malloc_info)
-void
-__malloc_scan_chunks (void (*cb)(void *,size_t,int))
-{
-#if USE_TCACHE
- TCache *tc = tcache_list;
- while (tc)
- {
- cb(tc, 0, MSCAN_TCACHE);
- for (size_t i = 0; i < TCACHE_IDX; ++i)
- {
- TCacheEntry *te = tc->entries[i];
- for (int j = 0; j < tc->counts[i]; j++)
- {
- cb(mem2chunk(te), chunksize(mem2chunk(te)), MSCAN_TCACHE);
- te = te->next;
- }
- }
- tc = tc->next;
- }
-#endif
-
- mstate ar_ptr = &main_arena;
- do
- {
- cb(ar_ptr, 0, MSCAN_ARENA);
-
- if (ar_ptr != &main_arena)
- {
- heap_info *heap = heap_for_ptr (top (ar_ptr));
- while (heap)
- {
- cb(heap, heap->size, MSCAN_HEAP);
-
- heap = heap->prev;
- }
- };
-
- for (size_t i = 0; i < NFASTBINS; ++i)
- {
- mchunkptr p = fastbin (ar_ptr, i);
- while (p != NULL)
- {
- cb(p, chunksize(p), MSCAN_FASTBIN_FREE);
- p = p->fd;
- }
- }
-
- mbinptr bin;
- struct malloc_chunk *r;
- for (size_t i = 1; i < NBINS; ++i)
- {
- bin = bin_at (ar_ptr, i);
- r = bin->fd;
- if (r != NULL)
- while (r != bin)
- {
- cb(r, chunksize(r), (i == 1) ? MSCAN_UNSORTED : MSCAN_CHUNK_FREE);
- r = r->fd;
- }
- }
-
- cb(ar_ptr->top, chunksize(ar_ptr->top), MSCAN_TOP);
-
- ar_ptr = ar_ptr->next;
- }
- while (ar_ptr != &main_arena);
-}
-
-
strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
+#include <fcntl.h>
+#include <errno.h>
/* This module is a stand-alone control program for malloc's internal
trace buffer. It is intended to be preloaded like this:
#include "mtrace.h"
+#define estr(str) write (2, str, strlen (str))
+
+#if 0
static void
err(const char *str)
{
- write (2, str, strlen(str));
- write (2, "\n", 1);
- exit(1);
+ estr (str);
+ estr ("\n");
}
+#endif
-void __attribute__((constructor))
-djmain(void)
+/*
+ * mtrace_start - checks for buffer, allocates one if needed, starts trace.
+ * mtrace_stop - stops tracing
+ * mtrace_sync - syncs the buffer
+ * mtrace_reset - resets buffer state to initial state
+ */
+
+struct _malloc_trace_buffer_s *mtrace_buffer = NULL;
+size_t mtrace_buffer_bytesize = 0;
+
+int
+mtrace_start (void)
{
const char *e;
- size_t sz;
+ char *fname;
+ int sequence = 0;
e = getenv("MTRACE_CTL_COUNT");
if (!e)
e = "1000";
- sz = (size_t) atol(e) * sizeof(struct __malloc_trace_buffer_s);
- char *buf = mmap (NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (buf == NULL || buf == (char *)(-1))
- err("Cannot mmap");
+ e = getenv("MTRACE_CTL_FILE");
+ if (!e)
+ e = "/tmp/mtrace.out";
+
+ fname = alloca (strlen(e) + 30);
+ sprintf(fname, "%s.%d", e, getpid());
+ while (access (fname, F_OK) == 0)
+ {
+ sequence ++;
+ sprintf(fname, "%s.%d.%d", e, getpid(), sequence);
+ }
+
+ estr ("mtrace-ctl: writing to ");
+ estr (fname);
+ estr ("\n");
- buf[0] = 1;
- buf[sz-1] = 1;
+ __malloc_trace_init (fname);
+ return 0;
+}
- /* This must be the last thing we do. */
- __malloc_set_trace_buffer ((void *)buf, sz);
- return;
+void
+mtrace_stop (void)
+{
+ size_t count;
+ char line[100];
+
+ count = __malloc_trace_stop ();
+ sprintf (line, "mtrace-ctl: %lld entries recorded\n", (long long)count);
+ estr (line);
}
+void
+mtrace_sync (void)
+{
+ __malloc_trace_sync ();
+ // __malloc_trace_buffer_ptr buf = __malloc_get_trace_buffer (&size, &head);
+ // msync (buf, size * sizeof(struct __malloc_trace_buffer_s), MS_SYNC | MS_INVALIDATE);
+}
+
+void
+mtrace_reset (void)
+{
+
+ __malloc_trace_stop ();
+ mtrace_start ();
+}
+
+void __attribute__((constructor))
+mtrace_ctor(void)
+{
+ if (mtrace_start ())
+ exit (1);
+}
+
+void __attribute__((destructor))
+mtrace_dtor(void)
+{
+ mtrace_stop ();
+ mtrace_sync ();
+}
+
+#if 0
+
const char * const typenames[] = {
"unused ",
"malloc ",
FILE *outf;
size_t head, size, i;
- /* Prevent problems with recursion etc by shutting off trace right away. */
- __malloc_trace_buffer_ptr buf = __malloc_get_trace_buffer (&size, &head);
- __malloc_set_trace_buffer (NULL, 0);
-
e = getenv("MTRACE_CTL_FILE");
if (!e)
{
munmap (buf, size * sizeof(struct __malloc_trace_buffer_s));
return;
}
+#endif
typedef struct __malloc_trace_buffer_s *__malloc_trace_buffer_ptr;
-/* These three are only valid inside glibc proper. */
-extern volatile __malloc_trace_buffer_ptr __malloc_trace_buffer;
-extern volatile size_t __malloc_trace_buffer_size;
-extern volatile size_t __malloc_trace_buffer_head;
-
-/* bufptr is a random chunk of memory, bufsize is the size of that
- chunk in BYTES. Returns the size of __malloc_trace_buffer_s. The
- buffer should be filled with NUL bytes before passing, such that
- each record's type is UNUSED (below). The trace buffer may be
- disabled by passing NULL,0 although it's up to the caller to obtain
- and free/unmap the previous buffer first. */
-int __malloc_set_trace_buffer (void *bufptr, size_t bufsize);
-
-/* Returns the location of the buffer (same as passed above, or NULL).
- Also fills in BUFCOUNT which is the number of records (not bytes)
- in the buffer, and BUFHEAD which is the index of the most recently
- filled entry. NOTE that BUFHEAD might be greater than bufcount; if
- so it reflects the number of records that would have been stored
- had there been size, and the caller must modulo that by BUFCOUNT to
- get the ending index. The last BUFCOUNT records are stored;
- earlier records are overwritten. */
-void * __malloc_get_trace_buffer (size_t *bufcount, size_t *bufhead);
+/* Initialize the trace buffer and backing file. The file is
+ overwritten if it already exists. */
+void __malloc_trace_init (char *filename);
+
+/* All remaining functions return current count of trace records. */
+
+/* Pause - but don't stop - tracing. */
+size_t __malloc_trace_pause (void);
+
+/* Resume tracing where it left off when paused. */
+size_t __malloc_trace_unpause (void);
+
+/* Stop tracing and clean up all the trace buffer mappings. */
+size_t __malloc_trace_stop (void);
+
+/* Sync all buffer data to file (typically a no-op on Linux). */
+size_t __malloc_trace_sync (void);
#define __MTB_TYPE_UNUSED 0
+/* ptr1 is 0x1234, size is sizeof(void *) - there is one of these at
+ the beginning of the trace. */
+#define __MTB_TYPE_MAGIC 255
+
/* ptr2 = malloc (size) */
#define __MTB_TYPE_MALLOC 1
} MSCAN_Types;
void __malloc_scan_chunks (void (*callback)(void * /*ptr*/, size_t /*length*/, int /*type*/));
+
+/* Codes for the simulator/workload programs. */
+#define C_NOP 0
+#define C_DONE 1
+#define C_MALLOC 2
+#define C_CALLOC 3
+#define C_REALLOC 4
+#define C_FREE 5
+#define C_SYNC_W 6
+#define C_SYNC_R 7
+#define C_ALLOC_PTRS 8
+#define C_ALLOC_SYNCS 9
+#define C_NTHREADS 10
+#define C_START_THREAD 11
+++ /dev/null
-#!/usr/bin/perl
-# -*- perl -*-
-
-$outfile = shift @ARGV;
-
-$outfile = "trace2c.dat" unless $outfile;
-
-die("$outfile already exists") if -f $outfile;
-
-# Arrays starting with c_ are data code to be emitted later
-
-
-# Reserve idx 0 to be a NULL pointer
-$last_idx = 0;
-sub ptr2idx {
- my ($ptr) = @_;
- if ($ptr2idx{$ptr}) {
- return $ptr2idx{$ptr};
- }
- if ($ptr =~ /^0+$/) {
- return 0;
- }
- # we intentionally never return zero
- $last_idx ++;
- $ptr2idx{$ptr} = $last_idx;
- return $last_idx;
-}
-
-sub put_int {
- my ($val) = @_;
- if ($val < 0) {
- print STDERR "Error: negative value in put_int\n";
- exit(1);
- }
- my ($rv) = chr($val & 127);
- while ($val > 127) {
- $val >>= 7;
- $rv = chr(($val & 127) | 128) . $rv;
- }
- return $rv;
-}
-
-
-$sync_counter = 0;
-
-# thread 2 waits for thread 1
-sub sync {
- my ($thread1, $thread2) = @_;
- if (! $sync_init{$thread1}) {
- push (@c_sync, "volatile int sync_${thread1} = 0;");
- $sync_init{$thread1} = 1;
- }
- $sync_counter ++;
- $c_threads{$thread1} .= $c_sync_w . &put_int($sync_counter);
- $c_threads{$thread2} .= $c_sync_r . &put_int($sync_counter);
-}
-
-sub acq_ptr {
- my ($ptr) = @_;
- if ($owner{$ptr} && $owner{$ptr} ne $thread) {
- &sync ($owner{$ptr}, $thread);
- }
- $owner{$ptr} = $thread;
-}
-
-$master_thread = undef;
-
-# These must stay in sync with trace_run.c
-$c_nop = chr(0);
-$c_done = chr(1);
-$c_malloc = chr(2);
-$c_calloc = chr(3);
-$c_realloc = chr(4);
-$c_free = chr(5);
-$c_sync_w = chr(6);
-$c_sync_r = chr(7);
-$c_alloc_ptrs = chr(8);
-$c_alloc_syncs = chr(9);
-$c_nthreads = chr(10);
-$c_start_thread = chr(11);
-
-$line = 0;
-while (<>) {
- $line ++;
- next if /^threadid/;
- next if /out of/;
-
- ($thread, $type, $path, $ptr1, $size, $ptr2) = split(' ');
- $size = hex($size);
- $idx1 = &ptr2idx($ptr1);
- $idx2 = &ptr2idx($ptr2);
-
- if (! $master_thread) {
- $master_thread = $thread;
- } elsif (! $threads{$thread}) {
- # make new thread start at the "right" time
- &sync ($master_thread, $thread);
- }
-
- $threads{$thread} = 1;
-
- if ($type eq "malloc") {
- # In case another thread needs to free this chunk first
- &acq_ptr($ptr2);
- $c_threads{$thread} .= $c_malloc . &put_int($idx2) . &put_int($size);
- $leak{$ptr2} = $size;
- $owner{$ptr2} = $thread;
- $valid{$ptr2} = 1;
- }
-
- if ($type eq "calloc") {
- # In case another thread needs to free this chunk first
- &acq_ptr($ptr2);
- $c_threads{$thread} .= $c_calloc . &put_int($idx2) . &put_int($size);
- $leak{$ptr2} = $size;
- $owner{$ptr2} = $thread;
- $valid{$ptr2} = 1;
- }
-
- if ($type eq "free") {
- if ($ptr1 =~ /^0+$/) {
- $c_threads{$thread} .= $c_free . &put_int(0);
- } elsif ($valid{$ptr1}) {
- # if it was allocated in another thread
- &acq_ptr($ptr1);
- $c_threads{$thread} .= $c_free . &put_int($idx1);
- delete $leak{$ptr1};
- $valid{$ptr1} = 0;
- } else {
- #push (@{$c_threads{$thread}}, sprintf(" // free (p%s) (invalid ptr $ptr1 in thread $thread)", $idx1));
- }
- }
-
- if ($type eq "realloc") {
- if ($owner{$ptr1}) {
- &acq_ptr($ptr1);
- &acq_ptr($ptr2);
- $c_threads{$thread} .= $c_realloc . &put_int($idx2) . &put_int($idx1) . &put_int($size);
- # ptr1 might be the same as ptr2, so sequence matters
- delete $leak{$ptr1};
- $leak{$ptr2} = $size;
- $valid{$ptr1} = 0;
- $valid{$ptr2} = 1;
- }
- }
-}
-print $line . " lines read\n";
-
-$nthreads = 0;
-for $thread (sort keys %threads) {
- $c_threads{$thread} .= $c_done;
- $nthreads ++;
-}
-
-sub code_dump {
- my ($code) = @_;
- for $c (split(//, $code)) {
- print hex(ord($c)), " ";
- }
- print "\n";
-}
-
-sub gen_main {
- my ($len) = @_;
- my ($code);
- my ($ptr) = $len;
-
- $code = $c_alloc_ptrs . &put_int($last_idx+1);
- &code_dump($code);
- $code .= $c_alloc_syncs . &put_int($sync_counter+1);
-
- # Keep these together
- $code .= $c_nthreads . &put_int($nthreads);
- for $thread (sort keys %threads) {
- printf("Start thread offset %x\n", $ptr);
- $code .= $c_start_thread . &put_int ($ptr);
- $ptr += length($c_threads{$thread});
- }
-
- $code .= $c_done;
- return length($code), $code;
-}
-
-$len = 1;
-$nlen = 2;
-while ($len != $nlen) {
- $len = $nlen;
- ($nlen, $maincode) = &gen_main($len);
- print "main size $len/$nlen\n";
-}
-
-&code_dump($maincode);
-
-open(F, ">$outfile");
-print F $maincode;
-for $thread (sort keys %threads) {
- printf "Thread $thread size %10d " , length($c_threads{$thread});
- &code_dump (substr($c_threads{$thread}, 0, 16));
- print F $c_threads{$thread};
-}
-close (F);
-
-exit 0;
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include <unordered_map>
+
+// The trace file looks like an array of struct __malloc_trace_buffer_s
+#include "mtrace.h"
+
+// This application is "run once and exit" so there's no cleanup code.
+
+typedef unsigned char byte;
+
+struct __malloc_trace_buffer_s *trace_records;
+size_t num_trace_records;
+
+int verbose = 0;
+
+//------------------------------------------------------------
+// File data buffers
+
+#define BUFFER_SIZE 4096
+
+struct BufferBlock {
+ BufferBlock *next;
+ byte buf[BUFFER_SIZE];
+};
+
+struct Buffer {
+ BufferBlock *first_buffer;
+ BufferBlock *last_buffer;
+
+ int count_total;
+ int count_last;
+
+ Buffer();
+ void add (char x);
+ void add_int (size_t i);
+ void write (int fd);
+ void clear (void);
+};
+
+Buffer::Buffer()
+{
+ first_buffer = last_buffer = new BufferBlock();
+ count_total = count_last = 0;
+}
+
+void
+Buffer::add (char x)
+{
+ if (count_last == BUFFER_SIZE)
+ {
+ BufferBlock *b = new BufferBlock;
+ last_buffer->next = b;
+ last_buffer = b;
+ count_last = 0;
+ }
+ last_buffer->buf[count_last] = x;
+ count_last ++;
+ count_total ++;
+}
+
+void
+Buffer::add_int (size_t val)
+{
+ byte buf[sizeof(size_t)*2];
+ int i = 0;
+
+ buf[i++] = val & 127;
+ while (val > 127)
+ {
+ val >>= 7;
+ buf[i++] = (val & 127) | 128;
+ }
+ while (i > 0)
+ add (buf[--i]);
+}
+
+void
+Buffer::write (int fd)
+{
+ BufferBlock *b;
+ for (b = first_buffer; b != last_buffer; b = b->next)
+ ::write (fd, b->buf, BUFFER_SIZE);
+ if (count_last)
+ ::write (fd, last_buffer->buf, count_last);
+}
+
+void
+Buffer::clear (void)
+{
+ while (first_buffer != last_buffer)
+ {
+ BufferBlock *b = first_buffer->next;
+ delete first_buffer;
+ first_buffer = b;
+ }
+ count_total = count_last = 0;
+}
+
+//------------------------------------------------------------
+
+struct PerThread {
+ int started;
+ Buffer workload;
+ PerThread() : started(0), workload() {};
+ void add (byte x) { workload.add(x); }
+ void add_int (size_t x) { workload.add_int(x); }
+};
+
+typedef std::unordered_map<int32_t, PerThread*> PerThreadMap;
+PerThreadMap per_thread;
+
+struct PerAddr {
+ PerThread *owner;
+ void *ptr;
+ int idx;
+ int valid;
+ const char *reason;
+ int reason_idx;
+ PerAddr(void *_ptr) : owner(0), ptr(_ptr), valid(0), reason("not seen") {};
+};
+
+// Don't start at zero, zero is special.
+int addr_count = 1;
+
+std::unordered_map<void *, PerAddr*> per_addr;
+
+PerAddr *
+get_addr (void *ptr)
+{
+ PerAddr *p;
+ if (ptr == NULL)
+ return NULL;
+ p = per_addr[ptr];
+ if (!p)
+ {
+ p = per_addr[ptr] = new PerAddr(ptr);
+ p->idx = addr_count ++;
+ }
+ return p;
+}
+
+int sync_counter = 0;
+
+// Insert a release/acquire pair to transfer ownership of data
+// from thread TREL to thread TACQ
+void
+sync_threads (PerThread *trel, PerThread *tacq)
+{
+ if (trel == tacq)
+ return;
+ sync_counter ++;
+ trel->add (C_SYNC_W);
+ trel->add_int (sync_counter);
+ tacq->add (C_SYNC_R);
+ tacq->add_int (sync_counter);
+}
+
+void
+acq_ptr (PerThread *thread, PerAddr *addr)
+{
+ if (addr == NULL)
+ return;
+ if (addr->owner != NULL && addr->owner != thread)
+ sync_threads (addr->owner, thread);
+ addr->owner = thread;
+}
+
+//------------------------------------------------------------
+
+int
+main(int argc, char **argv)
+{
+ int trace_fd, wl_fd;
+ struct stat stbuf;
+
+ if (argc > 1 && strcmp (argv[1], "-v") == 0)
+ {
+ verbose ++;
+ argc --;
+ argv ++;
+ }
+
+ if (argc != 3)
+ {
+ fprintf (stderr, "Usage: %s <outputfile.wl> <inputfile.mtrace>\n", argv[0]);
+ exit(1);
+ }
+
+ if (access (argv[1], F_OK) == 0)
+ {
+ fprintf (stderr, "Error: output file %s already exists, will not overwrite\n", argv[1]);
+ exit(1);
+ }
+
+ trace_fd = open (argv[2], O_RDONLY, 0666);
+ if (trace_fd < 0)
+ {
+ fprintf (stderr, "Can't open %s for reading\n", argv[2]);
+ perror("The error was");
+ exit(1);
+ }
+
+ if (stat (argv[2], &stbuf) < 0)
+ {
+ fprintf (stderr, "Can't stat %s for reading\n", argv[2]);
+ perror("The error was");
+ exit(1);
+ }
+
+ trace_records =
+ (struct __malloc_trace_buffer_s *)
+ mmap (NULL, stbuf.st_size, PROT_READ, MAP_SHARED, trace_fd, 0);
+ if (trace_records == (void *)(-1))
+ {
+ fprintf (stderr, "Can't map %s for reading\n", argv[2]);
+ perror("The error was");
+ exit(1);
+ }
+ num_trace_records = stbuf.st_size / sizeof(*trace_records);
+
+ PerThread *thread = NULL;
+ int last_tid = -1;
+ PerThread *master_thread = NULL;
+
+ per_addr[0] = NULL;
+
+ for (unsigned int i = 0; i < num_trace_records; i++)
+ {
+ __malloc_trace_buffer_s *r = trace_records + i;
+
+ // Quick-skip for NULs at EOF
+ if (r->type == __MTB_TYPE_UNUSED)
+ continue;
+
+ if(verbose)
+ printf("\033[32m%8x %2x (0x%p, 0x%x) = 0x%p\033[0m\n",
+ r->thread, r->type, r->ptr1, (int)r->size, r->ptr2);
+
+ if (r->thread != last_tid)
+ {
+ thread = per_thread[r->thread];
+ if (thread == NULL)
+ thread = per_thread[r->thread] = new PerThread();
+ last_tid = r->thread;
+ }
+ if (!master_thread)
+ {
+ master_thread = thread;
+ thread->started = 1;
+ }
+ else if (!thread->started)
+ {
+ sync_threads (master_thread, thread);
+ thread->started = 1;
+ }
+
+
+ PerAddr *pa1 = get_addr(r->ptr1);
+ PerAddr *pa2 = get_addr(r->ptr2);
+
+ switch (r->type)
+ {
+ case __MTB_TYPE_UNUSED:
+ case __MTB_TYPE_MAGIC:
+ break;
+
+ case __MTB_TYPE_MALLOC:
+ case __MTB_TYPE_CALLOC:
+ acq_ptr (thread, pa2);
+ if (pa2->valid)
+ printf ("%d: pointer %p malloc'd again? %d:%s\n", i, pa2->ptr, pa2->reason_idx, pa2->reason);
+ thread->add (r->type == __MTB_TYPE_MALLOC ? C_MALLOC : C_CALLOC);
+ thread->add_int (pa2->idx);
+ thread->add_int (r->size);
+ pa2->valid = 1;
+ pa2->reason = "malloc";
+ pa2->reason_idx = i;
+ break;
+
+ case __MTB_TYPE_FREE:
+ acq_ptr (thread, pa1);
+ if (pa1 == NULL)
+ {
+ thread->add (C_FREE);
+ thread->add_int (0);
+ }
+ else if (pa1->valid)
+ {
+ thread->add (C_FREE);
+ thread->add_int (pa1->idx);
+ pa1->valid = 0;
+ pa1->reason = "previously free'd";
+ pa1->reason_idx = i;
+ }
+ else
+ {
+ printf("%d: invalid pointer %p passed to free: %d:%s\n", i, pa1->ptr, pa1->reason_idx, pa1->reason);
+ }
+ break;
+
+ case __MTB_TYPE_REALLOC:
+ if (pa1 && pa1->owner)
+ acq_ptr (thread, pa1);
+ if (pa2 && pa2->owner)
+ acq_ptr (thread, pa2);
+ thread->add (C_REALLOC);
+ thread->add_int (pa2 ? pa2->idx : 0);
+ thread->add_int (pa1 ? pa1->idx : 0);
+ thread->add_int (r->size);
+
+ if (pa1)
+ {
+ pa1->valid = 0;
+ pa1->reason = "previously realloc'd";
+ pa1->reason_idx = i;
+ }
+ if (pa2)
+ {
+ pa2->valid = 1;
+ pa2->reason = "realloc";
+ pa2->reason_idx = i;
+ }
+
+ break;
+ }
+ }
+
+ int n_threads = per_thread.size();
+ PerThread *threads[n_threads];
+ int thread_off[n_threads];
+ int i = 0;
+
+ PerThreadMap::iterator iter;
+ if(verbose)
+ printf("%d threads\n", (int)per_thread.size());
+ for (iter = per_thread.begin();
+ iter != per_thread.end();
+ ++iter)
+ {
+ threads[i++] = iter->second;
+ iter->second->add(C_DONE);
+ if(verbose)
+ printf("thread: %d bytes\n", iter->second->workload.count_total);
+ }
+
+ /* The location of each thread's workload depends on the size of the
+ startup block, but the size of the startup block depends on the
+ size of the thread's location encoding. So, we loop until it
+ stabilizes. */
+ int old_len = 1;
+ int new_len = 2;
+ Buffer main_loop;
+ while (old_len != new_len)
+ {
+ int off = new_len;
+ int i;
+
+ old_len = new_len;
+ main_loop.clear ();
+
+ main_loop.add (C_ALLOC_PTRS);
+ main_loop.add_int (addr_count);
+ main_loop.add (C_ALLOC_SYNCS);
+ main_loop.add_int (sync_counter);
+ main_loop.add (C_NTHREADS);
+ main_loop.add_int (n_threads);
+
+ for (i=0; i<n_threads; i++)
+ {
+ thread_off[i] = off;
+ main_loop.add (C_START_THREAD);
+ main_loop.add_int (off);
+ off += threads[i]->workload.count_total;
+ }
+
+ main_loop.add (C_DONE);
+
+ new_len = main_loop.count_total;
+ }
+
+ wl_fd = open (argv[1], O_CREAT|O_EXCL|O_RDWR, 0666);
+ if (wl_fd < 0)
+ {
+ fprintf (stderr, "Can't open %s for writing\n", argv[1]);
+ perror("The error was");
+ exit(1);
+ }
+
+ main_loop.write (wl_fd);
+
+ for (i=0; i<n_threads; i++)
+ {
+ if (verbose)
+ printf("Start thread[%d] offset 0x%x\n", i, thread_off[i]);
+ threads[i]->workload.write (wl_fd);
+ }
+
+ close (wl_fd);
+
+ return 0;
+}
#include <fcntl.h>
#include "malloc.h"
-
-/* These must stay in sync with trace2dat */
-#define C_NOP 0
-#define C_DONE 1
-#define C_MALLOC 2
-#define C_CALLOC 3
-#define C_REALLOC 4
-#define C_FREE 5
-#define C_SYNC_W 6
-#define C_SYNC_R 7
-#define C_ALLOC_PTRS 8
-#define C_ALLOC_SYNCS 9
-#define C_NTHREADS 10
-#define C_START_THREAD 11
+#include "mtrace.h"
#if UINTPTR_MAX == 0xffffffffffffffff