Problem created by a discrepancy between the initial main stack
anon segment, and the main stack registered in m_stacks.c
Looking at some tracing; we see that there are two pages of stack:
--9078:2:main tell tool about
0ffefff000-
0fff000fff rw-
The stack between the base and the current sp is marked as not accessible:
--9078:2:main mark stack inaccessible
0ffefff000-
0fff0004bf
This is matching the aspacemgr view:
--9078:1:aspacem 22: RSVN
0ffe801000-
0ffeffefff 8380416 ----- SmUpper
--9078:1:aspacem 23: anon
0ffefff000-
0fff000fff 8192 rw---
(all the above is normal/as expected)
However, the main stack is registered in m_stacks.c as having only one page:
--9078:2:stacks register 0xFFF000000-0xFFF000FFF as stack 0
When the main stack is grown, m_stacks.c is informed by m_signals.c
that the stack is grown. This is done by trapping the signal 11
when a not mapped page is accessed.
However, the 2nd page does not cause a signal (as it is mapped).
So, m_stacks.c still believes the main has one page stack.
This then gives problems in the tracking of the SP and current_stack
in m_stacks.c.
Only one page was registered for the main stack, as the registration
was done with values computed before possibly adding a page
needed for the ABI redzone.
The fix is to properly register the main stack with the size of
the stack segment, once all aspects have been taken into account.
With the fix, the stack is registered as:
--31501:2:stacks register 0xFFEFFF000-0xFFF000FFF as stack 0
Another possible fix would be to always register the main stack with the
full size of the aspacemgr stack segment (i.e. the anon+RSVN above)
(idea is that this is similar to non main threads, for which the
full thread stack is registered from the beginning, even if not fully
used yet).
The first fix was preferred, assuming it is better to keep registering
the main stack "physical" size (and not its maximal size).
Test memcheck/tests/thread_alloca added, based on reproducer
done by Daniel Stodden.
The bug might be triggered or not depending on the initial value
of the SP, which is influenced by the size of the "env".
So, the test execs itself, growing each time the environment.
This has given a reasonable chance/way to reproduce the bug on Ubuntu 12
and on a Debian 6.
(tested on amd64/Ubuntu 12 and Debian 6
x86/fedora12
ppc64/fedora18
Note that while investigating this bug, another strange thing was seen:
thread stacks are registered in m_stacks.c but are never unregistered.
It is not very clear that it is needed or not to unregister them:
thread stack segments are not freed when a thread terminates :
when a thread slot is re-used, its thread stack will also be re-used.
(Is that good for address space mgt ? A process that has created many
temporary threads will have the thread stacks lost forever ???).
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13467
/* Client address space, lowest to highest (see top of ume.c) */
// TODO: get rid of as many of these as possible.
+/* ***Initial*** lowest address of the stack segment of the main thread.
+ The main stack will grow if needed but VG_(clstk_base) will
+ not be changed according to the growth. */
Addr VG_(clstk_base) = 0;
+/* Initial highest address of the stack segment of the main thread. */
Addr VG_(clstk_end) = 0;
UWord VG_(clstk_id) = 0;
/* The max stack size */
clstack_max_size = VG_PGROUNDUP(clstack_max_size);
- /* Record stack extent -- needed for stack-change code. */
- VG_(clstk_base) = clstack_start;
- VG_(clstk_end) = clstack_end;
-
if (0)
VG_(printf)("stringsize=%d auxsize=%d stacksize=%d maxsize=0x%x\n"
"clstack_start %p\n"
vg_assert(ok);
vg_assert(!sr_isError(res));
+
+ /* Record stack extent -- needed for stack-change code. */
+ VG_(clstk_base) = anon_start -inner_HACK;
+ VG_(clstk_end) = VG_(clstk_base) + anon_size +inner_HACK -1;
+
}
/* ==================== create client stack ==================== */
#include "pub_core_stacks.h"
#include "pub_core_tooliface.h"
+// For expensive debugging
+#define EDEBUG(fmt, args...) //VG_(debugLog)(2, "stacks", fmt, ## args)
+
/*
The stack
~~~~~~~~~
#define IF_STACK_SWITCH_SET_current_stack_AND_RETURN \
Word delta = (Word)new_SP - (Word)old_SP; \
\
+ EDEBUG("current_stack %p-%p %lu new_SP %p old_SP %p\n", \
+ (void *) (current_stack ? current_stack->start : 0x0), \
+ (void *) (current_stack ? current_stack->end : 0x0), \
+ current_stack ? current_stack->id : 0, \
+ (void *)new_SP, (void *)old_SP); \
+ \
/* Check if the stack pointer is still in the same stack as before. */ \
if (UNLIKELY(current_stack == NULL || \
new_SP < current_stack->start || new_SP > current_stack->end)) { \
/* The stack pointer is now in another stack. Update the current */ \
/* stack information and return without doing anything else. */ \
current_stack = new_stack; \
+ EDEBUG("new current_stack %p-%p %lu \n", \
+ (void *) current_stack->start, \
+ (void *) current_stack->end, \
+ current_stack->id); \
return; \
- } \
+ } else \
+ EDEBUG("new current_stack not found\n"); \
}
#define IF_BIG_DELTA_complaints_AND_RETURN \
test-plo-yes.vgtest test-plo-yes.stdout.exp \
test-plo-yes.stderr.exp-le64 test-plo-yes.stderr.exp-le32 \
test-plo-no.stderr.exp-s390x-mvc \
+ thread_alloca.stderr.exp thread_alloca.vgtest \
trivialleak.stderr.exp trivialleak.vgtest trivialleak.stderr.exp2 \
undef_malloc_args.stderr.exp undef_malloc_args.vgtest \
unit_libcbase.stderr.exp unit_libcbase.vgtest \
supp_unknown supp1 supp2 suppfree \
test-plo \
trivialleak \
+ thread_alloca \
undef_malloc_args \
unit_libcbase unit_oset \
varinfo1 varinfo2 varinfo3 varinfo4 \
err_disable3_LDADD = -lpthread
err_disable4_LDADD = -lpthread
+thread_alloca_LDADD = -lpthread
error_counts_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
--- /dev/null
+/* Reproduces bug 321960 (based on test from Daniel Stodden).
+ At least on Ubuntu 12 and 13, causes invalid write errors
+ in __yell or the memset call (due to some part of the main
+ stack being marked as not addressable in memcheck).
+ Bug seems extremely sensitive to initial conditions:
+ Depending on the size of the env, bug is triggered or not.
+ Also, a high nr of threads in thr[] is needed to get
+ the problem. */
+#include <pthread.h>
+#include <alloca.h>
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+void *
+nop(void *nil)
+{
+ return NULL;
+}
+
+void
+__yell(void)
+{
+ char buf[256];
+ memset(buf, 0, sizeof(buf));
+}
+
+/* Without argument, executes once.
+ Otherwise first arg indicates nr of times the process will exec
+ itself, each time increasing the size of the environment
+ by about 50 characters. */
+int main(int argc, char **argv, char** envp)
+{
+ pthread_t thr[50];
+ int i, err;
+
+ for (i = 0; i < sizeof(thr) / sizeof(*thr); i++) {
+ err = pthread_create(&thr[i], NULL, nop, NULL);
+ assert(!err);
+ }
+
+ alloca(4096);
+ __yell();
+
+ for (i = 0; i < sizeof(thr) / sizeof(*thr); i++)
+ pthread_join(thr[i], NULL);
+
+ if ( argc == 2 && atoi(argv[1]) > 0) {
+ /* exec ourselves with some more env */
+ char** new_env;
+ char more_env[100];
+ char n[10];
+ int j;
+
+ sprintf(more_env, "N%d=ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ", atoi(argv[1]));
+ for (j = 0; envp[j]; j++)
+ ;
+ new_env = malloc((j+2) * sizeof(char*));
+ assert (new_env != NULL);
+ for (i = 0; i < j; i++)
+ new_env[i] = envp[i];
+ new_env[i++] = more_env;
+ new_env[i++] = NULL;
+ assert(i == j+2);
+ sprintf (n, "%d", atoi(argv[1]) - 1);
+ // system ("env | wc");
+ execle(argv[0], argv[0], n, NULL, new_env);
+ assert(0);
+ } else
+ return 0;
+}
--- /dev/null
+prog: thread_alloca 30
+vgopts: -q --trace-children=yes