From: Julian Seward Date: Tue, 18 Dec 2007 01:49:23 +0000 (+0000) Subject: Improve handling of programs which require very large main thread X-Git-Tag: svn/VALGRIND_3_4_0~1126 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4f282e99cece8375bc3820498961befed672041b;p=thirdparty%2Fvalgrind.git Improve handling of programs which require very large main thread stacks. Instead of hardwiring the main thread stack to a max of 16MB and segfaulting the app beyond that point, allow the user to specify the main stack size using the new flag --main-stacksize=. If said flag is not present, the current default, which is "MIN(16MB, current ulimit -s value)", is used. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@7302 --- diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c index 66a268578e..66acdf2374 100644 --- a/coregrind/m_initimg/initimg-linux.c +++ b/coregrind/m_initimg/initimg-linux.c @@ -593,7 +593,7 @@ Addr setup_client_stack( void* init_sp, VG_(printf)("valgrind: " "I failed to allocate space for the application's stack.\n"); VG_(printf)("valgrind: " - "This may be the result of a very large --max-stackframe=\n"); + "This may be the result of a very large --main-stacksize=\n"); VG_(printf)("valgrind: setting. Cannot continue. Sorry.\n\n"); VG_(exit)(0); } @@ -874,25 +874,28 @@ IIFinaliseImageInfo VG_(ii_create_image)( IICreateImageInfo iicii ) //-------------------------------------------------------------- { /* When allocating space for the client stack on Linux, take - notice of the --max-stackframe value. This makes it possible + notice of the --main-stacksize value. This makes it possible to run programs with very large (primary) stack requirements - simply by specifying --max-stackframe. */ + simply by specifying --main-stacksize. 
*/ + /* Logic is as follows: + - by default, use the client's current stack rlimit + - if that exceeds 16M, clamp to 16M + - if a larger --main-stacksize value is specified, use that instead + - in all situations, the minimum allowed stack size is 1M + */ void* init_sp = iicii.argv - 1; SizeT m1 = 1024 * 1024; SizeT m16 = 16 * m1; - SizeT msf = VG_(clo_max_stackframe) + m1; - VG_(debugLog)(1, "initimg", "Setup client stack\n"); - /* For the max stack size, use the client's stack rlimit, but - clamp it to between 1M and 16M. */ - iifii.clstack_max_size = (SizeT)VG_(client_rlimit_stack).rlim_cur; - if (iifii.clstack_max_size < m1) iifii.clstack_max_size = m1; - if (iifii.clstack_max_size > m16) iifii.clstack_max_size = m16; - /* However, if --max-stackframe= is specified, and the given - value (+ 1 M for spare) exceeds the current setting, use the - max-stackframe input instead. */ - - if (iifii.clstack_max_size < msf) iifii.clstack_max_size = msf; - iifii.clstack_max_size = VG_PGROUNDUP(iifii.clstack_max_size); + SizeT szB = (SizeT)VG_(client_rlimit_stack).rlim_cur; + if (szB < m1) szB = m1; + if (szB > m16) szB = m16; + if (VG_(clo_main_stacksize) > 0) szB = VG_(clo_main_stacksize); + if (szB < m1) szB = m1; + szB = VG_PGROUNDUP(szB); + VG_(debugLog)(1, "initimg", + "Setup client stack: size will be %ld\n", szB); + + iifii.clstack_max_size = szB; iifii.initial_client_SP = setup_client_stack( init_sp, env, diff --git a/coregrind/m_main.c b/coregrind/m_main.c index 64b3a6e763..50992f48a2 100644 --- a/coregrind/m_main.c +++ b/coregrind/m_main.c @@ -149,6 +149,8 @@ static void usage_NORETURN ( Bool debug_help ) " --input-fd= file descriptor for input [0=stdin]\n" " --max-stackframe= assume stack switch for SP changes larger\n" " than bytes [2000000]\n" +" --main-stacksize= set size of main thread's stack (in bytes)\n" +" [use current 'ulimit' value]\n" "\n"; Char* usage2 = @@ -243,11 +245,22 @@ static void usage_NORETURN ( Bool debug_help ) } -/* Peer at previously set 
up VG_(args_for_valgrind) and extract any - request for help and also the tool name, and also set up - VG_(clo_max_stackframe). */ +/* Peer at previously set up VG_(args_for_valgrind) and do some + minimal command line processing that must happen early on: -static void get_helprequest_and_toolname ( Int* need_help, HChar** tool ) + - show the version string, if requested (-v) + - extract any request for help (--help, -h, --help-debug) + - get the toolname (--tool=) + - set VG_(clo_max_stackframe) (--max-stackframe=) + - set VG_(clo_main_stacksize) (--main-stacksize=) + + That's all it does. The main command line processing is done below + by main_process_cmd_line_options. Note that + main_process_cmd_line_options has to handle but ignore the ones we + have handled here. +*/ +static void early_process_cmd_line_options ( /*OUT*/Int* need_help, + /*OUT*/HChar** tool ) { UInt i; HChar* str; @@ -278,16 +291,21 @@ static void get_helprequest_and_toolname ( Int* need_help, HChar** tool ) } else if (VG_CLO_STREQN(7, str, "--tool=")) { *tool = &str[7]; - // Set up VG_(clo_max_stackframe). This is needed by - // VG_(ii_create_image), which happens before - // process_command_line_options(). - } else VG_NUM_CLO (str, "--max-stackframe", - VG_(clo_max_stackframe)); + // Set up VG_(clo_max_stackframe) and VG_(clo_main_stacksize). + // These are needed by VG_(ii_create_image), which happens + // before main_process_cmd_line_options(). + } + else VG_NUM_CLO(str, "--max-stackframe", VG_(clo_max_stackframe)) + else VG_NUM_CLO(str, "--main-stacksize", VG_(clo_main_stacksize)); } } -static Bool process_cmd_line_options( UInt* client_auxv, const char* toolname ) +/* The main processing for command line options. See comments above + on early_process_cmd_line_options. +*/ +static Bool main_process_cmd_line_options( UInt* client_auxv, + const HChar* toolname ) { // VG_(clo_log_fd) is used by all the messaging. 
It starts as 2 (stderr) // and we cannot change it until we know what we are changing it to is @@ -375,10 +393,13 @@ static Bool process_cmd_line_options( UInt* client_auxv, const char* toolname ) else VG_BOOL_CLO(arg, "--error-limit", VG_(clo_error_limit)) else VG_NUM_CLO (arg, "--error-exitcode", VG_(clo_error_exitcode)) else VG_BOOL_CLO(arg, "--show-emwarns", VG_(clo_show_emwarns)) - /* Already done in get_helprequest_and_toolname, but we need to - redundantly handle it again, so the flag does not get - rejected as invalid. */ + + /* The next two are already done in + early_process_cmd_line_options, but we need to redundantly + handle them again, so they do not get rejected as invalid. */ else VG_NUM_CLO (arg, "--max-stackframe", VG_(clo_max_stackframe)) + else VG_NUM_CLO (arg, "--main-stacksize", VG_(clo_main_stacksize)) + else VG_BOOL_CLO(arg, "--run-libc-freeres", VG_(clo_run_libc_freeres)) else VG_BOOL_CLO(arg, "--show-below-main", VG_(clo_show_below_main)) else VG_BOOL_CLO(arg, "--time-stamp", VG_(clo_time_stamp)) @@ -1404,20 +1425,21 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) // because the tool has not been initialised. 
// p: split_up_argv [for VG_(args_for_valgrind)] //-------------------------------------------------------------- - VG_(debugLog)(1, "main", "Preprocess command line opts\n"); - get_helprequest_and_toolname(&need_help, &toolname); + VG_(debugLog)(1, "main", + "(early_) Process Valgrind's command line options\n"); + early_process_cmd_line_options(&need_help, &toolname); // Set default vex control params LibVEX_default_VexControl(& VG_(clo_vex_control)); //-------------------------------------------------------------- // Load client executable, finding in $PATH if necessary - // p: get_helprequest_and_toolname() [for 'exec', 'need_help'] - // p: layout_remaining_space [so there's space] + // p: early_process_cmd_line_options() [for 'exec', 'need_help'] + // p: layout_remaining_space [so there's space] // // Set up client's environment - // p: set-libdir [for VG_(libdir)] - // p: get_helprequest_and_toolname [for toolname] + // p: set-libdir [for VG_(libdir)] + // p: early_process_cmd_line_options [for toolname] // // Setup client stack, eip, and VG_(client_arg[cv]) // p: load_client() [for 'info'] @@ -1544,8 +1566,8 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) //-------------------------------------------------------------- // If --tool and --help/--help-debug was given, now give the core+tool // help message - // p: get_helprequest_and_toolname() [for 'need_help'] - // p: tl_pre_clo_init [for 'VG_(tdict).usage'] + // p: early_process_cmd_line_options() [for 'need_help'] + // p: tl_pre_clo_init [for 'VG_(tdict).usage'] //-------------------------------------------------------------- VG_(debugLog)(1, "main", "Print help and quit, if requested\n"); if (need_help) { @@ -1557,9 +1579,10 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) // p: setup_client_stack() [for 'VG_(client_arg[cv]'] // p: setup_file_descriptors() [for 'VG_(fd_xxx_limit)'] //-------------------------------------------------------------- - VG_(debugLog)(1, "main", 
"Process Valgrind's command line options, " - "setup logging\n"); - logging_to_fd = process_cmd_line_options(client_auxv, toolname); + VG_(debugLog)(1, "main", + "(main_) Process Valgrind's command line options, " + "setup logging\n"); + logging_to_fd = main_process_cmd_line_options(client_auxv, toolname); //-------------------------------------------------------------- // Zeroise the millisecond counter by doing a first read of it. @@ -1570,8 +1593,9 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) //-------------------------------------------------------------- // Print the preamble // p: tl_pre_clo_init [for 'VG_(details).name' and friends] - // p: process_cmd_line_options() [for VG_(clo_verbosity), VG_(clo_xml), - // logging_to_fd] + // p: main_process_cmd_line_options() [for VG_(clo_verbosity), + // VG_(clo_xml), + // logging_to_fd] //-------------------------------------------------------------- VG_(debugLog)(1, "main", "Print the preamble...\n"); print_preamble(logging_to_fd, toolname); @@ -1605,7 +1629,7 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) //-------------------------------------------------------------- // Allow GDB attach - // p: process_cmd_line_options() [for VG_(clo_wait_for_gdb)] + // p: main_process_cmd_line_options() [for VG_(clo_wait_for_gdb)] //-------------------------------------------------------------- /* Hook to delay things long enough so we can get the pid and attach GDB in another shell. 
*/ @@ -1634,7 +1658,7 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) //-------------------------------------------------------------- // Search for file descriptors that are inherited from our parent - // p: process_cmd_line_options [for VG_(clo_track_fds)] + // p: main_process_cmd_line_options [for VG_(clo_track_fds)] //-------------------------------------------------------------- if (VG_(clo_track_fds)) { VG_(debugLog)(1, "main", "Init preopened fds\n"); @@ -1833,7 +1857,7 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) //-------------------------------------------------------------- // Read suppression file - // p: process_cmd_line_options() [for VG_(clo_suppressions)] + // p: main_process_cmd_line_options() [for VG_(clo_suppressions)] //-------------------------------------------------------------- if (VG_(needs).core_errors || VG_(needs).tool_errors) { VG_(debugLog)(1, "main", "Load suppressions\n"); diff --git a/coregrind/m_options.c b/coregrind/m_options.c index 6326971d86..1c70428167 100644 --- a/coregrind/m_options.c +++ b/coregrind/m_options.c @@ -85,6 +85,7 @@ Bool VG_(clo_track_fds) = False; Bool VG_(clo_show_below_main)= False; Bool VG_(clo_show_emwarns) = False; Word VG_(clo_max_stackframe) = 2000000; +Word VG_(clo_main_stacksize) = 0; /* use client's rlimit.stack */ Bool VG_(clo_wait_for_gdb) = False; VgSmc VG_(clo_smc_check) = Vg_SmcStack; HChar* VG_(clo_kernel_variant) = NULL; diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c index f63d89bc9a..50c9ed15d1 100644 --- a/coregrind/m_scheduler/scheduler.c +++ b/coregrind/m_scheduler/scheduler.c @@ -466,6 +466,14 @@ ThreadId VG_(scheduler_init_phase1) ( void ) tid_main = VG_(alloc_ThreadState)(); + /* Bleh. Unfortunately there are various places in the system that + assume that the main thread has a ThreadId of 1. 
+ - Helgrind (possibly) + - stack overflow message in default_action() in m_signals.c + - definitely a lot more places + */ + vg_assert(tid_main == 1); + return tid_main; } diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c index 0ab607cab7..de2b49b912 100644 --- a/coregrind/m_signals.c +++ b/coregrind/m_signals.c @@ -1289,6 +1289,23 @@ static void default_action(const vki_siginfo_t *info, ThreadId tid) if (tid != VG_INVALID_THREADID) { VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size)); } + + if (sigNo == VKI_SIGSEGV + && info && info->si_code > VKI_SI_USER + && info->si_code == VKI_SEGV_MAPERR) { + VG_(message)(Vg_UserMsg, " If you believe this happened as a " + "result of a stack overflow in your"); + VG_(message)(Vg_UserMsg, " program's main thread (unlikely but" + " possible), you can try to increase"); + VG_(message)(Vg_UserMsg, " the size of the main thread stack" + " using the --main-stacksize= flag."); + // FIXME: assumes main ThreadId == 1 + if (VG_(is_valid_tid)(1)) { + VG_(message)(Vg_UserMsg, + " The main thread stack size used in this run was %d.", + (Int)VG_(threads)[1].client_stack_szB); + } + } } if (VG_(is_action_requested)( "Attach to debugger", & VG_(clo_db_attach) )) { diff --git a/coregrind/pub_core_options.h b/coregrind/pub_core_options.h index 384a6a5e39..4c53df6c37 100644 --- a/coregrind/pub_core_options.h +++ b/coregrind/pub_core_options.h @@ -157,6 +157,9 @@ extern Bool VG_(clo_show_emwarns); consider a stack switch to have happened? Default: 2000000 bytes NB: must be host-word-sized to be correct (hence Word). */ extern Word VG_(clo_max_stackframe); +/* How large should Valgrind allow the primary thread's guest stack to + be? */ +extern Word VG_(clo_main_stacksize); /* Delay startup to allow GDB to be attached? 
Default: NO */ extern Bool VG_(clo_wait_for_gdb); diff --git a/docs/xml/manual-core.xml b/docs/xml/manual-core.xml index 95eb5df73b..317b655166 100644 --- a/docs/xml/manual-core.xml +++ b/docs/xml/manual-core.xml @@ -1046,6 +1046,60 @@ that can report errors, e.g. Memcheck, but not Cachegrind. + + + + + + Specifies the size of the main thread's stack. + + To simplify its memory management, Valgrind reserves all + required space for the main thread's stack at startup. That + means it needs to know the required stack size at + startup. + + By default, Valgrind uses the current "ulimit" value for + the stack size, or 16 MB, whichever is lower. In many cases + this gives a stack size in the range 8 to 16 MB, which almost + never overflows for most applications. + + If you need a larger total stack size, + use --main-stacksize to specify it. Only set + it as high as you need, since reserving far more space than you + need (that is, hundreds of megabytes more than you need) + constrains Valgrind's memory allocators and may reduce the total + amount of memory that Valgrind can use. This is only really of + significance on 32-bit machines. + + On Linux, you may request a stack of size up to 2GB. + Valgrind will stop with a diagnostic message if the stack cannot + be allocated. On AIX5 the allowed stack size is restricted to + 128MB. + + --main-stacksize only affects the stack + size for the program's initial thread. It has no bearing on the + size of thread stacks, as Valgrind does not allocate + those. + + You may need to use both --main-stacksize + and --max-stackframe together. It is important + to understand that --main-stacksize sets the + maximum total stack size, + whilst --max-stackframe specifies the largest + size of any one stack frame. You will have to work out + the --main-stacksize value for yourself + (usually, if your application segfaults). But Valgrind will + tell you the needed --max-stackframe size, if + necessary. + + As discussed further in the description + of --max-stackframe, a requirement for a large + stack is a sign of potential portability problems. 
You are best + advised to place all large data in heap-allocated memory. + + + diff --git a/memcheck/tests/addressable.stderr.exp b/memcheck/tests/addressable.stderr.exp index e9409ec750..161aebb466 100644 --- a/memcheck/tests/addressable.stderr.exp +++ b/memcheck/tests/addressable.stderr.exp @@ -19,6 +19,10 @@ Process terminating with default action of signal 11 (SIGSEGV) Access not within mapped region at address 0x........ at 0x........: test2 (addressable.c:51) by 0x........: main (addressable.c:125) + If you believe this happened as a result of a stack overflow in your + program's main thread (unlikely but possible), you can try to increase + the size of the main thread stack using the --main-stacksize= flag. + The main thread stack size used in this run was 16777216. ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0) malloc/free: in use at exit: 0 bytes in 0 blocks. diff --git a/memcheck/tests/badjump.stderr.exp b/memcheck/tests/badjump.stderr.exp index 6e723e558f..51dd7dfa22 100644 --- a/memcheck/tests/badjump.stderr.exp +++ b/memcheck/tests/badjump.stderr.exp @@ -8,6 +8,10 @@ Process terminating with default action of signal 11 (SIGSEGV) Access not within mapped region at address 0x........ at 0x........: ??? by 0x........: (below main) (in /...libc...) + If you believe this happened as a result of a stack overflow in your + program's main thread (unlikely but possible), you can try to increase + the size of the main thread stack using the --main-stacksize= flag. + The main thread stack size used in this run was 16777216. ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0) malloc/free: in use at exit: 0 bytes in 0 blocks. 
diff --git a/memcheck/tests/describe-block.stderr.exp b/memcheck/tests/describe-block.stderr.exp index 2f1aedc3db..fdba38f78f 100644 --- a/memcheck/tests/describe-block.stderr.exp +++ b/memcheck/tests/describe-block.stderr.exp @@ -7,6 +7,10 @@ Invalid write of size 1 Process terminating with default action of signal 11 (SIGSEGV) Access not within mapped region at address 0x........ at 0x........: main (describe-block.c:6) + If you believe this happened as a result of a stack overflow in your + program's main thread (unlikely but possible), you can try to increase + the size of the main thread stack using the --main-stacksize= flag. + The main thread stack size used in this run was 16777216. ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0) malloc/free: in use at exit: 0 bytes in 0 blocks. diff --git a/memcheck/tests/match-overrun.stderr.exp b/memcheck/tests/match-overrun.stderr.exp index 7b40894247..140d480da8 100644 --- a/memcheck/tests/match-overrun.stderr.exp +++ b/memcheck/tests/match-overrun.stderr.exp @@ -4,6 +4,10 @@ Process terminating with default action of signal 11 (SIGSEGV) Access not within mapped region at address 0x........ 
at 0x........: a1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 (match-overrun.c:6) by 0x........: main (match-overrun.c:11) + If you believe this happened as a result of a stack overflow in your + program's main thread (unlikely but possible), you can try to increase + the size of the main thread stack using the --main-stacksize= flag. + The main thread stack size used in this run was 16777216. ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) malloc/free: in use at exit: 0 bytes in 0 blocks. diff --git a/memcheck/tests/supp_unknown.stderr.exp b/memcheck/tests/supp_unknown.stderr.exp index 820dcfb001..b47097e207 100644 --- a/memcheck/tests/supp_unknown.stderr.exp +++ b/memcheck/tests/supp_unknown.stderr.exp @@ -3,3 +3,7 @@ Process terminating with default action of signal 11 (SIGSEGV) Access not within mapped region at address 0x........ at 0x........: ??? by 0x........: (below main) (in /...libc...) + If you believe this happened as a result of a stack overflow in your + program's main thread (unlikely but possible), you can try to increase + the size of the main thread stack using the --main-stacksize= flag. + The main thread stack size used in this run was 16777216. 
diff --git a/none/tests/blockfault.stderr.exp b/none/tests/blockfault.stderr.exp index 23354c2eac..3c2c3ffa7b 100644 --- a/none/tests/blockfault.stderr.exp +++ b/none/tests/blockfault.stderr.exp @@ -3,4 +3,8 @@ Process terminating with default action of signal 11 (SIGSEGV) Access not within mapped region at address 0x........ at 0x........: main (blockfault.c:32) + If you believe this happened as a result of a stack overflow in your + program's main thread (unlikely but possible), you can try to increase + the size of the main thread stack using the --main-stacksize= flag. + The main thread stack size used in this run was 16777216. diff --git a/none/tests/cmdline1.stdout.exp b/none/tests/cmdline1.stdout.exp index 02f8331344..5d8889fd5e 100644 --- a/none/tests/cmdline1.stdout.exp +++ b/none/tests/cmdline1.stdout.exp @@ -40,6 +40,8 @@ usage: valgrind [options] prog-and-args --input-fd= file descriptor for input [0=stdin] --max-stackframe= assume stack switch for SP changes larger than bytes [2000000] + --main-stacksize= set size of main thread's stack (in bytes) + [use current 'ulimit' value] user options for Nulgrind: (none) diff --git a/none/tests/cmdline2.stdout.exp b/none/tests/cmdline2.stdout.exp index 19a911e3c7..ca5ff2201f 100644 --- a/none/tests/cmdline2.stdout.exp +++ b/none/tests/cmdline2.stdout.exp @@ -40,6 +40,8 @@ usage: valgrind [options] prog-and-args --input-fd= file descriptor for input [0=stdin] --max-stackframe= assume stack switch for SP changes larger than bytes [2000000] + --main-stacksize= set size of main thread's stack (in bytes) + [use current 'ulimit' value] user options for Nulgrind: (none)