From d47181fd7d1956e50857f37ba3675b038daf65fc Mon Sep 17 00:00:00 2001
From: Florian Krohm <florian@eich-krohm.de>
Date: Fri, 13 Feb 2015 19:08:26 +0000
Subject: [PATCH] Add command line flag --max-threads=<integer> to increase the
 number of threads that valgrind can handle. No recompile is needed. Part of
 fixing BZ #337869.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@14932
---
 NEWS                                          |  4 ++++
 callgrind/main.c                              | 10 ++++++++--
 callgrind/threads.c                           |  7 +++++--
 coregrind/m_main.c                            | 12 ++++++++++-
 coregrind/m_options.c                         |  1 +
 coregrind/m_scheduler/scheduler.c             |  6 +++---
 coregrind/m_syswrap/syswrap-main.c            |  8 ++++++--
 coregrind/m_threadstate.c                     | 13 ++++++++++--
 coregrind/pub_core_options.h                  |  4 ++++
 coregrind/pub_core_threadstate.h              |  2 +-
 drd/drd_thread.c                              | 20 ++++++++++++-------
 drd/drd_thread.h                              |  4 ++--
 exp-sgcheck/sg_main.c                         | 13 +++++++++---
 helgrind/tests/locked_vs_unlocked2.stderr.exp |  8 ++++----
 include/pub_tool_threadstate.h                |  8 ++------
 none/tests/cmdline1.stdout.exp                |  2 ++
 none/tests/cmdline2.stdout.exp                |  2 ++
 17 files changed, 89 insertions(+), 35 deletions(-)
diff --git a/NEWS b/NEWS
index 0b7bb56882..481948a086 100644
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,10 @@ Release 3.11.0 is under development, not yet released.
   searching/extracting errors in output files mixing valgrind
   errors with program output.
 
+* New option --max-threads=<number> can be used to increase the
+  number of threads valgrind can handle. The default is 500 threads,
+  which should be more than enough for most applications.
+
 * ==================== FIXED BUGS ====================
 
 The following bugs have been fixed or resolved.  Note that "n-i-bz"
diff --git a/callgrind/main.c b/callgrind/main.c
index d4b3679b70..5e501bc41b 100644
--- a/callgrind/main.c
+++ b/callgrind/main.c
@@ -1703,9 +1703,9 @@ Bool CLG_(handle_client_request)(ThreadId tid, UWord *args, UWord *ret)
 
 /* struct timeval syscalltime[VG_N_THREADS]; */
 #if CLG_MICROSYSTIME
-ULong syscalltime[VG_N_THREADS];
+ULong *syscalltime;
 #else
-UInt syscalltime[VG_N_THREADS];
+UInt *syscalltime;
 #endif
 
 static
@@ -2071,6 +2071,12 @@ void CLG_(pre_clo_init)(void)
     VG_(track_post_deliver_signal)( & CLG_(post_signal) );
 
     CLG_(set_clo_defaults)();
+
+    syscalltime = CLG_MALLOC("cl.main.pci.1",
+                             VG_N_THREADS * sizeof syscalltime[0]);
+    for (UInt i = 0; i < VG_N_THREADS; ++i) {
+       syscalltime[i] = 0;
+    }
 }
 
 VG_DETERMINE_INTERFACE_VERSION(CLG_(pre_clo_init))
diff --git a/callgrind/threads.c b/callgrind/threads.c
index 023009f008..7dca771f0d 100644
--- a/callgrind/threads.c
+++ b/callgrind/threads.c
@@ -61,7 +61,7 @@ static exec_stack current_states;
 /* current running thread */
 ThreadId CLG_(current_tid);
 
-static thread_info* thread[VG_N_THREADS];
+static thread_info** thread;
 
 thread_info** CLG_(get_threads)()
 {
@@ -75,7 +75,10 @@ thread_info* CLG_(get_current_thread)()
 
 void CLG_(init_threads)()
 {
-    Int i;
+    UInt i;
+
+    thread = CLG_MALLOC("cl.threads.it.1", VG_N_THREADS * sizeof thread[0]);
+
     for(i=0;i<VG_N_THREADS;i++)
 	thread[i] = 0;
     CLG_(current_tid) = VG_INVALID_THREADID;
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index 51ada98a18..b956ae8487 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -212,6 +212,8 @@ static void usage_NORETURN ( Bool debug_help )
 "                  recovered by stack scanning [5]\n"
 "    --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]\n"
 "              attempt to avoid expensive address-space-resync operations\n"
+"    --max-threads=<number>    maximum number of threads that valgrind can\n"
+"                              handle [%d]\n"
 "\n";
 
    const HChar usage2[] = 
@@ -317,7 +319,8 @@ static void usage_NORETURN ( Bool debug_help )
                default_redzone_size       /* char* */,
                VG_(clo_vgdb_poll)         /* int */,
                VG_(vgdb_prefix_default)() /* char* */,
-               N_SECTORS_DEFAULT          /* int */
+               N_SECTORS_DEFAULT          /* int */,
+               MAX_THREADS_DEFAULT        /* int */
                ); 
    if (VG_(details).name) {
       VG_(printf)("  user options for %s:\n", VG_(details).name);
@@ -394,6 +397,9 @@ static void early_process_cmd_line_options ( /*OUT*/Int* need_help,
       else if VG_INT_CLO(str, "--max-stackframe", VG_(clo_max_stackframe)) {}
       else if VG_INT_CLO(str, "--main-stacksize", VG_(clo_main_stacksize)) {}
 
+      // Set up VG_(clo_max_threads); needed for VG_(tl_pre_clo_init)
+      else if VG_INT_CLO(str, "--max-threads", VG_(clo_max_threads)) {}
+
       // Set up VG_(clo_sim_hints). This is needed a.o. for an inner
       // running in an outer, to have "no-inner-prefix" enabled
       // as early as possible.
@@ -403,6 +409,9 @@ static void early_process_cmd_line_options ( /*OUT*/Int* need_help,
                             "no-nptl-pthread-stackcache",
                             VG_(clo_sim_hints)) {}
    }
+
+   /* For convenience */
+   VG_N_THREADS = VG_(clo_max_threads);
 }
 
 /* The main processing for command line options.  See comments above
@@ -539,6 +548,7 @@ void main_process_cmd_line_options ( /*OUT*/Bool* logging_to_fd,
       else if VG_STREQ(     arg, "-d")                   {}
       else if VG_STREQN(17, arg, "--max-stackframe=")    {}
       else if VG_STREQN(17, arg, "--main-stacksize=")    {}
+      else if VG_STREQN(14, arg, "--max-threads=")       {}
       else if VG_STREQN(12, arg, "--sim-hints=")         {}
       else if VG_STREQN(15, arg, "--profile-heap=")      {}
       else if VG_STREQN(20, arg, "--core-redzone-size=") {}
diff --git a/coregrind/m_options.c b/coregrind/m_options.c
index f9183e3261..3f21cef3b5 100644
--- a/coregrind/m_options.c
+++ b/coregrind/m_options.c
@@ -126,6 +126,7 @@ Bool   VG_(clo_track_fds)      = False;
 Bool   VG_(clo_show_below_main)= False;
 Bool   VG_(clo_show_emwarns)   = False;
 Word   VG_(clo_max_stackframe) = 2000000;
+UInt   VG_(clo_max_threads)    = MAX_THREADS_DEFAULT;
 Word   VG_(clo_main_stacksize) = 0; /* use client's rlimit.stack */
 Bool   VG_(clo_wait_for_gdb)   = False;
 VgSmc  VG_(clo_smc_check)      = Vg_SmcStack;
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index 3ec0c8d875..dcb60bcecd 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -247,9 +247,9 @@ ThreadId VG_(alloc_ThreadState) ( void )
          return i;
       }
    }
-   VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
-   VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
-   VG_(core_panic)("VG_N_THREADS is too low");
+   VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
+               "and rerun valgrind\n");
+   VG_(core_panic)("Max number of threads is too low");
    /*NOTREACHED*/
 }
 
diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c
index 1c522cdfa2..ac1d7bef5e 100644
--- a/coregrind/m_syswrap/syswrap-main.c
+++ b/coregrind/m_syswrap/syswrap-main.c
@@ -49,6 +49,7 @@
 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
 #include "pub_core_syscall.h"
 #include "pub_core_machine.h"
+#include "pub_core_mallocfree.h"
 #include "pub_core_syswrap.h"
 
 #include "priv_types_n_macros.h"
@@ -1364,13 +1365,13 @@ typedef
    }
    SyscallInfo;
 
-SyscallInfo syscallInfo[VG_N_THREADS];
-
+SyscallInfo *syscallInfo;
 
 /* The scheduler needs to be able to zero out these records after a
    fork, hence this is exported from m_syswrap. */
 void VG_(clear_syscallInfo) ( Int tid )
 {
+   vg_assert(syscallInfo);
    vg_assert(tid >= 0 && tid < VG_N_THREADS);
    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
    syscallInfo[tid].status.what = SsIdle;
@@ -1383,6 +1384,9 @@ static void ensure_initialised ( void )
    if (init_done) 
       return;
    init_done = True;
+
+   syscallInfo = VG_(malloc)("scinfo", VG_N_THREADS * sizeof syscallInfo[0]);
+
    for (i = 0; i < VG_N_THREADS; i++) {
       VG_(clear_syscallInfo)( i );
    }
diff --git a/coregrind/m_threadstate.c b/coregrind/m_threadstate.c
index c53e406b0b..b3adcde862 100644
--- a/coregrind/m_threadstate.c
+++ b/coregrind/m_threadstate.c
@@ -32,6 +32,7 @@
 #include "pub_core_vki.h"
 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
 #include "pub_core_threadstate.h"
+#include "pub_core_mallocfree.h"    // VG_(malloc)
 #include "pub_core_libcassert.h"
 #include "pub_core_inner.h"
 #if defined(ENABLE_INNER_CLIENT_REQUEST)
@@ -44,8 +45,8 @@
 
 ThreadId VG_(running_tid) = VG_INVALID_THREADID;
 
-ThreadState VG_(threads)[VG_N_THREADS]
-            __attribute__((aligned(LibVEX_GUEST_STATE_ALIGN)));
+ThreadState *VG_(threads);
+UInt VG_N_THREADS;
 
 /*------------------------------------------------------------*/
 /*--- Operations.                                          ---*/
@@ -54,6 +55,14 @@ ThreadState VG_(threads)[VG_N_THREADS]
 void VG_(init_Threads)(void)
 {
    ThreadId tid;
+   UChar *addr, *aligned_addr;
+
+   addr = VG_(malloc)("init_Threads",
+          VG_N_THREADS * sizeof VG_(threads)[0] + LibVEX_GUEST_STATE_ALIGN - 1);
+   // Align: round up to the next boundary; the slack above covers the padding
+   aligned_addr = addr + (LibVEX_GUEST_STATE_ALIGN
+      - (Addr)addr % LibVEX_GUEST_STATE_ALIGN) % LibVEX_GUEST_STATE_ALIGN;
+   VG_(threads) = (ThreadState *)aligned_addr;
 
    for (tid = 1; tid < VG_N_THREADS; tid++) {
       INNER_REQUEST(
diff --git a/coregrind/pub_core_options.h b/coregrind/pub_core_options.h
index 1b49554161..3407586edd 100644
--- a/coregrind/pub_core_options.h
+++ b/coregrind/pub_core_options.h
@@ -295,6 +295,10 @@ extern Word VG_(clo_max_stackframe);
    be? */
 extern Word VG_(clo_main_stacksize);
 
+/* The maximum number of threads we support. */
+#define MAX_THREADS_DEFAULT 500
+extern UInt VG_(clo_max_threads);
+
 /* If the same IP is found twice in a backtrace in a sequence of max
    VG_(clo_merge_recursive_frames) frames, then the recursive call
    is merged in the backtrace.
diff --git a/coregrind/pub_core_threadstate.h b/coregrind/pub_core_threadstate.h
index fafcce936d..ba75f14678 100644
--- a/coregrind/pub_core_threadstate.h
+++ b/coregrind/pub_core_threadstate.h
@@ -362,7 +362,7 @@ ThreadState;
-/* A statically allocated array of threads.  NOTE: [0] is
+/* A dynamically allocated array of threads.  NOTE: [0] is
    never used, to simplify the simulation of initialisers for
    LinuxThreads. */
-extern ThreadState VG_(threads)[VG_N_THREADS];
+extern ThreadState *VG_(threads);
 
 // The running thread.  m_scheduler should be the only other module
 // to write to this.
diff --git a/drd/drd_thread.c b/drd/drd_thread.c
index 8f2ca647d4..247dd91eb1 100644
--- a/drd/drd_thread.c
+++ b/drd/drd_thread.c
@@ -65,7 +65,7 @@ static ULong    s_conflict_set_bitmap_creation_count;
 static ULong    s_conflict_set_bitmap2_creation_count;
 static ThreadId s_vg_running_tid  = VG_INVALID_THREADID;
 DrdThreadId     DRD_(g_drd_running_tid) = DRD_INVALID_THREADID;
-ThreadInfo      DRD_(g_threadinfo)[DRD_N_THREADS];
+ThreadInfo*     DRD_(g_threadinfo);
 struct bitmap*  DRD_(g_conflict_set);
 Bool DRD_(verify_conflict_set);
 static Bool     s_trace_context_switches = False;
@@ -142,6 +142,12 @@ void DRD_(thread_set_join_list_vol)(const int jlv)
 
 void DRD_(thread_init)(void)
 {
+   DRD_(g_threadinfo) = VG_(malloc)("drd.main.ti.1",
+                                DRD_N_THREADS * sizeof DRD_(g_threadinfo)[0]);
+   for (UInt i = 0; i < DRD_N_THREADS; ++i) {
+      static ThreadInfo initval;
+      DRD_(g_threadinfo)[i] = initval;
+   }
 }
 
 /**
@@ -152,7 +158,7 @@ void DRD_(thread_init)(void)
  */
 DrdThreadId DRD_(VgThreadIdToDrdThreadId)(const ThreadId tid)
 {
-   int i;
+   UInt i;
 
    if (tid == VG_INVALID_THREADID)
       return DRD_INVALID_THREADID;
@@ -172,7 +178,7 @@ DrdThreadId DRD_(VgThreadIdToDrdThreadId)(const ThreadId tid)
 /** Allocate a new DRD thread ID for the specified Valgrind thread ID. */
 static DrdThreadId DRD_(VgThreadIdToNewDrdThreadId)(const ThreadId tid)
 {
-   int i;
+   UInt i;
 
    tl_assert(DRD_(VgThreadIdToDrdThreadId)(tid) == DRD_INVALID_THREADID);
 
@@ -218,7 +224,7 @@ static DrdThreadId DRD_(VgThreadIdToNewDrdThreadId)(const ThreadId tid)
 /** Convert a POSIX thread ID into a DRD thread ID. */
 DrdThreadId DRD_(PtThreadIdToDrdThreadId)(const PThreadId tid)
 {
-   int i;
+   UInt i;
 
    if (tid != INVALID_POSIX_THREADID)
    {
@@ -336,7 +342,7 @@ DrdThreadId DRD_(thread_post_create)(const ThreadId vg_created)
 
 static void DRD_(thread_delayed_delete)(const DrdThreadId tid)
 {
-   int j;
+   UInt j;
 
    DRD_(g_threadinfo)[tid].vg_thread_exists = False;
    DRD_(g_threadinfo)[tid].posix_thread_exists = False;
@@ -476,9 +482,9 @@ void DRD_(thread_set_on_alt_stack)(const DrdThreadId tid,
 
 Int DRD_(thread_get_threads_on_alt_stack)(void)
 {
-   int i, n = 0;
+   int n = 0;
 
-   for (i = 1; i < DRD_N_THREADS; i++)
+   for (UInt i = 1; i < DRD_N_THREADS; i++)
       n += DRD_(g_threadinfo)[i].on_alt_stack;
    return n;
 }
diff --git a/drd/drd_thread.h b/drd/drd_thread.h
index d4899653a9..28068a7bee 100644
--- a/drd/drd_thread.h
+++ b/drd/drd_thread.h
@@ -113,7 +113,7 @@ typedef struct
  */
 extern DrdThreadId    DRD_(g_drd_running_tid);
 /** Per-thread information managed by DRD. */
-extern ThreadInfo     DRD_(g_threadinfo)[DRD_N_THREADS];
+extern ThreadInfo*    DRD_(g_threadinfo);
 /** Conflict set for the currently running thread. */
 extern struct bitmap* DRD_(g_conflict_set);
 extern Bool           DRD_(verify_conflict_set);
@@ -323,7 +323,7 @@ Bool DRD_(thread_address_on_stack)(const Addr a)
 static __inline__
 Bool DRD_(thread_address_on_any_stack)(const Addr a)
 {
-   int i;
+   UInt i;
 
    for (i = 1; i < DRD_N_THREADS; i++)
    {
diff --git a/exp-sgcheck/sg_main.c b/exp-sgcheck/sg_main.c
index 22c1000d31..0b2c50ef5b 100644
--- a/exp-sgcheck/sg_main.c
+++ b/exp-sgcheck/sg_main.c
@@ -1038,11 +1038,11 @@ static ULong stats__qcache_probes  = 0;
    * a shadow stack of StackFrames, which is a double-linked list
    * an stack block interval tree
 */
-static  struct _StackFrame*          shadowStacks[VG_N_THREADS];
+static  struct _StackFrame**         shadowStacks;
 
-static  WordFM* /* StackTreeNode */  siTrees[VG_N_THREADS];
+static  WordFM** /* StackTreeNode */ siTrees;
 
-static  QCache                       qcaches[VG_N_THREADS];
+static  QCache*                      qcaches;
 
 
 /* Additionally, there is one global variable interval tree
@@ -1062,9 +1062,16 @@ static void invalidate_all_QCaches ( void )
 static void ourGlobals_init ( void )
 {
    Word i;
+
+   shadowStacks = sg_malloc( "di.sg_main.oGi.2",
+                             VG_N_THREADS * sizeof shadowStacks[0] );
+   siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] );
+   qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] );
+
    for (i = 0; i < VG_N_THREADS; i++) {
       shadowStacks[i] = NULL;
       siTrees[i] = NULL;
+      qcaches[i] = (QCache){};
    }
    invalidate_all_QCaches();
    giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free, 
diff --git a/helgrind/tests/locked_vs_unlocked2.stderr.exp b/helgrind/tests/locked_vs_unlocked2.stderr.exp
index 3463b54177..d80f32d7ce 100644
--- a/helgrind/tests/locked_vs_unlocked2.stderr.exp
+++ b/helgrind/tests/locked_vs_unlocked2.stderr.exp
@@ -16,13 +16,13 @@ Thread #x was created
 
  Lock at 0x........ was first observed
    at 0x........: pthread_mutex_init (hg_intercepts.c:...)
-   by 0x........: main (locked_vs_unlocked2.c:58)
- Address 0x........ is 0 bytes inside data symbol "mx2a"
+   by 0x........: main (locked_vs_unlocked2.c:59)
+ Address 0x........ is 0 bytes inside data symbol "mx2b"
 
  Lock at 0x........ was first observed
    at 0x........: pthread_mutex_init (hg_intercepts.c:...)
-   by 0x........: main (locked_vs_unlocked2.c:59)
- Address 0x........ is 0 bytes inside data symbol "mx2b"
+   by 0x........: main (locked_vs_unlocked2.c:58)
+ Address 0x........ is 0 bytes inside data symbol "mx2a"
 
  Lock at 0x........ was first observed
    at 0x........: pthread_mutex_init (hg_intercepts.c:...)
diff --git a/include/pub_tool_threadstate.h b/include/pub_tool_threadstate.h
index 7220fca073..ccd5d97ddb 100644
--- a/include/pub_tool_threadstate.h
+++ b/include/pub_tool_threadstate.h
@@ -33,12 +33,8 @@
 
 #include "pub_tool_basics.h"   // ThreadID
 
-/* The maximum number of pthreads that we support.  This is
-   deliberately not very high since our implementation of some of the
-   scheduler algorithms is surely O(N) in the number of threads, since
-   that's simple, at least.  And (in practice) we hope that most
-   programs do not need many threads. */
-#define VG_N_THREADS 500
+/* Maximum number of pthreads we support; set from --max-threads at startup. */
+extern UInt VG_N_THREADS;
 
 /* Special magic value for an invalid ThreadId.  It corresponds to
    LinuxThreads using zero as the initial value for
diff --git a/none/tests/cmdline1.stdout.exp b/none/tests/cmdline1.stdout.exp
index 8367d4ebba..20ffe31ae7 100644
--- a/none/tests/cmdline1.stdout.exp
+++ b/none/tests/cmdline1.stdout.exp
@@ -125,6 +125,8 @@ usage: valgrind [options] prog-and-args
                   recovered by stack scanning [5]
     --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]
               attempt to avoid expensive address-space-resync operations
+    --max-threads=<number>    maximum number of threads that valgrind can
+                              handle [500]
 
   user options for Nulgrind:
     (none)
diff --git a/none/tests/cmdline2.stdout.exp b/none/tests/cmdline2.stdout.exp
index 2654de1d7b..44b6511ac1 100644
--- a/none/tests/cmdline2.stdout.exp
+++ b/none/tests/cmdline2.stdout.exp
@@ -125,6 +125,8 @@ usage: valgrind [options] prog-and-args
                   recovered by stack scanning [5]
     --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]
               attempt to avoid expensive address-space-resync operations
+    --max-threads=<number>    maximum number of threads that valgrind can
+                              handle [500]
 
   user options for Nulgrind:
     (none)
-- 
2.47.3