From: Bart Van Assche Date: Fri, 6 Jun 2008 14:31:36 +0000 (+0000) Subject: Speed up analysis of programs that access the thread stack intensively. X-Git-Tag: svn/VALGRIND_3_4_0~508 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d75a4f4b2421179ee9e73c9c66051c174232569d;p=thirdparty%2Fvalgrind.git Speed up analysis of programs that access the thread stack intensively. git-svn-id: svn://svn.valgrind.org/valgrind/trunk@8196 --- diff --git a/exp-drd/drd_main.c b/exp-drd/drd_main.c index c8131897ea..d6f21dc7c9 100644 --- a/exp-drd/drd_main.c +++ b/exp-drd/drd_main.c @@ -70,11 +70,11 @@ static void drd_start_client_code(const ThreadId tid, const ULong bbs_done); // Local variables. -static Bool s_drd_check_stack_var = False; -static Bool s_drd_print_stats = False; -static Bool s_drd_trace_fork_join = False; -static Bool s_drd_var_info = False; -static Bool s_show_stack_usage = False; +static Bool s_drd_check_stack_accesses = False; +static Bool s_drd_print_stats = False; +static Bool s_drd_trace_fork_join = False; +static Bool s_drd_var_info = False; +static Bool s_show_stack_usage = False; // @@ -84,22 +84,22 @@ static Bool s_show_stack_usage = False; static Bool drd_process_cmd_line_option(Char* arg) { int exclusive_threshold_ms = -1; - int segment_merging = -1; + int segment_merging = -1; int shared_threshold_ms = -1; - int show_confl_seg = -1; - int trace_barrier = -1; - int trace_clientobj = -1; - int trace_cond = -1; - int trace_csw = -1; - int trace_danger_set = -1; - int trace_mutex = -1; - int trace_rwlock = -1; - int trace_segment = -1; - int trace_semaphore = -1; - int trace_suppression = -1; - Char* trace_address = 0; - - VG_BOOL_CLO (arg, "--check-stack-var", s_drd_check_stack_var) + int show_confl_seg = -1; + int trace_barrier = -1; + int trace_clientobj = -1; + int trace_cond = -1; + int trace_csw = -1; + int trace_danger_set = -1; + int trace_mutex = -1; + int trace_rwlock = -1; + int trace_segment = -1; + int trace_semaphore = -1; + int trace_suppression = -1; + Char* trace_address = 0; + + VG_BOOL_CLO (arg, "--check-stack-var", s_drd_check_stack_accesses) else VG_BOOL_CLO(arg, "--drd-stats", s_drd_print_stats) else VG_BOOL_CLO(arg, "--segment-merging", segment_merging) else VG_BOOL_CLO(arg, "--show-confl-seg", show_confl_seg) @@ -278,7 +278,8 @@ static VG_REGPARM(2) void drd_trace_load(Addr addr, SizeT size) { drd_trace_mem_access(addr, size, eLoad); } - if (bm_access_load_triggers_conflict(addr, addr + size)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_load_triggers_conflict(addr, addr + size)) { drd_report_race(addr, size, eLoad); } @@ -293,7 +294,8 @@ static VG_REGPARM(1) void drd_trace_load_1(Addr addr) { drd_trace_mem_access(addr, 1, eLoad); } - if (bm_access_load_1_triggers_conflict(addr)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_load_1_triggers_conflict(addr)) { drd_report_race(addr, 1, eLoad); } @@ -308,7 +310,8 @@ static VG_REGPARM(1) void drd_trace_load_2(Addr addr) { drd_trace_mem_access(addr, 2, eLoad); } - if (bm_access_load_2_triggers_conflict(addr)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_load_2_triggers_conflict(addr)) { drd_report_race(addr, 2, eLoad); } @@ -323,7 +326,8 @@ static VG_REGPARM(1) void drd_trace_load_4(Addr addr) { drd_trace_mem_access(addr, 4, eLoad); } - if (bm_access_load_4_triggers_conflict(addr)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_load_4_triggers_conflict(addr)) { drd_report_race(addr, 4, eLoad); } @@ -338,7 +342,8 @@ static VG_REGPARM(1) void drd_trace_load_8(Addr addr) { drd_trace_mem_access(addr, 8, eLoad); } - if (bm_access_load_8_triggers_conflict(addr)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_load_8_triggers_conflict(addr)) { drd_report_race(addr, 8, eLoad); } @@ -360,7 +365,8 @@ VG_REGPARM(2) void drd_trace_store(Addr addr, SizeT size) { drd_trace_mem_access(addr, size, eStore); } - if (bm_access_store_triggers_conflict(addr, addr + size)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_store_triggers_conflict(addr, addr + size)) { drd_report_race(addr, size, eStore); } @@ -375,7 +381,8 @@ static VG_REGPARM(1) void drd_trace_store_1(Addr addr) { drd_trace_mem_access(addr, 1, eStore); } - if (bm_access_store_1_triggers_conflict(addr)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_store_1_triggers_conflict(addr)) { drd_report_race(addr, 1, eStore); } @@ -390,7 +397,8 @@ static VG_REGPARM(1) void drd_trace_store_2(Addr addr) { drd_trace_mem_access(addr, 2, eStore); } - if (bm_access_store_2_triggers_conflict(addr)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_store_2_triggers_conflict(addr)) { drd_report_race(addr, 2, eStore); } @@ -405,7 +413,8 @@ static VG_REGPARM(1) void drd_trace_store_4(Addr addr) { drd_trace_mem_access(addr, 4, eStore); } - if (bm_access_store_4_triggers_conflict(addr)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_store_4_triggers_conflict(addr)) { drd_report_race(addr, 4, eStore); } @@ -420,7 +429,8 @@ static VG_REGPARM(1) void drd_trace_store_8(Addr addr) { drd_trace_mem_access(addr, 8, eStore); } - if (bm_access_store_8_triggers_conflict(addr)) + if ((s_drd_check_stack_accesses || ! thread_address_on_stack(addr)) + && bm_access_store_8_triggers_conflict(addr)) { drd_report_race(addr, 8, eStore); } @@ -473,7 +483,8 @@ static void drd_post_mem_write(const CorePart part, } } -static void drd_start_using_mem(const Addr a1, const SizeT len) +static __inline__ +void drd_start_using_mem(const Addr a1, const SizeT len) { tl_assert(a1 < a1 + len); @@ -509,7 +520,7 @@ void drd_stop_using_mem(const Addr a1, const SizeT len, { drd_trace_mem_access(a1, len, eEnd); } - if (! is_stack_mem || s_drd_check_stack_var) + if (! is_stack_mem || s_drd_check_stack_accesses) { thread_stop_using_mem(a1, a2); clientobj_stop_using_mem(a1, a2); @@ -583,7 +594,8 @@ void drd_start_using_mem_w_perms(const Addr a, const SizeT len, /* Called by the core when the stack of a thread grows, to indicate that */ /* the addresses in range [ a, a + len [ may now be used by the client. */ /* Assumption: stacks grow downward. */ -static void drd_start_using_mem_stack(const Addr a, const SizeT len) +static __inline__ +void drd_start_using_mem_stack(const Addr a, const SizeT len) { thread_set_stack_min(thread_get_running_tid(), a - VG_STACK_REDZONE_SZB); drd_start_using_mem(a - VG_STACK_REDZONE_SZB, @@ -593,7 +605,8 @@ static void drd_start_using_mem_stack(const Addr a, const SizeT len) /* Called by the core when the stack of a thread shrinks, to indicate that */ /* the addresses [ a, a + len [ are no longer accessible for the client. */ /* Assumption: stacks grow downward. */ -static void drd_stop_using_mem_stack(const Addr a, const SizeT len) +static __inline__ +void drd_stop_using_mem_stack(const Addr a, const SizeT len) { thread_set_stack_min(thread_get_running_tid(), a + len - VG_STACK_REDZONE_SZB); @@ -649,7 +662,7 @@ void drd_post_thread_create(const ThreadId vg_created) "drd_post_thread_create created = %d/%d", vg_created, drd_created); } - if (! s_drd_check_stack_var) + if (! s_drd_check_stack_accesses) { drd_start_suppression(thread_get_stack_max(drd_created) - thread_get_stack_size(drd_created), @@ -691,7 +704,7 @@ void drd_post_thread_join(DrdThreadId drd_joiner, DrdThreadId drd_joinee) VG_(free)(msg); } - if (! s_drd_check_stack_var) + if (! s_drd_check_stack_accesses) { drd_finish_suppression(thread_get_stack_max(drd_joinee) - thread_get_stack_size(drd_joinee), diff --git a/exp-drd/drd_thread.h b/exp-drd/drd_thread.h index 299bed3210..ddbd086c89 100644 --- a/exp-drd/drd_thread.h +++ b/exp-drd/drd_thread.h @@ -162,20 +162,20 @@ Bool IsValidDrdThreadId(const DrdThreadId tid) && s_threadinfo[tid].detached_posix_thread == False)); } -static inline +static __inline__ DrdThreadId thread_get_running_tid(void) { tl_assert(s_drd_running_tid != DRD_INVALID_THREADID); return s_drd_running_tid; } -static inline +static __inline__ struct bitmap* thread_get_danger_set(void) { return s_danger_set; } -static inline +static __inline__ Bool running_thread_is_recording(void) { tl_assert(0 <= (int)s_drd_running_tid && s_drd_running_tid < DRD_N_THREADS @@ -184,7 +184,7 @@ Bool running_thread_is_recording(void) && s_threadinfo[s_drd_running_tid].is_recording); } -static inline +static __inline__ void thread_set_stack_min(const DrdThreadId tid, const Addr stack_min) { #if 0 @@ -203,8 +203,18 @@ void thread_set_stack_min(const DrdThreadId tid, const Addr stack_min) } } +/** Return true if and only if the specified address is on the stack of the + * currently scheduled thread. + */ +static __inline__ +Bool thread_address_on_stack(const Addr a) +{ + return (s_threadinfo[s_drd_running_tid].stack_min <= a + && a < s_threadinfo[s_drd_running_tid].stack_max); +} + /** Return a pointer to the latest segment for the specified thread. */ -static inline +static __inline__ Segment* thread_get_segment(const DrdThreadId tid) { tl_assert(0 <= (int)tid && tid < DRD_N_THREADS @@ -214,7 +224,7 @@ Segment* thread_get_segment(const DrdThreadId tid) } /** Return a pointer to the latest segment for the running thread. */ -static inline +static __inline__ Segment* running_thread_get_segment(void) { return thread_get_segment(s_drd_running_tid); diff --git a/exp-drd/scripts/run-splash2 b/exp-drd/scripts/run-splash2 index 65a6b15fa6..1b4bbd16b5 100755 --- a/exp-drd/scripts/run-splash2 +++ b/exp-drd/scripts/run-splash2 @@ -97,17 +97,17 @@ fi # Results: (-p1) (-p2) (-p3) (-p4) ITC (-p4) ITC (-p4) # original w/ filter # ......................................................................... -# Cholesky 39 49 ? 81 239 82 -# FFT 15 16 N/A 43 90 41 -# LU, contiguous blocks 38 39 ? 43 428 128 -# LU, non-contiguous blocks 32 34 ? 41 428 128 -# Ocean, contiguous partitions 19 23 N/A 29 90 28 -# Ocean, non-continguous partns 18 21 N/A 31 90 28 -# Radiosity 92 92 ? 92 485 163 -# Radix 11 14 ? 16 222 56 -# Raytrace 70 70 ? 70 172 53 -# Water-n2 50 50 ? 50 189 39 -# Water-sp 49 48 ? 49 183 34 +# Cholesky 40 47 ? 82 239 82 +# FFT 16 17 N/A 47 90 41 +# LU, contiguous blocks 39 41 ? 45 428 128 +# LU, non-contiguous blocks 39 41 ? 49 428 128 +# Ocean, contiguous partitions 17 19 N/A 25 90 28 +# Ocean, non-continguous partns 18 21 N/A 30 90 28 +# Radiosity 78 78 ? 78 485 163 +# Radix 10 12 ? 15 222 56 +# Raytrace 56 56 ? 56 172 53 +# Water-n2 34 34 ? 34 189 39 +# Water-sp 33 33 ? 33 183 34 # # Hardware: dual-core Intel Xeon 5130, 1.995 MHz, 4 MB L2 cache, 4 GB RAM. # Software: Ubuntu 7.10 server, 64-bit (includes gcc 4.1.3).