From: Philippe Waroquiers Date: Sat, 4 Nov 2017 22:32:19 +0000 (+0100) Subject: Improve efficiency of SP tracking in helgrind (and incidentally in exp-sgheck) X-Git-Tag: VALGRIND_3_14_0~204 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cc897604813cee1f5d3c614c76e5ca9ce696e6c3;p=thirdparty%2Fvalgrind.git Improve efficiency of SP tracking in helgrind (and incidentally in exp-sgheck) Helgrind (and incidentally exp-sgcheck) does not need both of tracking new mem stack and die mem stack: Helgrind only tracks new mem stack. exp-sgcheck only tracks die mem stack. Currently, m_translate.c vg_SP_update_pass inserts helpers calls for new and die mem stack, even if the tool only needs new mem stack (helgrind) or die mem stack (exp-sgcheck). The optimisation consists in not inserting helpers calls when the tool does not need to see new (or die) mem stack. Also, for helgrind, implement specialised new_mem_stack for known SP updates with small values (like memcheck). This reduces the size of the generated code for helgrind and exp-sgcheck. (see below the diffs on perf/memrw). This does not impact the code generation for tools that tracks both new and die mem stack (such as memcheck). trunk: exp-sgcheck: --28481-- transtab: new 2,256 (44,529 -> 581,402; ratio 13.1) [0 scs] avg tce size 257 helgrind: --28496-- transtab: new 2,299 (46,667 -> 416,575; ratio 8.9) [0 scs] avg tce size 181 memcheck: --28501-- transtab: new 2,220 (50,038 -> 777,139; ratio 15.5) [0 scs] avg tce size 350 with this patch: exp-sgcheck: --28516-- transtab: new 2,254 (44,479 -> 567,196; ratio 12.8) [0 scs] avg tce size 251 helgrind: --28512-- transtab: new 2,297 (46,620 -> 399,799; ratio 8.6) [0 scs] avg tce size 174 memcheck: --28507-- transtab: new 2,219 (49,991 -> 776,028; ratio 15.5) [0 scs] avg tce size 349 More in details, the changes consist in: pub_core_tooliface.h: * add 2 booleans any_new_mem_stack and any_die_mem_stack to the tdict struct * renamed VG_(sanity_check_needs) to VG_(finish_needs_init), as it does now more than sanity checks : it derives the 2 above booleans. m_tooliface.c: * change VG_(sanity_check_needs) to VG_(finish_needs_init) m_main.c: * update call to VG_(sanity_check_needs) hg_main.c: * add a few inlines for functions just calling another function * define the functions evh__new_mem_stack_[4|8|12|16|32|112|128|144|160] (using the macro DCL_evh__new_mem_stack). * call the VG_(track_new_mem_stack_[4|8|12|16|32|112|128|144|160]) m_translate.c * n_SP_updates_* stats are now maintained separately for the new and die fast and known cases. * need_to_handle_SP_assignment can now check only the 2 booleans any_new_mem_stack and any_die_mem_stack * DO_NEW macro: does not insert anymore a helper call if the tool does not track 'new' mem_stack. In case there is no new tracking, it however still does update the SP aliases (and the n_SP_updates_new_fast). * similar changes for DO_DIE macro. * a bunch of white spaces changes Note: it is easier to look at the changes in this file using git diff -w to ignore the white spaces changes (e.g. due to DO_NEW/DO_DIE indentation changes). regtested on debian/amd64 and on centos/ppc64 --- diff --git a/coregrind/m_main.c b/coregrind/m_main.c index 4526b1259d..e3d5f65c97 100644 --- a/coregrind/m_main.c +++ b/coregrind/m_main.c @@ -1651,10 +1651,10 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp ) { /* The tool's "needs" will by now be finalised, since it has no further opportunity to specify them. So now sanity check - them. */ + and finish initialising the needs. */ const HChar* s; Bool ok; - ok = VG_(sanity_check_needs)( &s ); + ok = VG_(finish_needs_init)( &s ); if (!ok) { VG_(core_panic)(s); } diff --git a/coregrind/m_tooliface.c b/coregrind/m_tooliface.c index 31b5f6ce97..d2e7ad98db 100644 --- a/coregrind/m_tooliface.c +++ b/coregrind/m_tooliface.c @@ -103,7 +103,7 @@ VgNeeds VG_(needs) = { }; /* static */ -Bool VG_(sanity_check_needs)(const HChar** failmsg) +Bool VG_(finish_needs_init)(const HChar** failmsg) { Bool any_new_mem_stack_N, any_new_mem_stack_N_w_ECU; Bool any_new_mem_stack_w_conflicting_otags; @@ -124,7 +124,7 @@ Bool VG_(sanity_check_needs)(const HChar** failmsg) /* Check that new_mem_stack is defined if any new_mem_stack_N are. */ - any_new_mem_stack_N + any_new_mem_stack_N = VG_(tdict).track_new_mem_stack_4 || VG_(tdict).track_new_mem_stack_8 || VG_(tdict).track_new_mem_stack_12 || @@ -182,6 +182,9 @@ Bool VG_(sanity_check_needs)(const HChar** failmsg) " but you can only have one or the other (not both)\n"; return False; } + VG_(tdict).any_new_mem_stack + = VG_(tdict).track_new_mem_stack || VG_(tdict).track_new_mem_stack_w_ECU + || any_new_mem_stack_N || any_new_mem_stack_N_w_ECU; /* Check that die_mem_stack is defined if any die_mem_stack_N are. */ @@ -202,6 +205,8 @@ Bool VG_(sanity_check_needs)(const HChar** failmsg) " 'die_mem_stack' should be defined\n"; return False; } + VG_(tdict).any_die_mem_stack + = VG_(tdict).track_die_mem_stack || any_die_mem_stack_N; return True; diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c index 55c845daed..d03df5d33a 100644 --- a/coregrind/m_translate.c +++ b/coregrind/m_translate.c @@ -66,8 +66,10 @@ /*--- Stats ---*/ /*------------------------------------------------------------*/ -static ULong n_SP_updates_fast = 0; -static ULong n_SP_updates_generic_known = 0; +static ULong n_SP_updates_new_fast = 0; +static ULong n_SP_updates_new_generic_known = 0; +static ULong n_SP_updates_die_fast = 0; +static ULong n_SP_updates_die_generic_known = 0; static ULong n_SP_updates_generic_unknown = 0; static ULong n_PX_VexRegUpdSpAtMemAccess = 0; @@ -77,19 +79,25 @@ static ULong n_PX_VexRegUpdAllregsAtEachInsn = 0; void VG_(print_translation_stats) ( void ) { - UInt n_SP_updates = n_SP_updates_fast + n_SP_updates_generic_known - + n_SP_updates_generic_unknown; + UInt n_SP_updates = n_SP_updates_new_fast + n_SP_updates_new_generic_known + + n_SP_updates_die_fast + n_SP_updates_die_generic_known + + n_SP_updates_generic_unknown; if (n_SP_updates == 0) { VG_(message)(Vg_DebugMsg, "translate: no SP updates identified\n"); } else { VG_(message)(Vg_DebugMsg, - "translate: fast SP updates identified: %'llu (%3.1f%%)\n", - n_SP_updates_fast, n_SP_updates_fast * 100.0 / n_SP_updates ); + "translate: fast new/die SP updates identified: " + "%'llu (%3.1f%%)/%'llu (%3.1f%%)\n", + n_SP_updates_new_fast, n_SP_updates_new_fast * 100.0 / n_SP_updates, + n_SP_updates_die_fast, n_SP_updates_die_fast * 100.0 / n_SP_updates ); VG_(message)(Vg_DebugMsg, - "translate: generic_known SP updates identified: %'llu (%3.1f%%)\n", - n_SP_updates_generic_known, - n_SP_updates_generic_known * 100.0 / n_SP_updates ); + "translate: generic_known new/die SP updates identified: " + "%'llu (%3.1f%%)/%'llu (%3.1f%%)\n", + n_SP_updates_new_generic_known, + n_SP_updates_new_generic_known * 100.0 / n_SP_updates, + n_SP_updates_die_generic_known, + n_SP_updates_die_generic_known * 100.0 / n_SP_updates ); VG_(message)(Vg_DebugMsg, "translate: generic_unknown SP updates identified: %'llu (%3.1f%%)\n", @@ -97,8 +105,12 @@ void VG_(print_translation_stats) ( void ) n_SP_updates_generic_unknown * 100.0 / n_SP_updates ); } - VG_(message)(Vg_DebugMsg, - "translate: PX: SPonly %'llu, UnwRegs %'llu, AllRegs %'llu, AllRegsAllInsns %'llu\n", n_PX_VexRegUpdSpAtMemAccess, n_PX_VexRegUpdUnwindregsAtMemAccess, n_PX_VexRegUpdAllregsAtMemAccess, n_PX_VexRegUpdAllregsAtEachInsn); + VG_(message) + (Vg_DebugMsg, + "translate: PX: SPonly %'llu, UnwRegs %'llu," + " AllRegs %'llu, AllRegsAllInsns %'llu\n", + n_PX_VexRegUpdSpAtMemAccess, n_PX_VexRegUpdUnwindregsAtMemAccess, + n_PX_VexRegUpdAllregsAtMemAccess, n_PX_VexRegUpdAllregsAtEachInsn); } /*------------------------------------------------------------*/ @@ -107,26 +119,7 @@ void VG_(print_translation_stats) ( void ) static Bool need_to_handle_SP_assignment(void) { - return ( VG_(tdict).track_new_mem_stack_4 || - VG_(tdict).track_die_mem_stack_4 || - VG_(tdict).track_new_mem_stack_8 || - VG_(tdict).track_die_mem_stack_8 || - VG_(tdict).track_new_mem_stack_12 || - VG_(tdict).track_die_mem_stack_12 || - VG_(tdict).track_new_mem_stack_16 || - VG_(tdict).track_die_mem_stack_16 || - VG_(tdict).track_new_mem_stack_32 || - VG_(tdict).track_die_mem_stack_32 || - VG_(tdict).track_new_mem_stack_112 || - VG_(tdict).track_die_mem_stack_112 || - VG_(tdict).track_new_mem_stack_128 || - VG_(tdict).track_die_mem_stack_128 || - VG_(tdict).track_new_mem_stack_144 || - VG_(tdict).track_die_mem_stack_144 || - VG_(tdict).track_new_mem_stack_160 || - VG_(tdict).track_die_mem_stack_160 || - VG_(tdict).track_new_mem_stack || - VG_(tdict).track_die_mem_stack ); + return VG_(tdict).any_new_mem_stack || VG_(tdict).any_die_mem_stack; } // - The SP aliases are held in an array which is used as a circular buffer. @@ -321,72 +314,82 @@ IRSB* vg_SP_update_pass ( void* closureV, vanilla = NULL != VG_(tdict).track_new_mem_stack_##syze; \ w_ecu = NULL != VG_(tdict).track_new_mem_stack_##syze##_w_ECU; \ vg_assert(!(vanilla && w_ecu)); /* can't have both */ \ - if (!(vanilla || w_ecu)) \ + if (VG_(tdict).any_new_mem_stack \ + && !vanilla && !w_ecu) { \ + n_SP_updates_new_generic_known++; \ goto generic; \ - \ - /* I don't know if it's really necessary to say that the */ \ - /* call reads the stack pointer. But anyway, we do. */ \ - if (w_ecu) { \ - dcall = unsafeIRDirty_0_N( \ - 2/*regparms*/, \ - "track_new_mem_stack_" #syze "_w_ECU", \ - VG_(fnptr_to_fnentry)( \ - VG_(tdict).track_new_mem_stack_##syze##_w_ECU ), \ - mkIRExprVec_2(IRExpr_RdTmp(tmpp), \ - mk_ecu_Expr(curr_IP)) \ - ); \ - } else { \ - dcall = unsafeIRDirty_0_N( \ - 1/*regparms*/, \ - "track_new_mem_stack_" #syze , \ - VG_(fnptr_to_fnentry)( \ - VG_(tdict).track_new_mem_stack_##syze ), \ - mkIRExprVec_1(IRExpr_RdTmp(tmpp)) \ - ); \ } \ - dcall->nFxState = 1; \ - dcall->fxState[0].fx = Ifx_Read; \ - dcall->fxState[0].offset = layout->offset_SP; \ - dcall->fxState[0].size = layout->sizeof_SP; \ - dcall->fxState[0].nRepeats = 0; \ - dcall->fxState[0].repeatLen = 0; \ \ - addStmtToIRSB( bb, IRStmt_Dirty(dcall) ); \ + if (VG_(tdict).any_new_mem_stack) { \ + /* I don't know if it's really necessary to say that the */ \ + /* call reads the stack pointer. But anyway, we do. */ \ + if (w_ecu) { \ + dcall = unsafeIRDirty_0_N( \ + 2/*regparms*/, \ + "track_new_mem_stack_" #syze "_w_ECU", \ + VG_(fnptr_to_fnentry)( \ + VG_(tdict).track_new_mem_stack_##syze##_w_ECU ), \ + mkIRExprVec_2(IRExpr_RdTmp(tmpp), \ + mk_ecu_Expr(curr_IP)) \ + ); \ + } else { \ + dcall = unsafeIRDirty_0_N( \ + 1/*regparms*/, \ + "track_new_mem_stack_" #syze , \ + VG_(fnptr_to_fnentry)( \ + VG_(tdict).track_new_mem_stack_##syze ), \ + mkIRExprVec_1(IRExpr_RdTmp(tmpp)) \ + ); \ + } \ + dcall->nFxState = 1; \ + dcall->fxState[0].fx = Ifx_Read; \ + dcall->fxState[0].offset = layout->offset_SP; \ + dcall->fxState[0].size = layout->sizeof_SP; \ + dcall->fxState[0].nRepeats = 0; \ + dcall->fxState[0].repeatLen = 0; \ + \ + addStmtToIRSB( bb, IRStmt_Dirty(dcall) ); \ + } \ \ vg_assert(syze > 0); \ update_SP_aliases(syze); \ \ - n_SP_updates_fast++; \ + n_SP_updates_new_fast++; \ \ } while (0) # define DO_DIE(syze, tmpp) \ do { \ - if (!VG_(tdict).track_die_mem_stack_##syze) \ + if (VG_(tdict).any_die_mem_stack \ + && !VG_(tdict).track_die_mem_stack_##syze) { \ + n_SP_updates_die_generic_known++; \ goto generic; \ + } \ \ - /* I don't know if it's really necessary to say that the */ \ - /* call reads the stack pointer. But anyway, we do. */ \ - dcall = unsafeIRDirty_0_N( \ - 1/*regparms*/, \ - "track_die_mem_stack_" #syze, \ - VG_(fnptr_to_fnentry)( \ - VG_(tdict).track_die_mem_stack_##syze ), \ - mkIRExprVec_1(IRExpr_RdTmp(tmpp)) \ - ); \ - dcall->nFxState = 1; \ - dcall->fxState[0].fx = Ifx_Read; \ - dcall->fxState[0].offset = layout->offset_SP; \ - dcall->fxState[0].size = layout->sizeof_SP; \ - dcall->fxState[0].nRepeats = 0; \ - dcall->fxState[0].repeatLen = 0; \ + if (VG_(tdict).any_die_mem_stack) { \ + /* I don't know if it's really necessary to say that the */ \ + /* call reads the stack pointer. But anyway, we do. */ \ + dcall = unsafeIRDirty_0_N( \ + 1/*regparms*/, \ + "track_die_mem_stack_" #syze, \ + VG_(fnptr_to_fnentry)( \ + VG_(tdict).track_die_mem_stack_##syze ), \ + mkIRExprVec_1(IRExpr_RdTmp(tmpp)) \ + ); \ + dcall->nFxState = 1; \ + dcall->fxState[0].fx = Ifx_Read; \ + dcall->fxState[0].offset = layout->offset_SP; \ + dcall->fxState[0].size = layout->sizeof_SP; \ + dcall->fxState[0].nRepeats = 0; \ + dcall->fxState[0].repeatLen = 0; \ \ - addStmtToIRSB( bb, IRStmt_Dirty(dcall) ); \ + addStmtToIRSB( bb, IRStmt_Dirty(dcall) ); \ + } \ \ vg_assert(syze > 0); \ update_SP_aliases(-(syze)); \ \ - n_SP_updates_fast++; \ + n_SP_updates_die_fast++; \ \ } while (0) @@ -497,10 +500,18 @@ IRSB* vg_SP_update_pass ( void* closureV, case -144: DO_NEW( 144, tttmp); addStmtToIRSB(bb,st); continue; case 160: DO_DIE( 160, tttmp); addStmtToIRSB(bb,st); continue; case -160: DO_NEW( 160, tttmp); addStmtToIRSB(bb,st); continue; - default: - /* common values for ppc64: 144 128 160 112 176 */ - n_SP_updates_generic_known++; - goto generic; + default: + if (delta > 0) { + n_SP_updates_die_generic_known++; + if (VG_(tdict).any_die_mem_stack) + goto generic; + } else { + n_SP_updates_new_generic_known++; + if (VG_(tdict).any_new_mem_stack) + goto generic; + } + /* No tracking for delta. Just add the original statement. */ + addStmtToIRSB(bb,st); continue; } } else { /* Deal with an unknown update to SP. We're here because diff --git a/coregrind/pub_core_tooliface.h b/coregrind/pub_core_tooliface.h index e02969dd0d..7fb8117455 100644 --- a/coregrind/pub_core_tooliface.h +++ b/coregrind/pub_core_tooliface.h @@ -211,6 +211,8 @@ typedef struct { void VG_REGPARM(1) (*track_new_mem_stack_160)(Addr); void (*track_new_mem_stack)(Addr,SizeT); + Bool any_new_mem_stack; // True if one or more track_new_mem_stack is set + void VG_REGPARM(1) (*track_die_mem_stack_4) (Addr); void VG_REGPARM(1) (*track_die_mem_stack_8) (Addr); void VG_REGPARM(1) (*track_die_mem_stack_12) (Addr); @@ -222,6 +224,8 @@ typedef struct { void VG_REGPARM(1) (*track_die_mem_stack_160)(Addr); void (*track_die_mem_stack)(Addr, SizeT); + Bool any_die_mem_stack; // True if one or more track_die_mem_stack is set + void (*track_ban_mem_stack)(Addr, SizeT); void (*track_pre_mem_read) (CorePart, ThreadId, const HChar*, Addr, SizeT); @@ -255,7 +259,9 @@ extern VgToolInterface VG_(tdict); Miscellaneous functions ------------------------------------------------------------------ */ -Bool VG_(sanity_check_needs) ( const HChar** failmsg ); +/* Sanity checks and finish the initialisation of the tool needs. + Returns False and sets a failmsg if the needs are inconsistent. */ +Bool VG_(finish_needs_init) ( const HChar** failmsg ); #endif // __PUB_CORE_TOOLIFACE_H diff --git a/helgrind/hg_main.c b/helgrind/hg_main.c index 95945f36bb..e68ff2d9cf 100644 --- a/helgrind/hg_main.c +++ b/helgrind/hg_main.c @@ -1032,12 +1032,13 @@ static void shadow_mem_cwrite_range ( Thread* thr, Addr a, SizeT len ) { LIBHB_CWRITE_N(hbthr, a, len); } -static void shadow_mem_make_New ( Thread* thr, Addr a, SizeT len ) +inline static void shadow_mem_make_New ( Thread* thr, Addr a, SizeT len ) { libhb_srange_new( thr->hbthr, a, len ); } -static void shadow_mem_make_NoAccess_NoFX ( Thread* thr, Addr aIN, SizeT len ) +inline static void shadow_mem_make_NoAccess_NoFX ( Thread* thr, Addr aIN, + SizeT len ) { if (0 && len > 500) VG_(printf)("make NoAccess_NoFX ( %#lx, %lu )\n", aIN, len ); @@ -1045,7 +1046,8 @@ static void shadow_mem_make_NoAccess_NoFX ( Thread* thr, Addr aIN, SizeT len ) libhb_srange_noaccess_NoFX( thr->hbthr, aIN, len ); } -static void shadow_mem_make_NoAccess_AHAE ( Thread* thr, Addr aIN, SizeT len ) +inline static void shadow_mem_make_NoAccess_AHAE ( Thread* thr, Addr aIN, + SizeT len) { if (0 && len > 500) VG_(printf)("make NoAccess_AHAE ( %#lx, %lu )\n", aIN, len ); @@ -1053,7 +1055,8 @@ static void shadow_mem_make_NoAccess_AHAE ( Thread* thr, Addr aIN, SizeT len ) libhb_srange_noaccess_AHAE( thr->hbthr, aIN, len ); } -static void shadow_mem_make_Untracked ( Thread* thr, Addr aIN, SizeT len ) +inline static void shadow_mem_make_Untracked ( Thread* thr, Addr aIN, + SizeT len ) { if (0 && len > 500) VG_(printf)("make Untracked ( %#lx, %lu )\n", aIN, len ); @@ -1488,6 +1491,30 @@ void evh__new_mem_stack ( Addr a, SizeT len ) { shadow_mem_make_Untracked( thr, a, len ); } +#define DCL_evh__new_mem_stack(syze) \ +static void VG_REGPARM(1) evh__new_mem_stack_##syze(Addr new_SP) \ +{ \ + Thread *thr = get_current_Thread(); \ + if (SHOW_EVENTS >= 2) \ + VG_(printf)("evh__new_mem_stack_" #syze "(%p, %lu)\n", \ + (void*)new_SP, (SizeT)syze ); \ + shadow_mem_make_New( thr, -VG_STACK_REDZONE_SZB + new_SP, syze ); \ + if (syze >= SCE_BIGRANGE_T && (HG_(clo_sanity_flags) & SCE_BIGRANGE)) \ + all__sanity_check("evh__new_mem_stack_" #syze "-post"); \ + if (UNLIKELY(thr->pthread_create_nesting_level > 0)) \ + shadow_mem_make_Untracked( thr, new_SP, syze ); \ +} + +DCL_evh__new_mem_stack(4); +DCL_evh__new_mem_stack(8); +DCL_evh__new_mem_stack(12); +DCL_evh__new_mem_stack(16); +DCL_evh__new_mem_stack(32); +DCL_evh__new_mem_stack(112); +DCL_evh__new_mem_stack(128); +DCL_evh__new_mem_stack(144); +DCL_evh__new_mem_stack(160); + static void evh__new_mem_w_tid ( Addr a, SizeT len, ThreadId tid ) { Thread *thr = get_current_Thread(); @@ -6028,6 +6055,15 @@ static void hg_pre_clo_init ( void ) VG_(track_new_mem_brk) ( evh__new_mem_w_tid ); VG_(track_new_mem_mmap) ( evh__new_mem_w_perms ); VG_(track_new_mem_stack) ( evh__new_mem_stack ); + VG_(track_new_mem_stack_4) ( evh__new_mem_stack_4 ); + VG_(track_new_mem_stack_8) ( evh__new_mem_stack_8 ); + VG_(track_new_mem_stack_12) ( evh__new_mem_stack_12 ); + VG_(track_new_mem_stack_16) ( evh__new_mem_stack_16 ); + VG_(track_new_mem_stack_32) ( evh__new_mem_stack_32 ); + VG_(track_new_mem_stack_112) ( evh__new_mem_stack_112 ); + VG_(track_new_mem_stack_128) ( evh__new_mem_stack_128 ); + VG_(track_new_mem_stack_144) ( evh__new_mem_stack_144 ); + VG_(track_new_mem_stack_160) ( evh__new_mem_stack_160 ); // FIXME: surely this isn't thread-aware VG_(track_copy_mem_remap) ( evh__copy_mem );