From: Julian Seward
Date: Sun, 3 Jun 2012 22:40:07 +0000 (+0000)
Subject: m_machine: add new function VG_(machine_get_size_of_largest_guest_register)
X-Git-Tag: svn/VALGRIND_3_8_0~263
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=76d7802c9f7f0678c1d509e267a7090172618ccc;p=thirdparty%2Fvalgrind.git

m_machine: add new function VG_(machine_get_size_of_largest_guest_register)

cachegrind: use the new function to abort startup if the minimum line
size is smaller than the size of the largest guest register.

Partially derived from a patch by Josef Weidendorfer.

git-svn-id: svn://svn.valgrind.org/valgrind/trunk@12605
---

diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 4b36204cec..ba2d740b79 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -69,6 +69,12 @@ static Bool clo_cache_sim = True; /* do cache simulation? */
 static Bool clo_branch_sim = False; /* do branch simulation? */
 static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
 
+/*------------------------------------------------------------*/
+/*--- Cachesim configuration                               ---*/
+/*------------------------------------------------------------*/
+
+static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
+
 /*------------------------------------------------------------*/
 /*--- Types and Data Structures                            ---*/
 /*------------------------------------------------------------*/
@@ -846,7 +852,7 @@ void addEvent_Dr ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
 {
    Event* evt;
    tl_assert(isIRAtom(ea));
-   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+   tl_assert(datasize >= 1 && datasize <= min_line_size);
    if (!clo_cache_sim)
       return;
    if (cgs->events_used == N_EVENTS)
@@ -868,7 +874,7 @@ void addEvent_Dw ( CgState* cgs, InstrInfo* inode, Int datasize, IRAtom* ea )
    Event* evt;
 
    tl_assert(isIRAtom(ea));
-   tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+   tl_assert(datasize >= 1 && datasize <= min_line_size);
 
    if (!clo_cache_sim)
       return;
@@ -1058,8 +1064,8 @@ IRSB* cg_instrument ( VgCallbackClosure* closure,
             // instructions will be done inaccurately, but they're
             // very rare and this avoids errors from hitting more
             // than two cache lines in the simulation.
-            if (dataSize > MIN_LINE_SIZE)
-               dataSize = MIN_LINE_SIZE;
+            if (dataSize > min_line_size)
+               dataSize = min_line_size;
             if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
             if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
@@ -1085,8 +1091,8 @@ IRSB* cg_instrument ( VgCallbackClosure* closure,
             if (cas->dataHi != NULL)
                dataSize *= 2; /* since it's a doubleword-CAS */
             /* I don't think this can ever happen, but play safe. */
-            if (dataSize > MIN_LINE_SIZE)
-               dataSize = MIN_LINE_SIZE;
+            if (dataSize > min_line_size)
+               dataSize = min_line_size;
             addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
             addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
             break;
@@ -1724,6 +1730,26 @@ static void cg_post_clo_init(void)
                        &clo_D1_cache,
                        &clo_LL_cache);
 
+   // min_line_size is used to make sure that we never feed
+   // accesses to the simulator straddling more than two
+   // cache lines at any cache level
+   min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
+   min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
+
+   Int largest_load_or_store_size
+      = VG_(machine_get_size_of_largest_guest_register)();
+   if (min_line_size < largest_load_or_store_size) {
+      /* We can't continue, because the cache simulation might
+         straddle more than 2 lines, and it will assert.  So let's
+         just stop before we start. */
+      VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
+                (Int)min_line_size);
+      VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
+                largest_load_or_store_size );
+      VG_(umsg)(" but it is not.  Exiting now.\n");
+      VG_(exit)(1);
+   }
+
    cachesim_I1_initcache(I1c);
    cachesim_D1_initcache(D1c);
    cachesim_LL_initcache(LLc);
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index ffe41ee713..9980640ddf 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -367,7 +367,7 @@ SizeT VG_(thread_get_altstack_size)(ThreadId tid)
 static Bool hwcaps_done = False;
 
 /* --- all archs --- */
-static VexArch va;
+static VexArch va = VexArch_INVALID;
 static VexArchInfo vai;
 
 #if defined(VGA_x86)
@@ -1316,6 +1316,66 @@ void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
 }
 
 
+/* Returns the size of the largest guest register that we will
+   simulate in this run.  This depends on both the guest architecture
+   and on the specific capabilities we are simulating for that guest
+   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
+   or 32.  General rule: if in doubt, return a value larger than
+   reality.
+
+   This information is needed by Cachegrind and Callgrind to decide
+   what the minimum cache line size they are prepared to simulate is.
+   Basically require that the minimum cache line size is at least as
+   large as the largest register that might get transferred to/from
+   memory, so as to guarantee that any such transaction can straddle
+   at most 2 cache lines.
+*/
+Int VG_(machine_get_size_of_largest_guest_register) ( void )
+{
+   vg_assert(hwcaps_done);
+   /* Once hwcaps_done is True, we can fish around inside va/vai to
+      find the information we need. */
+
+#  if defined(VGA_x86)
+   vg_assert(va == VexArchX86);
+   /* We don't support AVX, so 32 is out.  At the other end, even if
+      we don't support any SSE, the X87 can generate 10 byte
+      transfers, so let's say 16 to be on the safe side.  Hence the
+      answer is always 16. */
+   return 16;
+
+#  elif defined(VGA_amd64)
+   /* if AVX then 32 else 16 */
+   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
+
+#  elif defined(VGA_ppc32)
+   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
+   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
+   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
+   return 8;
+
+#  elif defined(VGA_ppc64)
+   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
+   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
+   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
+   return 8;
+
+#  elif defined(VGA_s390x)
+   return 8;
+
+#  elif defined(VGA_arm)
+   /* Really it depends whether or not we have NEON, but let's just
+      assume we always do. */
+   return 16;
+
+#  else
+#    error "Unknown arch"
+#  endif
+}
+
+
 // Given a pointer to a function as obtained by "& functionname" in C,
 // produce a pointer to the actual entry point for the function.
 void* VG_(fnptr_to_fnentry)( void* f )
diff --git a/include/pub_tool_machine.h b/include/pub_tool_machine.h
index 49c700d3e1..6b30e69b10 100644
--- a/include/pub_tool_machine.h
+++ b/include/pub_tool_machine.h
@@ -152,6 +152,12 @@ extern SizeT VG_(thread_get_altstack_size) ( ThreadId tid );
 // ppc64-linux it isn't (sigh).
 extern void* VG_(fnptr_to_fnentry)( void* );
 
+/* Returns the size of the largest guest register that we will
+   simulate in this run.  This depends on both the guest architecture
+   and on the specific capabilities we are simulating for that guest
+   (eg, AVX or non-AVX ?, for amd64). */
+extern Int VG_(machine_get_size_of_largest_guest_register) ( void );
+
 #endif   // __PUB_TOOL_MACHINE_H
 
 /*--------------------------------------------------------------------*/
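
As an illustrative sketch (not part of the patch above), here is roughly how
another Valgrind tool might consume the new query from its post-CLO
initialisation, mirroring the check the patch adds to cg_post_clo_init.  The
tool name, the 16-byte line size, and the header locations given for
VG_(umsg) and VG_(exit) are assumptions made for the sketch; only
pub_tool_machine.h is confirmed by the patch itself.

#include "pub_tool_basics.h"
#include "pub_tool_machine.h"     /* VG_(machine_get_size_of_largest_guest_register) */
#include "pub_tool_libcprint.h"   /* VG_(umsg) -- assumed location */
#include "pub_tool_libcproc.h"    /* VG_(exit) -- assumed location */

static Int my_line_size = 16;  /* hypothetical simulated cache line size */

/* Would be registered via VG_(basic_tool_funcs) in the tool's
   pre_clo_init (not shown). */
static void my_post_clo_init ( void )
{
   /* Only valid once hwcaps have been determined, which is why the
      check belongs in post-CLO init rather than at registration time. */
   Int max_reg_size = VG_(machine_get_size_of_largest_guest_register)();
   if (my_line_size < max_reg_size) {
      /* A single load/store could then straddle more than two simulated
         lines; refuse to start rather than assert later. */
      VG_(umsg)("my_tool: line size (%d) is smaller than the largest "
                "guest register (%d); cannot continue.\n",
                my_line_size, max_reg_size);
      VG_(exit)(1);
   }
}

The constraint being enforced is the one stated in the m_machine.c comment:
the minimum simulated line size must be at least as large as the largest
register that can be transferred to or from memory, so that any single
access touches at most two cache lines.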