static Bool clo_branch_sim = False; /* do branch simulation? */
static Char* clo_cachegrind_out_file = "cachegrind.out.%p";
+/*------------------------------------------------------------*/
+/*--- Cachesim configuration ---*/
+/*------------------------------------------------------------*/
+
+static Int min_line_size = 0; /* min of L1 and LL cache line sizes */
+
/*------------------------------------------------------------*/
/*--- Types and Data Structures ---*/
/*------------------------------------------------------------*/
{
Event* evt;
tl_assert(isIRAtom(ea));
- tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+ tl_assert(datasize >= 1 && datasize <= min_line_size);
if (!clo_cache_sim)
return;
if (cgs->events_used == N_EVENTS)
Event* evt;
tl_assert(isIRAtom(ea));
- tl_assert(datasize >= 1 && datasize <= MIN_LINE_SIZE);
+ tl_assert(datasize >= 1 && datasize <= min_line_size);
if (!clo_cache_sim)
return;
// instructions will be done inaccurately, but they're
// very rare and this avoids errors from hitting more
// than two cache lines in the simulation.
- if (dataSize > MIN_LINE_SIZE)
- dataSize = MIN_LINE_SIZE;
+ if (dataSize > min_line_size)
+ dataSize = min_line_size;
if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
addEvent_Dr( &cgs, curr_inode, dataSize, d->mAddr );
if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
if (cas->dataHi != NULL)
dataSize *= 2; /* since it's a doubleword-CAS */
/* I don't think this can ever happen, but play safe. */
- if (dataSize > MIN_LINE_SIZE)
- dataSize = MIN_LINE_SIZE;
+ if (dataSize > min_line_size)
+ dataSize = min_line_size;
addEvent_Dr( &cgs, curr_inode, dataSize, cas->addr );
addEvent_Dw( &cgs, curr_inode, dataSize, cas->addr );
break;
&clo_D1_cache,
&clo_LL_cache);
+ // min_line_size is used to make sure that we never feed the
+ // simulator an access that straddles more than two cache lines
+ // at any cache level.
+ min_line_size = (I1c.line_size < D1c.line_size) ? I1c.line_size : D1c.line_size;
+ min_line_size = (LLc.line_size < min_line_size) ? LLc.line_size : min_line_size;
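+ // E.g. I1/D1/LL line sizes of 64/64/32 give a min_line_size of 32,
+ // so any access of at most 32 bytes touches at most 2 lines.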
+
+ Int largest_load_or_store_size
+ = VG_(machine_get_size_of_largest_guest_register)();
+ if (min_line_size < largest_load_or_store_size) {
+ /* We can't continue, because the cache simulation might
+ straddle more than 2 lines, and it will assert. So let's
+ just stop before we start. */
+ VG_(umsg)("Cachegrind: cannot continue: the minimum line size (%d)\n",
+ (Int)min_line_size);
+ VG_(umsg)(" must be equal to or larger than the maximum register size (%d)\n",
+ largest_load_or_store_size );
+ VG_(umsg)(" but it is not. Exiting now.\n");
+ VG_(exit)(1);
+ }
+
cachesim_I1_initcache(I1c);
cachesim_D1_initcache(D1c);
cachesim_LL_initcache(LLc);
static Bool hwcaps_done = False;
/* --- all archs --- */
-static VexArch va;
+static VexArch va = VexArch_INVALID;
static VexArchInfo vai;
#if defined(VGA_x86)
}
+/* Returns the size of the largest guest register that we will
+ simulate in this run. This depends on both the guest architecture
+ and on the specific capabilities we are simulating for that guest
+ (e.g. AVX or non-AVX, for amd64). Should return either 4, 8, 16
+ or 32. General rule: if in doubt, return a value larger than
+ reality.
+
+ This information is needed by Cachegrind and Callgrind to decide
+ the minimum cache line size they are prepared to simulate.
+ Basically, we require that the minimum cache line size is at least
+ as large as the largest register that might get transferred
+ to/from memory, so as to guarantee that any such transaction can
+ straddle at most 2 cache lines.
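+
+ For example, on amd64 with AVX the largest transfer is a 32-byte
+ YMM load or store; a 32-byte access that begins near the end of
+ one 32-byte line can spill into the next line, but never into a
+ third, so line sizes of 32 or more are safe.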
+*/
+Int VG_(machine_get_size_of_largest_guest_register) ( void )
+{
+ vg_assert(hwcaps_done);
+ /* Once hwcaps_done is True, we can fish around inside va/vai to
+ find the information we need. */
+
+# if defined(VGA_x86)
+ vg_assert(va == VexArchX86);
+ /* We don't support AVX, so 32 is out. At the other end, even if
+ we don't support any SSE, the X87 can generate 10 byte
+ transfers, so let's say 16 to be on the safe side. Hence the
+ answer is always 16. */
+ return 16;
+
+# elif defined(VGA_amd64)
+ /* if AVX then 32 else 16 */
+ return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;
+
+# elif defined(VGA_ppc32)
+ /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
+ return 8;
+
+# elif defined(VGA_ppc64)
+ /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
+ if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
+ return 8;
+
+# elif defined(VGA_s390x)
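+ /* The widest registers we simulate here are the 8-byte GPRs/FPRs. */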
+ return 8;
+
+# elif defined(VGA_arm)
+ /* Really it depends whether or not we have NEON, but let's just
+ assume we always do. */
+ return 16;
+
+# else
+# error "Unknown arch"
+# endif
+}
+
+
// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )