From: Julian Seward Date: Tue, 26 Mar 2013 13:53:18 +0000 (+0000) Subject: Implement RDTSCP on amd64, finally. This fixes #251569 and dups X-Git-Tag: svn/VALGRIND_3_9_0^2~94 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=17bd82a56036d0eff21ec3ebf836d981fd216f4d;p=thirdparty%2Fvalgrind.git Implement RDTSCP on amd64, finally. This fixes #251569 and dups 311933, 313348 and 313354. git-svn-id: svn://svn.valgrind.org/vex/trunk@2701 --- diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h index 303d4eb54c..487f6f89a9 100644 --- a/VEX/priv/guest_amd64_defs.h +++ b/VEX/priv/guest_amd64_defs.h @@ -171,6 +171,7 @@ extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord ); extern VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State*, HWord ); extern ULong amd64g_dirtyhelper_RDTSC ( void ); +extern void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st ); extern ULong amd64g_dirtyhelper_IN ( ULong portno, ULong sz/*1,2 or 4*/ ); extern void amd64g_dirtyhelper_OUT ( ULong portno, ULong data, diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c index 53546bd3de..488757fe04 100644 --- a/VEX/priv/guest_amd64_helpers.c +++ b/VEX/priv/guest_amd64_helpers.c @@ -2837,6 +2837,25 @@ ULong amd64g_dirtyhelper_RDTSC ( void ) # endif } +/* CALLED FROM GENERATED CODE */ +/* DIRTY HELPER (non-referentially-transparent) */ +/* Horrible hack. On non-amd64 platforms, return 1. */ +/* This uses a different calling convention from _RDTSC just above + only because of the difficulty of returning 96 bits from a C + function -- RDTSC returns 64 bits and so is simple by comparison, + on amd64. */ +void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st ) +{ +# if defined(__x86_64__) + UInt eax, ecx, edx; + __asm__ __volatile__("rdtscp" : "=a" (eax), "=d" (edx), "=c" (ecx)); + st->guest_RAX = (ULong)eax; + st->guest_RCX = (ULong)ecx; + st->guest_RDX = (ULong)edx; +# else + /* Do nothing. */ +# endif +} /* CALLED FROM GENERATED CODE */ /* DIRTY HELPER (non-referentially-transparent) */ diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index 7e98e761e6..2b200fc190 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -63,8 +63,6 @@ * FINIT not only initialises the FPU environment, it also zeroes all the FP registers. It should leave the registers unchanged. - RDTSC returns zero, always. - SAHF should cause eflags[1] == 1, and in fact it produces 0. As per Intel docs this bit has no meaning anyway. Since PUSHF is the only way to observe eflags[1], a proper fix would be to make that @@ -19864,6 +19862,39 @@ Long dis_ESC_0F ( putIRegRDX(4, mkU32(0)); return delta; } + /* 0F 01 F9 = RDTSCP */ + if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) { + delta += 1; + /* Uses dirty helper: + void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* ) + declared to wr rax, rcx, rdx + */ + const HChar* fName = "amd64g_dirtyhelper_RDTSCP"; + void* fAddr = &amd64g_dirtyhelper_RDTSCP; + IRDirty* d + = unsafeIRDirty_0_N ( 0/*regparms*/, + fName, fAddr, mkIRExprVec_0() ); + /* declare guest state effects */ + d->needsBBP = True; + d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = OFFB_RAX; + d->fxState[0].size = 8; + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_RCX; + d->fxState[1].size = 8; + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = OFFB_RDX; + d->fxState[2].size = 8; + /* execute the dirty call, side-effecting guest state */ + stmt( IRStmt_Dirty(d) ); + /* RDTSCP is a serialising insn. So, just in case someone is + using it as a memory fence ... */ + stmt( IRStmt_MBE(Imbe_Fence) ); + DIP("rdtscp\n"); + return delta; + } /* else decode failed */ break; } diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c index 49a3773596..e7b878eb2f 100644 --- a/VEX/priv/host_amd64_isel.c +++ b/VEX/priv/host_amd64_isel.c @@ -4302,7 +4302,8 @@ HInstrArray* iselSB_AMD64 ( IRSB* bb, & ~(VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16 | VEX_HWCAPS_AMD64_LZCNT - | VEX_HWCAPS_AMD64_AVX))); + | VEX_HWCAPS_AMD64_AVX + | VEX_HWCAPS_AMD64_RDTSCP))); /* Make up an initial environment to use. */ env = LibVEX_Alloc(sizeof(ISelEnv)); diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index a4d4240ae1..aa45d11efb 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -1205,37 +1205,48 @@ static const HChar* show_hwcaps_amd64 ( UInt hwcaps ) /* SSE3 and CX16 are orthogonal and > baseline, although we really don't expect to come across anything which can do SSE3 but can't do CX16. Still, we can handle that case. LZCNT is similarly - orthogonal. AVX is technically orthogonal, but just add the - cases we actually come across. (This scheme for printing is - very stupid. We should add strings independently based on - feature bits, but then it would be hard to return a string that - didn't need deallocating by the caller.) */ - /* FIXME: show_hwcaps_s390x is a much better way to do this. */ - switch (hwcaps) { - case 0: - return "amd64-sse2"; - case VEX_HWCAPS_AMD64_SSE3: - return "amd64-sse3"; - case VEX_HWCAPS_AMD64_CX16: - return "amd64-sse2-cx16"; - case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16: - return "amd64-sse3-cx16"; - case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_LZCNT: - return "amd64-sse3-lzcnt"; - case VEX_HWCAPS_AMD64_CX16 | VEX_HWCAPS_AMD64_LZCNT: - return "amd64-sse2-cx16-lzcnt"; - case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16 - | VEX_HWCAPS_AMD64_LZCNT: - return "amd64-sse3-cx16-lzcnt"; - case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16 - | VEX_HWCAPS_AMD64_AVX: - return "amd64-sse3-cx16-avx"; - case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16 - | VEX_HWCAPS_AMD64_LZCNT | VEX_HWCAPS_AMD64_AVX: - return "amd64-sse3-cx16-lzcnt-avx"; - default: - return NULL; + orthogonal. */ + + /* Throw out obviously stupid cases: */ + /* AVX without SSE3 */ + Bool have_sse3 = (hwcaps & VEX_HWCAPS_AMD64_SSE3) != 0; + Bool have_avx = (hwcaps & VEX_HWCAPS_AMD64_AVX) != 0; + if (have_avx && !have_sse3) + return NULL; + + /* This isn't threadsafe. We might need to fix it at some point. */ + static HChar buf[100] = { 0 }; + if (buf[0] != 0) return buf; /* already constructed */ + + vex_bzero(buf, sizeof(buf)); + + HChar* p = &buf[0]; + + p = p + vex_sprintf(p, "%s", "amd64"); + if (hwcaps == 0) { + /* special-case the baseline case */ + p = p + vex_sprintf(p, "%s", "-sse2"); + goto out; + } + if (hwcaps & VEX_HWCAPS_AMD64_CX16) { + p = p + vex_sprintf(p, "%s", "-cx16"); + } + if (hwcaps & VEX_HWCAPS_AMD64_LZCNT) { + p = p + vex_sprintf(p, "%s", "-lzcnt"); + } + if (hwcaps & VEX_HWCAPS_AMD64_RDTSCP) { + p = p + vex_sprintf(p, "%s", "-rdtscp"); } + if (hwcaps & VEX_HWCAPS_AMD64_SSE3) { + p = p + vex_sprintf(p, "%s", "-sse3"); + } + if (hwcaps & VEX_HWCAPS_AMD64_AVX) { + p = p + vex_sprintf(p, "%s", "-avx"); + } + + out: + vassert(buf[sizeof(buf)-1] == 0); + return buf; } static const HChar* show_hwcaps_ppc32 ( UInt hwcaps ) diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index 8e57dbbd5b..b1061fc9a6 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -83,6 +83,7 @@ typedef #define VEX_HWCAPS_AMD64_CX16 (1<<6) /* cmpxchg16b support */ #define VEX_HWCAPS_AMD64_LZCNT (1<<7) /* SSE4a LZCNT insn */ #define VEX_HWCAPS_AMD64_AVX (1<<8) /* AVX instructions */ +#define VEX_HWCAPS_AMD64_RDTSCP (1<<9) /* RDTSCP instruction */ /* ppc32: baseline capability is integer only */ #define VEX_HWCAPS_PPC32_F (1<<8) /* basic (non-optional) FP */