From: Julian Seward Date: Thu, 29 Jul 2010 15:39:05 +0000 (+0000) Subject: Only decode LZCNT if the host supports it, since otherwise we risk X-Git-Tag: svn/VALGRIND_3_6_1^2~77 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=5167bf2c5b2b400e030e12e7ed06fbe7f79def53;p=thirdparty%2Fvalgrind.git Only decode LZCNT if the host supports it, since otherwise we risk confusing it with BSR. Followup to #212335. git-svn-id: svn://svn.valgrind.org/vex/trunk@1995 --- diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index 2270e424df..796173fcbc 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -15099,14 +15099,13 @@ DisResult disInstr_AMD64_WRK ( goto decode_success; } - /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, but - fortunately occupying opcode space which AFAICS is not occupied - by anything else, even in Intel land. NB: 0F BD is BSR, but - that's decoded below here, and we reject it if there's an F3 - prefix. Hence there is no possibility of confusion with this - one. */ + /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension, + which we can only decode if we're sure this is an AMD cpu that + supports LZCNT, since otherwise it's BSR, which behaves + differently. */ if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */ - && insn[0] == 0x0F && insn[1] == 0xBD) { + && insn[0] == 0x0F && insn[1] == 0xBD + && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) { vassert(sz == 2 || sz == 4 || sz == 8); /*IRType*/ ty = szToITy(sz); IRTemp src = newTemp(ty); diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index 991d1a076e..85f1d3bb4f 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -12613,13 +12613,12 @@ DisResult disInstr_X86_WRK ( goto decode_success; } - /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, but - fortunately occupying opcode space which AFAICS is not occupied - by anything else, even in Intel land. 
NB: 0F BD is BSR, but - that's decoded below here, and it won't match there's an F3 - prefix. Hence there is no possibility of confusion with this - one. */ - if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD) { + /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension, + which we can only decode if we're sure this is an AMD cpu that + supports LZCNT, since otherwise it's BSR, which behaves + differently. */ + if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD + && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) { vassert(sz == 2 || sz == 4); /*IRType*/ ty = szToITy(sz); IRTemp src = newTemp(ty); diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c index ff38e17bb1..cf89d535c0 100644 --- a/VEX/priv/host_amd64_isel.c +++ b/VEX/priv/host_amd64_isel.c @@ -4122,8 +4122,10 @@ HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host, /* sanity ... */ vassert(arch_host == VexArchAMD64); - vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3 - |VEX_HWCAPS_AMD64_CX16))); + vassert(0 == (hwcaps_host + & ~(VEX_HWCAPS_AMD64_SSE3 + | VEX_HWCAPS_AMD64_CX16 + | VEX_HWCAPS_AMD64_LZCNT))); /* Make up an initial environment to use. 
*/ env = LibVEX_Alloc(sizeof(ISelEnv)); diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index 16b5f09df8..9a6d6515c9 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -703,8 +703,10 @@ X86Instr* X86Instr_MFence ( UInt hwcaps ) { X86Instr* i = LibVEX_Alloc(sizeof(X86Instr)); i->tag = Xin_MFence; i->Xin.MFence.hwcaps = hwcaps; - vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1|VEX_HWCAPS_X86_SSE2 - |VEX_HWCAPS_X86_SSE3))); + vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1 + |VEX_HWCAPS_X86_SSE2 + |VEX_HWCAPS_X86_SSE3 + |VEX_HWCAPS_X86_LZCNT))); return i; } X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) { diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c index e1242d69b6..e6d92e0e49 100644 --- a/VEX/priv/host_x86_isel.c +++ b/VEX/priv/host_x86_isel.c @@ -4000,9 +4000,11 @@ HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host, /* sanity ... */ vassert(arch_host == VexArchX86); - vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_X86_SSE1 - |VEX_HWCAPS_X86_SSE2 - |VEX_HWCAPS_X86_SSE3))); + vassert(0 == (hwcaps_host + & ~(VEX_HWCAPS_X86_SSE1 + | VEX_HWCAPS_X86_SSE2 + | VEX_HWCAPS_X86_SSE3 + | VEX_HWCAPS_X86_LZCNT))); /* Make up an initial environment to use. */ env = LibVEX_Alloc(sizeof(ISelEnv)); diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 962b952e7d..711c85a373 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -754,32 +754,53 @@ void LibVEX_default_VexAbiInfo ( /*OUT*/VexAbiInfo* vbi ) static HChar* show_hwcaps_x86 ( UInt hwcaps ) { /* Monotonic, SSE3 > SSE2 > SSE1 > baseline. 
*/ - if (hwcaps == 0) - return "x86-sse0"; - if (hwcaps == VEX_HWCAPS_X86_SSE1) - return "x86-sse1"; - if (hwcaps == (VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2)) - return "x86-sse1-sse2"; - if (hwcaps == (VEX_HWCAPS_X86_SSE1 - | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3)) - return "x86-sse1-sse2-sse3"; - - return NULL; + switch (hwcaps) { + case 0: + return "x86-sse0"; + case VEX_HWCAPS_X86_SSE1: + return "x86-sse1"; + case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2: + return "x86-sse1-sse2"; + case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 + | VEX_HWCAPS_X86_LZCNT: + return "x86-sse1-sse2-lzcnt"; + case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 + | VEX_HWCAPS_X86_SSE3: + return "x86-sse1-sse2-sse3"; + case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2 + | VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT: + return "x86-sse1-sse2-sse3-lzcnt"; + default: + return NULL; + } } static HChar* show_hwcaps_amd64 ( UInt hwcaps ) { /* SSE3 and CX16 are orthogonal and > baseline, although we really don't expect to come across anything which can do SSE3 but can't - do CX16. Still, we can handle that case. */ - const UInt SSE3 = VEX_HWCAPS_AMD64_SSE3; - const UInt CX16 = VEX_HWCAPS_AMD64_CX16; - UInt c = hwcaps; - if (c == 0) return "amd64-sse2"; - if (c == SSE3) return "amd64-sse3"; - if (c == CX16) return "amd64-sse2-cx16"; - if (c == (SSE3|CX16)) return "amd64-sse3-cx16"; - return NULL; + do CX16. Still, we can handle that case. LZCNT is similarly + orthogonal. 
*/ + switch (hwcaps) { + case 0: + return "amd64-sse2"; + case VEX_HWCAPS_AMD64_SSE3: + return "amd64-sse3"; + case VEX_HWCAPS_AMD64_CX16: + return "amd64-sse2-cx16"; + case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16: + return "amd64-sse3-cx16"; + case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_LZCNT: + return "amd64-sse3-lzcnt"; + case VEX_HWCAPS_AMD64_CX16 | VEX_HWCAPS_AMD64_LZCNT: + return "amd64-sse2-cx16-lzcnt"; + case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16 + | VEX_HWCAPS_AMD64_LZCNT: + return "amd64-sse3-cx16-lzcnt"; + + default: + return NULL; + } } static HChar* show_hwcaps_ppc32 ( UInt hwcaps ) diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index 97cd76bf90..fc66fadd6d 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -69,27 +69,29 @@ typedef /* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with cmpxchg8b. */ -#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */ -#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */ -#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */ +#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */ +#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */ +#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */ +#define VEX_HWCAPS_X86_LZCNT (1<<4) /* SSE4a LZCNT insn */ /* amd64: baseline capability is SSE2, with cmpxchg8b but not cmpxchg16b. 
*/ -#define VEX_HWCAPS_AMD64_SSE3 (1<<4) /* SSE3 support */ -#define VEX_HWCAPS_AMD64_CX16 (1<<5) /* cmpxchg16b support */ +#define VEX_HWCAPS_AMD64_SSE3 (1<<5) /* SSE3 support */ +#define VEX_HWCAPS_AMD64_CX16 (1<<6) /* cmpxchg16b support */ +#define VEX_HWCAPS_AMD64_LZCNT (1<<7) /* SSE4a LZCNT insn */ /* ppc32: baseline capability is integer only */ -#define VEX_HWCAPS_PPC32_F (1<<6) /* basic (non-optional) FP */ -#define VEX_HWCAPS_PPC32_V (1<<7) /* Altivec (VMX) */ -#define VEX_HWCAPS_PPC32_FX (1<<8) /* FP extns (fsqrt, fsqrts) */ -#define VEX_HWCAPS_PPC32_GX (1<<9) /* Graphics extns - (fres,frsqrte,fsel,stfiwx) */ +#define VEX_HWCAPS_PPC32_F (1<<8) /* basic (non-optional) FP */ +#define VEX_HWCAPS_PPC32_V (1<<9) /* Altivec (VMX) */ +#define VEX_HWCAPS_PPC32_FX (1<<10) /* FP extns (fsqrt, fsqrts) */ +#define VEX_HWCAPS_PPC32_GX (1<<11) /* Graphics extns + (fres,frsqrte,fsel,stfiwx) */ /* ppc64: baseline capability is integer and basic FP insns */ -#define VEX_HWCAPS_PPC64_V (1<<10) /* Altivec (VMX) */ -#define VEX_HWCAPS_PPC64_FX (1<<11) /* FP extns (fsqrt, fsqrts) */ -#define VEX_HWCAPS_PPC64_GX (1<<12) /* Graphics extns - (fres,frsqrte,fsel,stfiwx) */ +#define VEX_HWCAPS_PPC64_V (1<<12) /* Altivec (VMX) */ +#define VEX_HWCAPS_PPC64_FX (1<<13) /* FP extns (fsqrt, fsqrts) */ +#define VEX_HWCAPS_PPC64_GX (1<<14) /* Graphics extns + (fres,frsqrte,fsel,stfiwx) */ /* arm: baseline capability is ARMv4 */ /* No extra capabilities */