confusing it with BSR. Followup to #212335.
git-svn-id: svn://svn.valgrind.org/vex/trunk@1995
goto decode_success;
}
- /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, but
- fortunately occupying opcode space which AFAICS is not occupied
- by anything else, even in Intel land. NB: 0F BD is BSR, but
- that's decoded below here, and we reject it if there's an F3
- prefix. Hence there is no possibility of confusion with this
- one. */
+ /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension,
+ which we can only decode if we're sure this is an AMD cpu that
+ supports LZCNT, since otherwise it's BSR, which behaves
+ differently. */
if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
- && insn[0] == 0x0F && insn[1] == 0xBD) {
+ && insn[0] == 0x0F && insn[1] == 0xBD
+ && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
vassert(sz == 2 || sz == 4 || sz == 8);
/*IRType*/ ty = szToITy(sz);
IRTemp src = newTemp(ty);
goto decode_success;
}
- /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension, but
- fortunately occupying opcode space which AFAICS is not occupied
- by anything else, even in Intel land. NB: 0F BD is BSR, but
- that's decoded below here, and it won't match there's an F3
- prefix. Hence there is no possibility of confusion with this
- one. */
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD) {
+ /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension,
+ which we can only decode if we're sure this is an AMD cpu that
+ supports LZCNT, since otherwise it's BSR, which behaves
+ differently. */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xBD
+ && 0 != (archinfo->hwcaps & VEX_HWCAPS_X86_LZCNT)) {
vassert(sz == 2 || sz == 4);
/*IRType*/ ty = szToITy(sz);
IRTemp src = newTemp(ty);
/* sanity ... */
vassert(arch_host == VexArchAMD64);
- vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_AMD64_SSE3
- |VEX_HWCAPS_AMD64_CX16)));
+ vassert(0 == (hwcaps_host
+ & ~(VEX_HWCAPS_AMD64_SSE3
+ | VEX_HWCAPS_AMD64_CX16
+ | VEX_HWCAPS_AMD64_LZCNT)));
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
i->tag = Xin_MFence;
i->Xin.MFence.hwcaps = hwcaps;
- vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1|VEX_HWCAPS_X86_SSE2
- |VEX_HWCAPS_X86_SSE3)));
+ vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_SSE1
+ |VEX_HWCAPS_X86_SSE2
+ |VEX_HWCAPS_X86_SSE3
+ |VEX_HWCAPS_X86_LZCNT)));
return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
/* sanity ... */
vassert(arch_host == VexArchX86);
- vassert(0 == (hwcaps_host & ~(VEX_HWCAPS_X86_SSE1
- |VEX_HWCAPS_X86_SSE2
- |VEX_HWCAPS_X86_SSE3)));
+ vassert(0 == (hwcaps_host
+ & ~(VEX_HWCAPS_X86_SSE1
+ | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3
+ | VEX_HWCAPS_X86_LZCNT)));
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
/* Map an x86 hwcaps bitmask to a printable name. Returns NULL for
   any combination that is not explicitly listed below. */
static HChar* show_hwcaps_x86 ( UInt hwcaps )
{
/* Monotonic, SSE3 > SSE2 > SSE1 > baseline. After this change LZCNT
   is additionally accepted, but only on top of SSE1+SSE2 (with or
   without SSE3). */
- if (hwcaps == 0)
- return "x86-sse0";
- if (hwcaps == VEX_HWCAPS_X86_SSE1)
- return "x86-sse1";
- if (hwcaps == (VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2))
- return "x86-sse1-sse2";
- if (hwcaps == (VEX_HWCAPS_X86_SSE1
- | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3))
- return "x86-sse1-sse2-sse3";
-
- return NULL;
+ switch (hwcaps) {
+ case 0:
+ return "x86-sse0";
+ case VEX_HWCAPS_X86_SSE1:
+ return "x86-sse1";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2:
+ return "x86-sse1-sse2";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_LZCNT:
+ return "x86-sse1-sse2-lzcnt";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3:
+ return "x86-sse1-sse2-sse3";
+ case VEX_HWCAPS_X86_SSE1 | VEX_HWCAPS_X86_SSE2
+ | VEX_HWCAPS_X86_SSE3 | VEX_HWCAPS_X86_LZCNT:
+ return "x86-sse1-sse2-sse3-lzcnt";
+ default:
+ return NULL;
+ }
}
static HChar* show_hwcaps_amd64 ( UInt hwcaps )
{
/* SSE3 and CX16 are orthogonal and > baseline, although we really
don't expect to come across anything which can do SSE3 but can't
- do CX16. Still, we can handle that case. */
- const UInt SSE3 = VEX_HWCAPS_AMD64_SSE3;
- const UInt CX16 = VEX_HWCAPS_AMD64_CX16;
- UInt c = hwcaps;
- if (c == 0) return "amd64-sse2";
- if (c == SSE3) return "amd64-sse3";
- if (c == CX16) return "amd64-sse2-cx16";
- if (c == (SSE3|CX16)) return "amd64-sse3-cx16";
- return NULL;
+ do CX16. Still, we can handle that case. LZCNT is similarly
+ orthogonal. */
+ switch (hwcaps) {
+ case 0:
+ return "amd64-sse2";
+ case VEX_HWCAPS_AMD64_SSE3:
+ return "amd64-sse3";
+ case VEX_HWCAPS_AMD64_CX16:
+ return "amd64-sse2-cx16";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16:
+ return "amd64-sse3-cx16";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse3-lzcnt";
+ case VEX_HWCAPS_AMD64_CX16 | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse2-cx16-lzcnt";
+ case VEX_HWCAPS_AMD64_SSE3 | VEX_HWCAPS_AMD64_CX16
+ | VEX_HWCAPS_AMD64_LZCNT:
+ return "amd64-sse3-cx16-lzcnt";
+
+ default:
+ return NULL;
+ }
}
static HChar* show_hwcaps_ppc32 ( UInt hwcaps )
/* Capability bits are allocated in one contiguous, non-overlapping
   run per architecture, so inserting a bit for one architecture (as
   the LZCNT additions here do) renumbers every bit after it. */
/* x86: baseline capability is Pentium-1 (FPU, MMX, but no SSE), with
cmpxchg8b. */
-#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
-#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
-#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
+#define VEX_HWCAPS_X86_SSE1 (1<<1) /* SSE1 support (Pentium III) */
+#define VEX_HWCAPS_X86_SSE2 (1<<2) /* SSE2 support (Pentium 4) */
+#define VEX_HWCAPS_X86_SSE3 (1<<3) /* SSE3 support (>= Prescott) */
+#define VEX_HWCAPS_X86_LZCNT (1<<4) /* SSE4a LZCNT insn */
/* amd64: baseline capability is SSE2, with cmpxchg8b but not
cmpxchg16b. */
-#define VEX_HWCAPS_AMD64_SSE3 (1<<4) /* SSE3 support */
-#define VEX_HWCAPS_AMD64_CX16 (1<<5) /* cmpxchg16b support */
+#define VEX_HWCAPS_AMD64_SSE3 (1<<5) /* SSE3 support */
+#define VEX_HWCAPS_AMD64_CX16 (1<<6) /* cmpxchg16b support */
+#define VEX_HWCAPS_AMD64_LZCNT (1<<7) /* SSE4a LZCNT insn */
/* ppc32: baseline capability is integer only */
-#define VEX_HWCAPS_PPC32_F (1<<6) /* basic (non-optional) FP */
-#define VEX_HWCAPS_PPC32_V (1<<7) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC32_FX (1<<8) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC32_GX (1<<9) /* Graphics extns
- (fres,frsqrte,fsel,stfiwx) */
+#define VEX_HWCAPS_PPC32_F (1<<8) /* basic (non-optional) FP */
+#define VEX_HWCAPS_PPC32_V (1<<9) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC32_FX (1<<10) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC32_GX (1<<11) /* Graphics extns
+ (fres,frsqrte,fsel,stfiwx) */
/* ppc64: baseline capability is integer and basic FP insns */
-#define VEX_HWCAPS_PPC64_V (1<<10) /* Altivec (VMX) */
-#define VEX_HWCAPS_PPC64_FX (1<<11) /* FP extns (fsqrt, fsqrts) */
-#define VEX_HWCAPS_PPC64_GX (1<<12) /* Graphics extns
- (fres,frsqrte,fsel,stfiwx) */
+#define VEX_HWCAPS_PPC64_V (1<<12) /* Altivec (VMX) */
+#define VEX_HWCAPS_PPC64_FX (1<<13) /* FP extns (fsqrt, fsqrts) */
+#define VEX_HWCAPS_PPC64_GX (1<<14) /* Graphics extns
+ (fres,frsqrte,fsel,stfiwx) */
/* arm: baseline capability is ARMv4 */
/* No extra capabilities */