extern void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
extern void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st );
extern void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st );
-extern void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st );
+extern void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st,
+ ULong hasF16C, ULong hasRDRAND );
extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
extern void amd64g_dirtyhelper_SxDT ( void* address,
ULong op /* 0 or 1 */ );
+// This returns a 32-bit value from the host's RDRAND in bits 31:0, and the
+// resulting C flag value in bit 32.
+extern ULong amd64g_dirtyhelper_RDRAND ( void );
+
+
/* Helps with PCMP{I,E}STR{I,M}.
CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really,
address sizes : 39 bits physical, 48 bits virtual
power management:
*/
-void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st )
+void amd64g_dirtyhelper_CPUID_avx2 ( VexGuestAMD64State* st,
+ ULong hasF16C, ULong hasRDRAND )
{
+ vassert((hasF16C >> 1) == 0ULL);
+ vassert((hasRDRAND >> 1) == 0ULL);
# define SET_ABCD(_a,_b,_c,_d) \
do { st->guest_RAX = (ULong)(_a); \
st->guest_RBX = (ULong)(_b); \
case 0x00000000:
SET_ABCD(0x0000000d, 0x756e6547, 0x6c65746e, 0x49656e69);
break;
- case 0x00000001:
- /* Don't advertise RDRAND support, bit 30 in ECX. */
- SET_ABCD(0x000306c3, 0x02100800, 0x3ffafbff, 0xbfebfbff);
+ case 0x00000001: {
+ // As a baseline, advertise neither F16C (ecx:29) nor RDRAND (ecx:30),
+ // but patch in support for them as directed by the caller.
+ UInt ecx_extra
+ = (hasF16C ? (1U << 29) : 0) | (hasRDRAND ? (1U << 30) : 0);
+ SET_ABCD(0x000306c3, 0x02100800, (0x1ffafbff | ecx_extra), 0xbfebfbff);
break;
+ }
case 0x00000002:
SET_ABCD(0x76036301, 0x00f0b6ff, 0x00000000, 0x00c10000);
break;
# endif
}
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (non-referentially-transparent) */
+/* Horrible hack. When this file is built for an amd64 (__x86_64__) host,
+ execute the host's RDRAND once with a 32-bit destination and return the
+ 32 random bits in bits 31:0 of the result and the resulting rflags.C value
+ in bit 32; all higher bits of the result are zero. When built for any
+ other host there is no RDRAND to run, so return 1ULL << 32, which reads as
+ "succeeded, and the random bits are all zero". Takes no arguments and
+ does not touch the guest state. */
+ULong amd64g_dirtyhelper_RDRAND ( void ) {
+# if defined(__x86_64__)
+ ULong res = 0;
+ ULong cflag = 0;
+ __asm__ __volatile__(
+ "movq $0, %%r11 ; " /* clear the scratch reg that receives the random bits */
+ "movq $0, %%r12 ; " /* clear the scratch reg that receives the carry flag */
+ "rdrand %%r11d ; " /* 32-bit RDRAND; sets the carry flag */
+ "setc %%r12b ; " /* capture the carry flag in the low byte of r12 */
+ "movq %%r11, %0 ; " /* copy both scratch regs to the C outputs */
+ "movq %%r12, %1"
+ : "=r"(res), "=r"(cflag) : : "r11", "r12" /* r11/r12 are used as scratch */
+ );
+ res &= 0xFFFFFFFFULL; /* keep only bits 31:0 */
+ cflag &= 1ULL; /* keep only the single flag bit */
+ return (cflag << 32) | res; /* flag in bit 32, random bits below it */
+# else
+ /* There's nothing we can sensibly do. Return a value denoting
+ "I succeeded, and the random bits are all zero" :-/ */
+ return 1ULL << 32;
+# endif
+}
+
/*---------------------------------------------------------------*/
/*--- Helpers for MMX/SSE/SSE2. ---*/
/*---------------------------------------------------------------*/
toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
}
+/* Predicate on the prefix bits: True exactly when neither PFX_F2 nor PFX_F3
+   is set in pfx. */
+static Bool haveNoF2noF3 ( Prefix pfx )
+{
+   const Prefix f2f3 = pfx & (PFX_F2|PFX_F3);
+   return toBool(f2f3 == 0);
+}
+
/* Return True iff pfx has 66, F2 and F3 clear */
static Bool haveNo66noF2noF3 ( Prefix pfx )
{
}
+/* Handles 128 and 256 bit versions of VCVTPH2PS.
+
+ On entry, delta indexes the modrm byte. The half-precision source is the
+ E operand: for the 128 bit form it is the low 64 bits of an XMM register
+ or a 64 bit load; for the 256 bit form (is256bit == True) it is the low
+ 128 bits of a YMM register or a 128 bit load. The source is converted
+ with Iop_F16toF32x4 / Iop_F16toF32x8 and the result is written to the G
+ register: via putYMMReg for the 256 bit form and via putYMMRegLoAndZU for
+ the 128 bit form (NOTE(review): presumably that helper also zeroes the
+ upper half of the YMM register -- confirm against its definition).
+
+ Returns delta advanced past the modrm byte (register form) or past the
+ whole addressing mode (memory form). */
+static Long dis_VCVTPH2PS ( const VexAbiInfo* vbi, Prefix pfx,
+ Long delta, Bool is256bit )
+{
+ /* This is a width-doubling load or reg-reg move, that does conversion on the
+ transferred data. */
+ UChar modrm = getUChar(delta);
+ UInt rG = gregOfRexRM(pfx, modrm);
+ IRTemp srcE = newTemp(is256bit ? Ity_V128 : Ity_I64); /* F16 source lanes */
+
+ if (epartIsReg(modrm)) {
+ UInt rE = eregOfRexRM(pfx, modrm);
+ assign(srcE, is256bit ? unop(Iop_V256toV128_0, getYMMReg(rE))
+ : unop(Iop_V128to64, getXMMReg(rE)));
+ delta += 1; /* step over the modrm byte */
+ DIP("vcvtph2ps %s,%s\n", nameXMMReg(rE),
+ (is256bit ? nameYMMReg: nameXMMReg)(rG));
+ } else {
+ Int alen = 0;
+ HChar dis_buf[50];
+ IRTemp addr = disAMode(&alen, vbi, pfx, delta, dis_buf, 0);
+ // I don't think we need an alignment check here (not 100% sure tho.)
+ assign(srcE, loadLE(is256bit ? Ity_V128 : Ity_I64, mkexpr(addr)));
+ delta += alen; /* step over the whole addressing mode */
+ DIP( "vcvtph2ps %s,%s\n", dis_buf,
+ (is256bit ? nameYMMReg: nameXMMReg)(rG));
+ }
+
+ IRExpr* res = unop(is256bit ? Iop_F16toF32x8 : Iop_F16toF32x4, mkexpr(srcE));
+ (is256bit ? putYMMReg : putYMMRegLoAndZU)(rG, res); /* write-back to G */
+
+ return delta;
+}
+
+
/* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
Long delta, Bool isAvx, Bool xIsZ )
}
vassert(fName); vassert(fAddr);
- d = unsafeIRDirty_0_N ( 0/*regparms*/,
- fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
+ IRExpr** args = NULL;
+ if (fAddr == &amd64g_dirtyhelper_CPUID_avx2) {
+ Bool hasF16C = (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C) != 0;
+ Bool hasRDRAND = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND) != 0;
+ args = mkIRExprVec_3(IRExpr_GSPTR(),
+ mkIRExpr_HWord(hasF16C ? 1 : 0),
+ mkIRExpr_HWord(hasRDRAND ? 1 : 0));
+ } else {
+ args = mkIRExprVec_1(IRExpr_GSPTR());
+ }
+ d = unsafeIRDirty_0_N ( 0/*regparms*/, fName, fAddr, args );
+
/* declare guest state effects */
d->nFxState = 4;
vex_bzero(&d->fxState, sizeof(d->fxState));
return delta;
}
- case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
- IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
- IRTemp expdHi = newTemp(elemTy);
- IRTemp expdLo = newTemp(elemTy);
- IRTemp dataHi = newTemp(elemTy);
- IRTemp dataLo = newTemp(elemTy);
- IRTemp oldHi = newTemp(elemTy);
- IRTemp oldLo = newTemp(elemTy);
- IRTemp flags_old = newTemp(Ity_I64);
- IRTemp flags_new = newTemp(Ity_I64);
- IRTemp success = newTemp(Ity_I1);
- IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
- IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
- IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
- IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
- IRTemp expdHi64 = newTemp(Ity_I64);
- IRTemp expdLo64 = newTemp(Ity_I64);
-
- /* Translate this using a DCAS, even if there is no LOCK
- prefix. Life is too short to bother with generating two
- different translations for the with/without-LOCK-prefix
- cases. */
- *expect_CAS = True;
-
- /* Decode, and generate address. */
- if (have66(pfx)) goto decode_failure;
- if (sz != 4 && sz != 8) goto decode_failure;
- if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
- goto decode_failure;
+ case 0xC7: {
modrm = getUChar(delta);
- if (epartIsReg(modrm)) goto decode_failure;
- if (gregLO3ofRM(modrm) != 1) goto decode_failure;
- if (haveF2orF3(pfx)) {
- /* Since the e-part is memory only, F2 or F3 (one or the
- other) is acceptable if LOCK is also present. But only
- for cmpxchg8b. */
- if (sz == 8) goto decode_failure;
- if (haveF2andF3(pfx) || !haveLOCK(pfx)) goto decode_failure;
- }
- addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
- delta += alen;
+ // Detecting valid CMPXCHG combinations is pretty complex.
+ Bool isValidCMPXCHG = gregLO3ofRM(modrm) == 1;
+ if (isValidCMPXCHG) {
+ if (have66(pfx)) isValidCMPXCHG = False;
+ if (sz != 4 && sz != 8) isValidCMPXCHG = False;
+ if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
+ isValidCMPXCHG = False;
+ if (epartIsReg(modrm)) isValidCMPXCHG = False;
+ if (haveF2orF3(pfx)) {
+ /* Since the e-part is memory only, F2 or F3 (one or the
+ other) is acceptable if LOCK is also present. But only
+ for cmpxchg8b. */
+ if (sz == 8) isValidCMPXCHG = False;
+ if (haveF2andF3(pfx) || !haveLOCK(pfx)) isValidCMPXCHG = False;
+ }
+ }
+
+ /* 0F C7 /1 (with qualifications) = CMPXCHG */
+ if (isValidCMPXCHG) {
+ // Note that we've already read the modrm byte by this point, but we
+ // haven't moved delta past it.
+ IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
+ IRTemp expdHi = newTemp(elemTy);
+ IRTemp expdLo = newTemp(elemTy);
+ IRTemp dataHi = newTemp(elemTy);
+ IRTemp dataLo = newTemp(elemTy);
+ IRTemp oldHi = newTemp(elemTy);
+ IRTemp oldLo = newTemp(elemTy);
+ IRTemp flags_old = newTemp(Ity_I64);
+ IRTemp flags_new = newTemp(Ity_I64);
+ IRTemp success = newTemp(Ity_I1);
+ IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
+ IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
+ IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
+ IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
+ IRTemp expdHi64 = newTemp(Ity_I64);
+ IRTemp expdLo64 = newTemp(Ity_I64);
+
+ /* Translate this using a DCAS, even if there is no LOCK
+ prefix. Life is too short to bother with generating two
+ different translations for the with/without-LOCK-prefix
+ cases. */
+ *expect_CAS = True;
- /* cmpxchg16b requires an alignment check. */
- if (sz == 8)
- gen_SEGV_if_not_16_aligned( addr );
+ /* Generate address */
+ vassert(!epartIsReg(modrm));
+ addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
+ delta += alen;
- /* Get the expected and new values. */
- assign( expdHi64, getIReg64(R_RDX) );
- assign( expdLo64, getIReg64(R_RAX) );
-
- /* These are the correctly-sized expected and new values.
- However, we also get expdHi64/expdLo64 above as 64-bits
- regardless, because we will need them later in the 32-bit
- case (paradoxically). */
- assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
- : mkexpr(expdHi64) );
- assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
- : mkexpr(expdLo64) );
- assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
- assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
-
- /* Do the DCAS */
- stmt( IRStmt_CAS(
- mkIRCAS( oldHi, oldLo,
- Iend_LE, mkexpr(addr),
- mkexpr(expdHi), mkexpr(expdLo),
- mkexpr(dataHi), mkexpr(dataLo)
- )));
-
- /* success when oldHi:oldLo == expdHi:expdLo */
- assign( success,
- binop(opCasCmpEQ,
- binop(opOR,
- binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
- binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
- ),
- zero
- ));
-
- /* If the DCAS is successful, that is to say oldHi:oldLo ==
- expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
- which is where they came from originally. Both the actual
- contents of these two regs, and any shadow values, are
- unchanged. If the DCAS fails then we're putting into
- RDX:RAX the value seen in memory. */
- /* Now of course there's a complication in the 32-bit case
- (bah!): if the DCAS succeeds, we need to leave RDX:RAX
- unchanged; but if we use the same scheme as in the 64-bit
- case, we get hit by the standard rule that a write to the
- bottom 32 bits of an integer register zeros the upper 32
- bits. And so the upper halves of RDX and RAX mysteriously
- become zero. So we have to stuff back in the original
- 64-bit values which we previously stashed in
- expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
- /* It's just _so_ much fun ... */
- putIRegRDX( 8,
- IRExpr_ITE( mkexpr(success),
- mkexpr(expdHi64),
- sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
- : mkexpr(oldHi)
- ));
- putIRegRAX( 8,
- IRExpr_ITE( mkexpr(success),
- mkexpr(expdLo64),
- sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
- : mkexpr(oldLo)
- ));
-
- /* Copy the success bit into the Z flag and leave the others
- unchanged */
- assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
- assign(
- flags_new,
- binop(Iop_Or64,
- binop(Iop_And64, mkexpr(flags_old),
- mkU64(~AMD64G_CC_MASK_Z)),
- binop(Iop_Shl64,
- binop(Iop_And64,
- unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
- mkU8(AMD64G_CC_SHIFT_Z)) ));
+ /* cmpxchg16b requires an alignment check. */
+ if (sz == 8)
+ gen_SEGV_if_not_16_aligned( addr );
- stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
- stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
- stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
- /* Set NDEP even though it isn't used. This makes
- redundant-PUT elimination of previous stores to this field
- work better. */
- stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+ /* Get the expected and new values. */
+ assign( expdHi64, getIReg64(R_RDX) );
+ assign( expdLo64, getIReg64(R_RAX) );
+
+ /* These are the correctly-sized expected and new values.
+ However, we also get expdHi64/expdLo64 above as 64-bits
+ regardless, because we will need them later in the 32-bit
+ case (paradoxically). */
+ assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
+ : mkexpr(expdHi64) );
+ assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
+ : mkexpr(expdLo64) );
+ assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
+ assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
+
+ /* Do the DCAS */
+ stmt( IRStmt_CAS(
+ mkIRCAS( oldHi, oldLo,
+ Iend_LE, mkexpr(addr),
+ mkexpr(expdHi), mkexpr(expdLo),
+ mkexpr(dataHi), mkexpr(dataLo)
+ )));
- /* Sheesh. Aren't you glad it was me and not you that had to
- write and validate all this grunge? */
+ /* success when oldHi:oldLo == expdHi:expdLo */
+ assign( success,
+ binop(opCasCmpEQ,
+ binop(opOR,
+ binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
+ binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
+ ),
+ zero
+ ));
+
+ /* If the DCAS is successful, that is to say oldHi:oldLo ==
+ expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
+ which is where they came from originally. Both the actual
+ contents of these two regs, and any shadow values, are
+ unchanged. If the DCAS fails then we're putting into
+ RDX:RAX the value seen in memory. */
+ /* Now of course there's a complication in the 32-bit case
+ (bah!): if the DCAS succeeds, we need to leave RDX:RAX
+ unchanged; but if we use the same scheme as in the 64-bit
+ case, we get hit by the standard rule that a write to the
+ bottom 32 bits of an integer register zeros the upper 32
+ bits. And so the upper halves of RDX and RAX mysteriously
+ become zero. So we have to stuff back in the original
+ 64-bit values which we previously stashed in
+ expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
+ /* It's just _so_ much fun ... */
+ putIRegRDX( 8,
+ IRExpr_ITE( mkexpr(success),
+ mkexpr(expdHi64),
+ sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
+ : mkexpr(oldHi)
+ ));
+ putIRegRAX( 8,
+ IRExpr_ITE( mkexpr(success),
+ mkexpr(expdLo64),
+ sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
+ : mkexpr(oldLo)
+ ));
+
+ /* Copy the success bit into the Z flag and leave the others
+ unchanged */
+ assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
+ assign(
+ flags_new,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(flags_old),
+ mkU64(~AMD64G_CC_MASK_Z)),
+ binop(Iop_Shl64,
+ binop(Iop_And64,
+ unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
+ mkU8(AMD64G_CC_SHIFT_Z)) ));
- DIP("cmpxchg8b %s\n", dis_buf);
- return delta;
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ /* Set NDEP even though it isn't used. This makes
+ redundant-PUT elimination of previous stores to this field
+ work better. */
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ /* Sheesh. Aren't you glad it was me and not you that had to
+ write and validate all this grunge? */
+
+ DIP("cmpxchg8b %s\n", dis_buf);
+ return delta;
+ } // if (isValidCMPXCHG)
+
+ /* 0F C7 /6 no-F2-or-F3 = RDRAND */
+ // Accepted only when the hwcaps advertise RDRAND, the E operand is a
+ // register, neither F2 nor F3 is present, and the operand size is 2, 4
+ // or 8 bytes. The host helper amd64g_dirtyhelper_RDRAND yields 32 random
+ // bits in bits 31:0 and the success (carry) flag in bit 32, so the 64-bit
+ // form calls it twice and succeeds only if both calls succeeded.
+ if (gregLO3ofRM(modrm) == 6/*RDRAND*/
+ && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND)
+ && epartIsReg(modrm) && haveNoF2noF3(pfx)
+ && (sz == 8 || sz == 4 || sz == 2)) {
+ delta++; // move past modrm
+ IRType ty = szToITy(sz);
+
+ // Pull a first 32 bits of randomness, plus C flag, out of the host.
+ IRTemp pairLO = newTemp(Ity_I64);
+ IRDirty* dLO
+ = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
+ "amd64g_dirtyhelper_RDRAND",
+ &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
+ // There are no guest state or memory effects to declare for |dLO|.
+ stmt( IRStmt_Dirty(dLO) );
+
+ IRTemp randsLO = newTemp(Ity_I32);
+ assign(randsLO, unop(Iop_64to32, mkexpr(pairLO)));
+ IRTemp cLO = newTemp(Ity_I64);
+ assign(cLO, binop(Iop_Shr64, mkexpr(pairLO), mkU8(32)));
+
+ // We'll assemble the final pairing in (cFinal, randsNearlyFinal).
+ IRTemp randsNearlyFinal = newTemp(Ity_I64);
+ IRTemp cFinal = newTemp(Ity_I64);
+
+ if (ty == Ity_I64) {
+ // Pull another 32 bits of randomness out of the host.
+ IRTemp pairHI = newTemp(Ity_I64);
+ IRDirty* dHI
+ = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
+ "amd64g_dirtyhelper_RDRAND",
+ &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
+ // There are no guest state or memory effects to declare for |dHI|.
+ stmt( IRStmt_Dirty(dHI) );
+
+ IRTemp randsHI = newTemp(Ity_I32);
+ assign(randsHI, unop(Iop_64to32, mkexpr(pairHI)));
+ IRTemp cHI = newTemp(Ity_I64);
+ assign(cHI, binop(Iop_Shr64, mkexpr(pairHI), mkU8(32)));
+ assign(randsNearlyFinal, binop(Iop_32HLto64,
+ mkexpr(randsHI), mkexpr(randsLO)));
+ // Overall success requires both host calls to have succeeded.
+ assign(cFinal, binop(Iop_And64,
+ binop(Iop_And64, mkexpr(cHI), mkexpr(cLO)),
+ mkU64(1)));
+ } else {
+ assign(randsNearlyFinal, unop(Iop_32Uto64, mkexpr(randsLO)));
+ assign(cFinal, binop(Iop_And64, mkexpr(cLO), mkU64(1)));
+ }
+
+ /* Now cFinal[0] is the final success/failure flag (cFinal[0] == 1
+ means success). But there's another twist. If we failed then the
+ returned value must be forced to zero. Otherwise we could have the
+ situation, when sz==8, where one of the host calls failed but the
+ other didn't. This would give cFinal[0] == 0 (correctly) but
+ randsNearlyFinal not being zero, because it contains the 32 bit
+ result of the non-failing call. */
+ // Shl-by-63 then Sar-by-63 smears cFinal[0] across all 64 bits, giving
+ // an all-ones mask on success and an all-zeroes mask on failure.
+ IRTemp randsFinal = newTemp(Ity_I64);
+ assign(randsFinal,
+ binop(Iop_And64,
+ mkexpr(randsNearlyFinal),
+ binop(Iop_Sar64,
+ binop(Iop_Shl64, mkexpr(cFinal), mkU8(63)),
+ mkU8(63))
+ ));
+
+ // So, finally, update the guest state.
+ putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(randsFinal)));
+
+ // Set C=<success indication>, O,S,Z,A,P = 0. cFinal has already been
+ // masked so only the lowest bit remains.
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(cFinal) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ // Terminate the trace line with a newline, like every other DIP call.
+ DIP("rdrand %s\n", nameIRegE(sz, pfx, modrm));
+ return delta;
+ }
+
+ goto decode_failure;
}
case 0xC8: /* BSWAP %eax */
}
break;
+ case 0x13:
+ /* VCVTPH2PS xmm2/m64, xmm1 = VEX.128.66.0F38.W0 13 /r */
+ if (have66noF2noF3(pfx)
+ && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
+ && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
+ delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/False );
+ goto decode_success;
+ }
+ /* VCVTPH2PS xmm2/m128, xmm1 = VEX.256.66.0F38.W0 13 /r */
+ if (have66noF2noF3(pfx)
+ && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
+ && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
+ delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/True );
+ goto decode_success;
+ }
+ break;
+
case 0x16:
/* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
if (have66noF2noF3(pfx)
return res;
}
+/* Handles 128 and 256 bit versions of VCVTPS2PH.
+
+ On entry, delta indexes the modrm byte. The single-precision source is
+ the G register (XMM for the 128 bit form, YMM when is256bit == True). It
+ is converted with Iop_F32toF16x4 / Iop_F32toF16x8 and the narrowed result
+ goes to the E operand: either stored to memory, or written to a register
+ via putYMMRegLoAndZU (the 128 bit form first widens its 64 bit result
+ with Iop_64UtoV128).
+
+ An imm8 follows the modrm byte / addressing mode. If bit 2 of it is set
+ the rounding mode comes from get_sse_roundingmode(); otherwise bits 1:0 of
+ the immediate are used directly as the rounding mode.
+
+ Returns delta advanced past the imm8. */
+static Long dis_VCVTPS2PH ( const VexAbiInfo* vbi, Prefix pfx,
+ Long delta, Bool is256bit )
+{
+ /* This is a width-halving store or reg-reg move, that does conversion on the
+ transferred data. */
+ UChar modrm = getUChar(delta);
+ UInt rG = gregOfRexRM(pfx, modrm);
+ IRTemp rm = newTemp(Ity_I32); /* rounding mode used for the conversion */
+ IROp op = is256bit ? Iop_F32toF16x8 : Iop_F32toF16x4;
+ IRExpr* srcG = (is256bit ? getYMMReg : getXMMReg)(rG);
+
+ /* (imm & 3) contains an Intel-encoded rounding mode. Because that encoding
+ is the same as the encoding for IRRoundingMode, we can use that value
+ directly in the IR as a rounding mode. */
+
+ if (epartIsReg(modrm)) {
+ UInt rE = eregOfRexRM(pfx, modrm);
+ delta += 1; /* step over modrm; delta now indexes the imm8 */
+ UInt imm = getUChar(delta);
+ assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
+ IRExpr* res = binop(op, mkexpr(rm), srcG);
+ if (!is256bit)
+ res = unop(Iop_64UtoV128, res); /* widen the I64 result to V128 */
+ putYMMRegLoAndZU(rE, res);
+ DIP("vcvtps2ph $%u,%s,%s\n",
+ imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), nameXMMReg(rE));
+ } else {
+ Int alen = 0;
+ HChar dis_buf[50];
+ /* NOTE(review): the final argument 1 presumably tells disAMode that one
+ more byte (the imm8) follows the addressing mode -- confirm. */
+ IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
+ delta += alen; /* delta now indexes the imm8 */
+ UInt imm = getUChar(delta);
+ assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
+ IRExpr* res = binop(op, mkexpr(rm), srcG);
+ storeLE(mkexpr(addr), res);
+ DIP("vcvtps2ph $%u,%s,%s\n",
+ imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), dis_buf);
+ }
+ delta++; /* step over the imm8 (both forms) */
+ /* doesn't use vvvv */
+ return delta;
+}
+
__attribute__((noinline))
static
Long dis_ESC_0F3A__VEX (
}
break;
+ case 0x1D:
+ /* VCVTPS2PH imm8, xmm2, xmm1/m64 = VEX.128.66.0F3A.W0 1D /r ib */
+ if (have66noF2noF3(pfx)
+ && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
+ && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
+ delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/False );
+ goto decode_success;
+ }
+ /* VCVTPS2PH imm8, ymm2, ymm1/m128 = VEX.256.66.0F3A.W0 1D /r ib */
+ if (have66noF2noF3(pfx)
+ && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
+ && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
+ delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/True );
+ goto decode_success;
+ }
+ break;
+
case 0x20:
/* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
if (have66noF2noF3(pfx)
case Asse_UNPCKLQ: return "punpcklq";
case Asse_PSHUFB: return "pshufb";
case Asse_PMADDUBSW: return "pmaddubsw";
+ case Asse_F32toF16: return "vcvtps2ph(rm_field=$0x4).";
+ case Asse_F16toF32: return "vcvtph2ps.";
default: vpanic("showAMD64SseOp");
}
}
|| i->Ain.Sse32Fx4.op == Asse_RSQRTF
|| i->Ain.Sse32Fx4.op == Asse_SQRTF
|| i->Ain.Sse32Fx4.op == Asse_I2F
- || i->Ain.Sse32Fx4.op == Asse_F2I );
+ || i->Ain.Sse32Fx4.op == Asse_F2I
+ || i->Ain.Sse32Fx4.op == Asse_F32toF16
+ || i->Ain.Sse32Fx4.op == Asse_F16toF32 );
addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
addHRegUse(u, unary ? HRmWrite : HRmModify,
i->Ain.Sse32Fx4.dst);
i->Ain.SseLdzLO.addr);
goto done;
- case Ain_Sse32Fx4:
+ case Ain_Sse32Fx4: {
+ UInt srcRegNo = vregEnc3210(i->Ain.Sse32Fx4.src);
+ UInt dstRegNo = vregEnc3210(i->Ain.Sse32Fx4.dst);
+ // VEX encoded cases
+ switch (i->Ain.Sse32Fx4.op) {
+ case Asse_F16toF32: { // vcvtph2ps %xmmS, %xmmD
+ UInt s = srcRegNo;
+ UInt d = dstRegNo;
+ // VCVTPH2PS %xmmS, %xmmD (s and d are both xmm regs, range 0 .. 15)
+ // 0xC4 : ~d3 1 ~s3 0 0 0 1 0 : 0x79 : 0x13 : 1 1 d2 d1 d0 s2 s1 s0
+ UInt byte2 = ((((~d)>>3)&1)<<7) | (1<<6)
+ | ((((~s)>>3)&1)<<5) | (1<<1);
+ UInt byte5 = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0);
+ *p++ = 0xC4;
+ *p++ = byte2;
+ *p++ = 0x79;
+ *p++ = 0x13;
+ *p++ = byte5;
+ goto done;
+ }
+ case Asse_F32toF16: { // vcvtps2ph $4, %xmmS, %xmmD
+ UInt s = srcRegNo;
+ UInt d = dstRegNo;
+ // VCVTPS2PH $4, %xmmS, %xmmD (s and d both xmm regs, range 0 .. 15)
+ // 0xC4 : ~s3 1 ~d3 0 0 0 1 1 : 0x79
+ // : 0x1D : 11 s2 s1 s0 d2 d1 d0 : 0x4
+ UInt byte2 = ((((~s)>>3)&1)<<7) | (1<<6)
+ | ((((~d)>>3)&1)<<5) | (1<<1) | (1 << 0);
+ UInt byte5 = (1<<7) | (1<<6) | ((s&7) << 3) | ((d&7) << 0);
+ *p++ = 0xC4;
+ *p++ = byte2;
+ *p++ = 0x79;
+ *p++ = 0x1D;
+ *p++ = byte5;
+ *p++ = 0x04;
+ goto done;
+ }
+ default: break;
+ }
+ // After this point, REX encoded cases only
xtra = 0;
switch (i->Ain.Sse32Fx4.op) {
case Asse_F2I: *p++ = 0x66; break;
default: break;
}
- *p++ = clearWBit(
- rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32Fx4.dst),
- vregEnc3210(i->Ain.Sse32Fx4.src) ));
+ *p++ = clearWBit(rexAMode_R_enc_enc(dstRegNo, srcRegNo));
*p++ = 0x0F;
switch (i->Ain.Sse32Fx4.op) {
case Asse_ADDF: *p++ = 0x58; break;
case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
default: goto bad;
}
- p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32Fx4.dst),
- vregEnc3210(i->Ain.Sse32Fx4.src) );
+ p = doAMode_R_enc_enc(p, dstRegNo, srcRegNo);
if (xtra & 0x100)
*p++ = toUChar(xtra & 0xFF);
goto done;
+ }
case Ain_Sse64Fx2:
xtra = 0;
Asse_UNPCKLB, Asse_UNPCKLW, Asse_UNPCKLD, Asse_UNPCKLQ,
// Only for SSSE3 capable hosts:
Asse_PSHUFB,
- Asse_PMADDUBSW
+ Asse_PMADDUBSW,
+ // Only for F16C capable hosts:
+ Asse_F32toF16, // F32 to F16 conversion, aka vcvtps2ph
+ Asse_F16toF32, // F16 to F32 conversion, aka vcvtph2ps
}
AMD64SseOp;
return dst;
}
+ // Half-float vector conversion
+ if (e->Iex.Binop.op == Iop_F32toF16x4
+ && (env->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
+ HReg srcV = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dstV = newVRegV(env);
+ HReg dstI = newVRegI(env);
+ set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, AMD64Instr_Sse32Fx4(Asse_F32toF16, srcV, dstV));
+ set_SSE_rounding_default(env);
+ addInstr(env, AMD64Instr_SseMOVQ(dstI, dstV, /*toXMM=*/False));
+ return dstI;
+ }
+
break;
}
}
case Iop_32UtoV128: {
+ // FIXME maybe just use MOVQ here?
HReg dst = newVRegV(env);
AMD64AMode* rsp_m32 = AMD64AMode_IR(-32, hregAMD64_RSP());
AMD64RI* ri = iselIntExpr_RI(env, e->Iex.Unop.arg);
}
case Iop_64UtoV128: {
+ // FIXME maybe just use MOVQ here?
HReg dst = newVRegV(env);
AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
AMD64RMI* rmi = iselIntExpr_RMI(env, e->Iex.Unop.arg);
return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
}
+ case Iop_F16toF32x4: {
+ if (env->hwcaps & VEX_HWCAPS_AMD64_F16C) {
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_SseMOVQ(src, dst, /*toXMM=*/True));
+ addInstr(env, AMD64Instr_Sse32Fx4(Asse_F16toF32, dst, dst));
+ return dst;
+ }
+ break;
+ }
+
default:
break;
} /* switch (e->Iex.Unop.op) */
return dst;
}
+ // Half-float vector conversion
+ case Iop_F32toF16x8: {
+ if (env->hwcaps & VEX_HWCAPS_AMD64_F16C) {
+ HReg srcHi, srcLo;
+ iselDVecExpr(&srcHi, &srcLo, env, e->Iex.Binop.arg2);
+ HReg dstHi = newVRegV(env);
+ HReg dstLo = newVRegV(env);
+ set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, AMD64Instr_Sse32Fx4(Asse_F32toF16, srcHi, dstHi));
+ addInstr(env, AMD64Instr_Sse32Fx4(Asse_F32toF16, srcLo, dstLo));
+ set_SSE_rounding_default(env);
+ // Now we have the result in dstHi[63:0] and dstLo[63:0], but we
+ // need to compact all that into one register. There's probably a
+ // more elegant way to do this, but ..
+ addInstr(env, AMD64Instr_SseShiftN(Asse_SHL128, 64, dstHi));
+ // dstHi is now 127:64 = useful data, 63:0 = zero
+ addInstr(env, AMD64Instr_SseShiftN(Asse_SHL128, 64, dstLo));
+ addInstr(env, AMD64Instr_SseShiftN(Asse_SHR128, 64, dstLo));
+ // dstLo is now 127:64 = zero, 63:0 = useful data
+ addInstr(env, AMD64Instr_SseReRg(Asse_OR, dstHi, dstLo));
+ return dstLo;
+ }
+ break;
+ }
+
default:
break;
} /* switch (e->Iex.Binop.op) */
return;
}
+ case Iop_F16toF32x8: {
+ if (env->hwcaps & VEX_HWCAPS_AMD64_F16C) {
+ HReg src = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg srcCopy = newVRegV(env);
+ HReg dstHi = newVRegV(env);
+ HReg dstLo = newVRegV(env);
+ // Copy src, since we'll need to modify it.
+ addInstr(env, mk_vMOVsd_RR(src, srcCopy));
+ addInstr(env, AMD64Instr_Sse32Fx4(Asse_F16toF32, srcCopy, dstLo));
+ addInstr(env, AMD64Instr_SseShiftN(Asse_SHR128, 64, srcCopy));
+ addInstr(env, AMD64Instr_Sse32Fx4(Asse_F16toF32, srcCopy, dstHi));
+ *rHi = dstHi;
+ *rLo = dstLo;
+ return;
+ }
+ break;
+ }
+
default:
break;
} /* switch (e->Iex.Unop.op) */
| VEX_HWCAPS_AMD64_AVX
| VEX_HWCAPS_AMD64_RDTSCP
| VEX_HWCAPS_AMD64_BMI
- | VEX_HWCAPS_AMD64_AVX2)));
+ | VEX_HWCAPS_AMD64_AVX2
+ | VEX_HWCAPS_AMD64_F16C
+ | VEX_HWCAPS_AMD64_RDRAND)));
/* Check that the host's endianness is as expected. */
vassert(archinfo_host->endness == VexEndnessLE);
case Iop_F32toI32Sx4: vex_printf("F32toI32Sx4"); return;
case Iop_F32toF16x4_DEP: vex_printf("F32toF16x4_DEP"); return;
+ case Iop_F32toF16x4: vex_printf("F32toF16x4"); return;
case Iop_F16toF32x4: vex_printf("F16toF32x4"); return;
case Iop_F16toF64x2: vex_printf("F16toF64x2"); return;
case Iop_F64toF16x2_DEP: vex_printf("F64toF16x2_DEP"); return;
case Iop_Div32Fx8: vex_printf("Div32Fx8"); return;
case Iop_I32StoF32x8: vex_printf("I32StoF32x8"); return;
case Iop_F32toI32Sx8: vex_printf("F32toI32Sx8"); return;
+ case Iop_F32toF16x8: vex_printf("F32toF16x8"); return;
+ case Iop_F16toF32x8: vex_printf("F16toF32x8"); return;
case Iop_AndV256: vex_printf("AndV256"); return;
case Iop_OrV256: vex_printf("OrV256"); return;
case Iop_XorV256: vex_printf("XorV256"); return;
case Iop_F32toI32Sx4:
BINARY(ity_RMode,Ity_V128, Ity_V128);
+ case Iop_F32toF16x4:
+ BINARY(ity_RMode,Ity_V128, Ity_I64);
+
case Iop_64HLtoV128:
BINARY(Ity_I64,Ity_I64, Ity_V128);
case Iop_F32toI32Sx8:
BINARY(ity_RMode,Ity_V256, Ity_V256);
+ case Iop_F32toF16x8:
+ BINARY(ity_RMode,Ity_V256, Ity_V128);
+
case Iop_V256toV128_1: case Iop_V256toV128_0:
UNARY(Ity_V256, Ity_V128);
+ case Iop_F16toF32x8:
+ UNARY(Ity_V128, Ity_V256);
+
case Iop_QandUQsh8x16: case Iop_QandUQsh16x8:
case Iop_QandUQsh32x4: case Iop_QandUQsh64x2:
case Iop_QandSQsh8x16: case Iop_QandSQsh16x8:
{ VEX_HWCAPS_AMD64_AVX, "avx" },
{ VEX_HWCAPS_AMD64_AVX2, "avx2" },
{ VEX_HWCAPS_AMD64_BMI, "bmi" },
+ { VEX_HWCAPS_AMD64_F16C, "f16c" },
+ { VEX_HWCAPS_AMD64_RDRAND, "rdrand" },
};
/* Allocate a large enough buffer */
static HChar buf[sizeof prefix +
#define VEX_HWCAPS_AMD64_RDTSCP (1<<9) /* RDTSCP instruction */
#define VEX_HWCAPS_AMD64_BMI (1<<10) /* BMI1 instructions */
#define VEX_HWCAPS_AMD64_AVX2 (1<<11) /* AVX2 instructions */
+#define VEX_HWCAPS_AMD64_RDRAND (1<<13) /* RDRAND instructions */
+#define VEX_HWCAPS_AMD64_F16C (1<<14) /* F16C instructions */
/* ppc32: baseline capability is integer only */
#define VEX_HWCAPS_PPC32_F (1<<8) /* basic (non-optional) FP */
/* --- Single to/from half conversion --- */
/* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
// FIXME these carry no rounding mode
- Iop_F32toF16x4_DEP, /* F32x4 -> F16x4, NO ROUNDING MODE */
- Iop_F16toF32x4, /* F16x4 -> F32x4 */
+ Iop_F32toF16x4_DEP, /* F32x4(==V128) -> F16x4(==I64), NO ROUNDING MODE */
+ Iop_F32toF16x4, /* IRRoundingMode(I32) x V128 -> I64 */
+ Iop_F16toF32x4, /* F16x4 -> F32x4 */
/* -- Double to/from half conversion -- */
Iop_F64toF16x2_DEP, // F64x2 -> F16x2, NO ROUNDING MODE
Iop_I32StoF32x8, /* IRRoundingMode(I32) x V256 -> V256 */
Iop_F32toI32Sx8, /* IRRoundingMode(I32) x V256 -> V256 */
+ Iop_F32toF16x8, /* IRRoundingMode(I32) x V256 -> V128 */
+ Iop_F16toF32x8, /* F16x8(==V128) -> F32x8(==V256) */
+
Iop_Sqrt32Fx8,
Iop_Sqrt64Fx4,
Iop_RSqrtEst32Fx8,
AM_CONDITIONAL(BUILD_ADX_TESTS, test x$ac_have_as_adx = xyes)
-# Does the C compiler support the "ifunc" attribute
+# does the amd64 assembler understand the RDRAND instruction?
# Note, this doesn't generate a C-level symbol. It generates a
-# automake-level symbol (BUILD_IFUNC_TESTS), used in test Makefile.am's
+# automake-level symbol (BUILD_RDRAND_TESTS), used in test Makefile.am's
+AC_MSG_CHECKING([if amd64 assembler knows the RDRAND instruction])
+
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[
+ do {
+ asm ("rdrand %r14");
+ asm ("rdrand %r14d");
+ asm ("rdrand %r14w");
+ } while (0)
+]])], [
+ac_have_as_rdrand=yes
+AC_MSG_RESULT([yes])
+], [
+ac_have_as_rdrand=no
+AC_MSG_RESULT([no])
+])
+
+AM_CONDITIONAL(BUILD_RDRAND_TESTS, test x$ac_have_as_rdrand = xyes)
+
+
+# does the amd64 assembler understand the F16C instructions (VCVTPH2PS and
+# VCVTPS2PH) ?
+# Note, this doesn't generate a C-level symbol. It generates a
+# automake-level symbol (BUILD_F16C_TESTS), used in test Makefile.am's
+AC_MSG_CHECKING([if amd64 assembler knows the F16C instructions])
+
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[
+ do {
+ asm ("vcvtph2ps %xmm5, %ymm10");
+ // If we put the dollar sign and zero together, the shell processing
+ // this configure.ac script substitutes the command name in. Sigh.
+ asm ("vcvtps2ph $" "0, %ymm10, %xmm5");
+ } while (0)
+]])], [
+ac_have_as_f16c=yes
+AC_MSG_RESULT([yes])
+], [
+ac_have_as_f16c=no
+AC_MSG_RESULT([no])
+])
+
+AM_CONDITIONAL(BUILD_F16C_TESTS, test x$ac_have_as_f16c = xyes)
+
+
# does the x86/amd64 assembler understand MOVBE?
# Note, this doesn't generate a C-level symbol. It generates a
# automake-level symbol (BUILD_MOVBE_TESTS), used in test Makefile.am's
#elif defined(VGA_amd64)
{ Bool have_sse3, have_ssse3, have_cx8, have_cx16;
Bool have_lzcnt, have_avx, have_bmi, have_avx2;
- Bool have_rdtscp;
+ Bool have_rdtscp, have_rdrand, have_f16c;
UInt eax, ebx, ecx, edx, max_basic, max_extended;
ULong xgetbv_0 = 0;
HChar vstr[13];
have_sse3 = have_ssse3 = have_cx8 = have_cx16
= have_lzcnt = have_avx = have_bmi = have_avx2
- = have_rdtscp = False;
+ = have_rdtscp = have_rdrand = have_f16c = False;
eax = ebx = ecx = edx = max_basic = max_extended = 0;
// we assume that SSE1 and SSE2 are available by default
have_sse3 = (ecx & (1<<0)) != 0; /* True => have sse3 insns */
have_ssse3 = (ecx & (1<<9)) != 0; /* True => have Sup SSE3 insns */
+ // fma is ecx:12
// sse41 is ecx:19
// sse42 is ecx:20
-
// xsave is ecx:26
// osxsave is ecx:27
// avx is ecx:28
- // fma is ecx:12
+ have_f16c = (ecx & (1<<29)) != 0; /* True => have F16C insns */
+ have_rdrand = (ecx & (1<<30)) != 0; /* True => have RDRAND insns */
+
have_avx = False;
/* have_fma = False; */
if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
}
+ /* Sanity check for RDRAND and F16C. These don't actually *need* AVX2, but
+ it's convenient to restrict them to the AVX2 case since the simulated
+ CPUID we'll offer them on has AVX2 as a base. */
+ if (!have_avx2) {
+ have_f16c = False;
+ have_rdrand = False;
+ }
+
va = VexArchAMD64;
vai.endness = VexEndnessLE;
vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
| (have_avx ? VEX_HWCAPS_AMD64_AVX : 0)
| (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0)
| (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0)
- | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);
+ | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0)
+ | (have_f16c ? VEX_HWCAPS_AMD64_F16C : 0)
+ | (have_rdrand ? VEX_HWCAPS_AMD64_RDRAND : 0);
VG_(machine_get_cache_info)(&vai);
binop(Iop_V128HLtoV256, qV, shV));
}
+ case Iop_F32toF16x4: {
+ // First, PCast the input vector, retaining the 32x4 format.
+ IRAtom* pcasted = mkPCast32x4(mce, vatom2); // :: 32x4
+ // Now truncate each 32 bit lane to 16 bits. Since we already PCasted
+ // the input, we're not going to lose any information.
+ IRAtom* pcHI64
+ = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, pcasted));//32x2
+ IRAtom* pcLO64
+ = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, pcasted)); // 32x2
+ IRAtom* narrowed
+ = assignNew('V', mce, Ity_I64, binop(Iop_NarrowBin32to16x4,
+ pcHI64, pcLO64)); // 16x4
+ // Finally, roll in any badness from the rounding mode.
+ IRAtom* rmPCasted = mkPCastTo(mce, Ity_I64, vatom1);
+ return mkUifU64(mce, narrowed, rmPCasted);
+ }
+
+ case Iop_F32toF16x8: {
+ // Same scheme as for Iop_F32toF16x4.
+ IRAtom* pcasted = mkPCast32x8(mce, vatom2); // :: 32x8
+ IRAtom* pcHI128
+ = assignNew('V', mce, Ity_V128, unop(Iop_V256toV128_1,
+ pcasted)); // 32x4
+ IRAtom* pcLO128
+ = assignNew('V', mce, Ity_V128, unop(Iop_V256toV128_0,
+ pcasted)); // 32x4
+ IRAtom* narrowed
+ = assignNew('V', mce, Ity_V128, binop(Iop_NarrowBin32to16x8,
+ pcHI128, pcLO128)); // 16x8
+ // Finally, roll in any badness from the rounding mode.
+ IRAtom* rmPCasted = mkPCastTo(mce, Ity_V128, vatom1);
+ return mkUifUV128(mce, narrowed, rmPCasted);
+ }
+
default:
ppIROp(op);
VG_(tool_panic)("memcheck:expr2vbits_Binop");
case Iop_QNarrowUn64Sto32Sx2:
case Iop_QNarrowUn64Sto32Ux2:
case Iop_QNarrowUn64Uto32Ux2:
+ return vectorNarrowUnV128(mce, op, vatom);
+
+ // JRS FIXME 2019 Mar 17: per comments on F16toF32x4, this is probably not
+ // right.
case Iop_F32toF16x4_DEP:
return vectorNarrowUnV128(mce, op, vatom);
case Iop_Widen16Uto32x4:
case Iop_Widen32Sto64x2:
case Iop_Widen32Uto64x2:
+ return vectorWidenI64(mce, op, vatom);
+
case Iop_F16toF32x4:
+ // JRS 2019 Mar 17: this definitely isn't right, but it probably works
+ // OK by accident if -- as seems likely -- the F16 to F32 conversion
+ // generates a 32-bit output with at least one 1 bit set whenever one
+ // or more 1 bits are set in the 16-bit input. More
+ // correct code for this is just below, but commented out, so as to
+ // avoid short-term backend failures on targets that can't do
+ // Iop_Interleave{LO,HI}16x4.
return vectorWidenI64(mce, op, vatom);
+ case Iop_F16toF32x8: {
+ // PCast the input at 16x8. This makes each lane hold either all
+ // zeroes or all ones.
+ IRAtom* pcasted = mkPCast16x8(mce, vatom); // :: I16x8
+ // Now double the width of each lane to 32 bits. Because the lanes are
+ // all zeroes or all ones, we can just copy each lane twice into
+ // the result. Here's the low half:
+ IRAtom* widenedLO // :: I32x4
+ = assignNew('V', mce, Ity_V128, binop(Iop_InterleaveLO16x8,
+ pcasted, pcasted));
+ // And the high half:
+ IRAtom* widenedHI // :: I32x4
+ = assignNew('V', mce, Ity_V128, binop(Iop_InterleaveHI16x8,
+ pcasted, pcasted));
+ // Glue them back together:
+ return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
+ widenedHI, widenedLO));
+ }
+
+ // See comment just above, for Iop_F16toF32x4
+ //case Iop_F16toF32x4: {
+ // // Same scheme as F16toF32x8
+ // IRAtom* pcasted = mkPCast16x4(mce, vatom); // :: I16x4
+ // IRAtom* widenedLO // :: I32x2
+ // = assignNew('V', mce, Ity_I64, binop(Iop_InterleaveLO16x4,
+ // pcasted, pcasted));
+ // IRAtom* widenedHI // :: I32x2
+ // = assignNew('V', mce, Ity_I64, binop(Iop_InterleaveHI16x4,
+ // pcasted, pcasted));
+ // // Glue them back together:
+ // return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
+ // widenedHI, widenedLO));
+ //}
+
case Iop_PwAddL32Ux2:
case Iop_PwAddL32Sx2:
return mkPCastTo(mce, Ity_I64,
{ DEFOP(Iop_Fixed32UToF32x4_RN, UNDEF_UNKNOWN), },
{ DEFOP(Iop_Fixed32SToF32x4_RN, UNDEF_UNKNOWN), },
{ DEFOP(Iop_F32toF16x4_DEP, UNDEF_UNKNOWN), },
+ { DEFOP(Iop_F32toF16x4, UNDEF_UNKNOWN), },
{ DEFOP(Iop_F16toF32x4, UNDEF_UNKNOWN), },
{ DEFOP(Iop_F64toF16x2_DEP, UNDEF_UNKNOWN), },
{ DEFOP(Iop_F16toF64x2, UNDEF_UNKNOWN), },
{ DEFOP(Iop_Div32Fx8, UNDEF_UNKNOWN), },
{ DEFOP(Iop_I32StoF32x8, UNDEF_UNKNOWN), },
{ DEFOP(Iop_F32toI32Sx8, UNDEF_UNKNOWN), },
+ { DEFOP(Iop_F32toF16x8, UNDEF_UNKNOWN), },
+ { DEFOP(Iop_F16toF32x8, UNDEF_UNKNOWN) },
{ DEFOP(Iop_Sqrt32Fx8, UNDEF_UNKNOWN), },
{ DEFOP(Iop_Sqrt64Fx4, UNDEF_UNKNOWN), },
{ DEFOP(Iop_RSqrtEst32Fx8, UNDEF_UNKNOWN), },
clc.vgtest clc.stdout.exp clc.stderr.exp \
crc32.vgtest crc32.stdout.exp crc32.stderr.exp \
cmpxchg.vgtest cmpxchg.stdout.exp cmpxchg.stderr.exp \
+ f16c.vgtest f16c.stderr.exp f16c.stdout.exp \
faultstatus.disabled faultstatus.stderr.exp \
fb_test_amd64.vgtest \
fb_test_amd64.stderr.exp fb_test_amd64.stdout.exp \
pcmpxstrx64w.stderr.exp pcmpxstrx64w.stdout.exp \
pcmpxstrx64w.vgtest \
rcl-amd64.vgtest rcl-amd64.stdout.exp rcl-amd64.stderr.exp \
+ rdrand.vgtest rdrand.stdout.exp rdrand.stderr.exp \
redundantRexW.vgtest redundantRexW.stdout.exp \
redundantRexW.stderr.exp \
smc1.stderr.exp smc1.stdout.exp smc1.vgtest \
if BUILD_MPX_TESTS
check_PROGRAMS += mpx
endif
+if BUILD_F16C_TESTS
+ check_PROGRAMS += f16c
+endif
+if BUILD_RDRAND_TESTS
+ check_PROGRAMS += rdrand
+endif
# DDD: these need to be made to work on Darwin like the x86/ ones were.
--- /dev/null
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "tests/malloc.h"
+
+typedef unsigned char UChar;
+typedef unsigned int UInt;
+typedef unsigned long int UWord;
+typedef unsigned long long int ULong;
+
+#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))
+
+typedef union { UChar u8[32]; UInt u32[8]; } YMM;
+
+typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block;
+
+void showYMM ( YMM* vec )
+{
+ int i;
+ assert(IS_32_ALIGNED(vec));
+ for (i = 31; i >= 0; i--) {
+ printf("%02x", (UInt)vec->u8[i]);
+ if (i > 0 && 0 == ((i+0) & 7)) printf(".");
+ }
+}
+
+void showBlock ( char* msg, Block* block )
+{
+ printf(" %s\n", msg);
+ printf(" "); showYMM(&block->a1); printf("\n");
+ printf(" "); showYMM(&block->a2); printf("\n");
+ printf(" "); showYMM(&block->a3); printf("\n");
+ printf(" "); showYMM(&block->a4); printf("\n");
+ printf(" %016llx\n", block->u64);
+}
+
+UChar randUChar ( void )
+{
+ static UInt seed = 80021;
+ seed = 1103515245 * seed + 12345;
+ return (seed >> 17) & 0xFF;
+}
+
+void randBlock ( Block* b )
+{
+ int i;
+ UChar* p = (UChar*)b;
+ for (i = 0; i < sizeof(Block); i++)
+ p[i] = randUChar();
+}
+
+
+/* Generate a function test_NAME, that tests the given insn, in both
+ its mem and reg forms. The reg form of the insn may mention, as
+ operands only %ymm6, %ymm7, %ymm8, %ymm9 and %r14. The mem form of
+ the insn may mention as operands only (%rax), %ymm7, %ymm8, %ymm9
+ and %r14. It's OK for the insn to clobber ymm0, as this is needed
+ for testing PCMPxSTRx, and ymm6, as this is needed for testing
+ MOVMASK variants. */
+
+#define GEN_test_RandM(_name, _reg_form, _mem_form) \
+ \
+ __attribute__ ((noinline)) static void test_##_name ( void ) \
+ { \
+ Block* b = memalign32(sizeof(Block)); \
+ randBlock(b); \
+ printf("%s(reg)\n", #_name); \
+ showBlock("before", b); \
+ __asm__ __volatile__( \
+ "vmovdqa 0(%0),%%ymm7" "\n\t" \
+ "vmovdqa 32(%0),%%ymm8" "\n\t" \
+ "vmovdqa 64(%0),%%ymm6" "\n\t" \
+ "vmovdqa 96(%0),%%ymm9" "\n\t" \
+ "movq 128(%0),%%r14" "\n\t" \
+ _reg_form "\n\t" \
+ "vmovdqa %%ymm7, 0(%0)" "\n\t" \
+ "vmovdqa %%ymm8, 32(%0)" "\n\t" \
+ "vmovdqa %%ymm6, 64(%0)" "\n\t" \
+ "vmovdqa %%ymm9, 96(%0)" "\n\t" \
+ "movq %%r14, 128(%0)" "\n\t" \
+ : /*OUT*/ \
+ : /*IN*/"r"(b) \
+ : /*TRASH*/"xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
+ ); \
+ showBlock("after", b); \
+ randBlock(b); \
+ printf("%s(mem)\n", #_name); \
+ showBlock("before", b); \
+ __asm__ __volatile__( \
+ "leaq 0(%0),%%rax" "\n\t" \
+ "vmovdqa 32(%0),%%ymm8" "\n\t" \
+ "vmovdqa 64(%0),%%ymm7" "\n\t" \
+ "vmovdqa 96(%0),%%ymm9" "\n\t" \
+ "movq 128(%0),%%r14" "\n\t" \
+ _mem_form "\n\t" \
+ "vmovdqa %%ymm8, 32(%0)" "\n\t" \
+ "vmovdqa %%ymm7, 64(%0)" "\n\t" \
+ "vmovdqa %%ymm9, 96(%0)" "\n\t" \
+ "movq %%r14, 128(%0)" "\n\t" \
+ : /*OUT*/ \
+ : /*IN*/"r"(b) \
+ : /*TRASH*/"xmm6", \
+ "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
+ ); \
+ showBlock("after", b); \
+ printf("\n"); \
+ free(b); \
+ }
+
+#define GEN_test_Ronly(_name, _reg_form) \
+ GEN_test_RandM(_name, _reg_form, "")
+#define GEN_test_Monly(_name, _mem_form) \
+ GEN_test_RandM(_name, "", _mem_form)
+
+GEN_test_RandM(VCVTPH2PS_128,
+ "vcvtph2ps %%xmm6, %%xmm8",
+ "vcvtph2ps (%%rax), %%xmm8");
+GEN_test_RandM(VCVTPH2PS_256,
+ "vcvtph2ps %%xmm6, %%ymm8",
+ "vcvtph2ps (%%rax), %%ymm8");
+
+GEN_test_RandM(VCVTPS2PH_128_0,
+ "vcvtps2ph $0, %%xmm8, %%xmm6",
+ "vcvtps2ph $0, %%xmm8, (%%rax)");
+GEN_test_RandM(VCVTPS2PH_256_0,
+ "vcvtps2ph $0, %%ymm8, %%xmm6",
+ "vcvtps2ph $0, %%ymm8, (%%rax)");
+
+GEN_test_RandM(VCVTPS2PH_128_1,
+ "vcvtps2ph $1, %%xmm8, %%xmm6",
+ "vcvtps2ph $1, %%xmm8, (%%rax)");
+GEN_test_RandM(VCVTPS2PH_256_1,
+ "vcvtps2ph $1, %%ymm8, %%xmm6",
+ "vcvtps2ph $1, %%ymm8, (%%rax)");
+
+GEN_test_RandM(VCVTPS2PH_128_2,
+ "vcvtps2ph $2, %%xmm8, %%xmm6",
+ "vcvtps2ph $2, %%xmm8, (%%rax)");
+GEN_test_RandM(VCVTPS2PH_256_2,
+ "vcvtps2ph $2, %%ymm8, %%xmm6",
+ "vcvtps2ph $2, %%ymm8, (%%rax)");
+
+GEN_test_RandM(VCVTPS2PH_128_3,
+ "vcvtps2ph $3, %%xmm8, %%xmm6",
+ "vcvtps2ph $3, %%xmm8, (%%rax)");
+GEN_test_RandM(VCVTPS2PH_256_3,
+ "vcvtps2ph $3, %%ymm8, %%xmm6",
+ "vcvtps2ph $3, %%ymm8, (%%rax)");
+
+GEN_test_RandM(VCVTPS2PH_128_4,
+ "vcvtps2ph $4, %%xmm8, %%xmm6",
+ "vcvtps2ph $4, %%xmm8, (%%rax)");
+GEN_test_RandM(VCVTPS2PH_256_4,
+ "vcvtps2ph $4, %%ymm8, %%xmm6",
+ "vcvtps2ph $4, %%ymm8, (%%rax)");
+
+/* Comment duplicated above, for convenient reference:
+ Allowed operands in test insns:
+ Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14.
+ Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14.
+ Imm8 etc fields are also allowed, where they make sense.
+ Both forms may use ymm0 as scratch. Mem form may also use
+ ymm6 as scratch.
+*/
+
+#define N_DEFAULT_ITERS 3
+
+// Do the specified test some number of times
+#define DO_N(_iters, _testfn) \
+ do { int i; for (i = 0; i < (_iters); i++) { test_##_testfn(); } } while (0)
+
+// Do the specified test the default number of times
+#define DO_D(_testfn) DO_N(N_DEFAULT_ITERS, _testfn)
+
+
+int main ( void )
+{
+ DO_D( VCVTPH2PS_128 );
+ DO_D( VCVTPH2PS_256 );
+
+ DO_D( VCVTPS2PH_128_0 );
+ DO_D( VCVTPS2PH_256_0 );
+
+ DO_D( VCVTPS2PH_128_1 );
+ DO_D( VCVTPS2PH_256_1 );
+
+ DO_D( VCVTPS2PH_128_2 );
+ DO_D( VCVTPS2PH_256_2 );
+
+ DO_D( VCVTPS2PH_128_3 );
+ DO_D( VCVTPS2PH_256_3 );
+
+ DO_D( VCVTPS2PH_128_4 );
+ DO_D( VCVTPS2PH_256_4 );
+
+ return 0;
+}
--- /dev/null
+VCVTPH2PS_128(reg)
+ before
+ 7d6528c5fa956a0d.69c3e9a6af27d13b.5175e39d19c9ca1e.98f24a4984175700
+ b6d2fb5aa7bc5127.fe9915e556a044b2.60b160857d45c484.47b8d8c0eeef1e50
+ 065d77195d623e6b.842adc6450659e17.19a348215c3a67fd.399182c2dbcc2d38
+ cb509970b8136c85.d740b80eb7839b97.d89998df5035ed36.4a4bc43968bc40e5
+ 56b01a12b0ca1583
+ after
+ 7d6528c5fa956a0d.69c3e9a6af27d13b.5175e39d19c9ca1e.98f24a4984175700
+ 0000000000000000.0000000000000000.3f322000b8308000.c37980003da70000
+ 065d77195d623e6b.842adc6450659e17.19a348215c3a67fd.399182c2dbcc2d38
+ cb509970b8136c85.d740b80eb7839b97.d89998df5035ed36.4a4bc43968bc40e5
+ 56b01a12b0ca1583
+VCVTPH2PS_128(mem)
+ before
+ 398e0039cf03663d.5ff85bc9535c191f.d3a727d1a705f65d.f9dd4a29f8c093db
+ cfaff39be272ef40.20a1bb92cbc97fe8.542da4983df76c96.d8bc5c6dee699597
+ f4e06e2205236eb7.6897b536bbe4da8a.369dab4f9465b86e.d182c916cebc2e17
+ 84ededbc53239dcf.95264321bf3b68b2.55c2b9e2c95c9810.407b8d9035449b06
+ 81f2a547be8d1811
+ after
+ 398e0039cf03663d.5ff85bc9535c191f.d3a727d1a705f65d.f9dd4a29f8c093db
+ 0000000000000000.0000000000000000.c73ba00041452000.c7180000ba7b6000
+ f4e06e2205236eb7.6897b536bbe4da8a.369dab4f9465b86e.d182c916cebc2e17
+ 84ededbc53239dcf.95264321bf3b68b2.55c2b9e2c95c9810.407b8d9035449b06
+ 81f2a547be8d1811
+
+VCVTPH2PS_128(reg)
+ before
+ f0350ca70523e0e4.5ba1ec54e87d39b3.019963bf7459630b.8d69483df7e8c6a9
+ e98ebd1ca893312a.54cae7d5e13dfe91.0a3e0f7c75cb0842.b95ed64d3b13ff64
+ c84ab71340684590.4d325b2d5a70a792.0a5f45c55f1c9202.b76ddefcb0ebfe6e
+ e9b5f3f66b2e58c1.21a6c3476d21f1e5.5f490104ced83ff8.6262dd37727c80f3
+ 96084deb9ed0411e
+ after
+ f0350ca70523e0e4.5ba1ec54e87d39b3.019963bf7459630b.8d69483df7e8c6a9
+ 0000000000000000.0000000000000000.beeda000c3df8000.be1d6000ffcdc000
+ c84ab71340684590.4d325b2d5a70a792.0a5f45c55f1c9202.b76ddefcb0ebfe6e
+ e9b5f3f66b2e58c1.21a6c3476d21f1e5.5f490104ced83ff8.6262dd37727c80f3
+ 96084deb9ed0411e
+VCVTPH2PS_128(mem)
+ before
+ 2e2dac0350f6fd1c.a81b6e33c572a86a.acf29b0f395c98b4.63483da65c8c49d0
+ 089b756aa3f77018.61c82534e9bf6f37.c9e25f72d82e582b.73a8f718a8c3ec35
+ ff1f240eb3e1553f.6f07136773a2ead3.56428c5a66a2ec77.ecb42ac54b0966d4
+ ee8536da9dbf68bc.3026343700a654eb.2ddd9db4ffc411c4.28bad218e4ebf159
+ 8404eb7f0cf4ca6f
+ after
+ 2e2dac0350f6fd1c.a81b6e33c572a86a.acf29b0f395c98b4.63483da65c8c49d0
+ 0000000000000000.0000000000000000.446900003fb4c000.43918000413a0000
+ ff1f240eb3e1553f.6f07136773a2ead3.56428c5a66a2ec77.ecb42ac54b0966d4
+ ee8536da9dbf68bc.3026343700a654eb.2ddd9db4ffc411c4.28bad218e4ebf159
+ 8404eb7f0cf4ca6f
+
+VCVTPH2PS_128(reg)
+ before
+ 5cdf726562b02dc2.b39925ba7d9d67bc.ff6f850f2c57ea2a.2c810e6dc1a1833d
+ 0c9761367fac55ff.28276f9a6e880c6b.372f015d9242e83d.2ef85b6fc544fd0f
+ f078b65e01737fd2.2bfa8f668c8b14f4.36b2a38dcef18acf.0e0f01a829ba3c66
+ 65ce6d498492e7e7.96df010bf4b23b84.57436a097df30b8d.aa927a03090dfc6d
+ dc4c446c804bf950
+ after
+ 5cdf726562b02dc2.b39925ba7d9d67bc.ff6f850f2c57ea2a.2c810e6dc1a1833d
+ 0000000000000000.0000000000000000.39c1e00037d40000.3d3740003f8cc000
+ f078b65e01737fd2.2bfa8f668c8b14f4.36b2a38dcef18acf.0e0f01a829ba3c66
+ 65ce6d498492e7e7.96df010bf4b23b84.57436a097df30b8d.aa927a03090dfc6d
+ dc4c446c804bf950
+VCVTPH2PS_128(mem)
+ before
+ 810bdacfab80ee3d.c5e48064a393c8e9.47a34273c10a3c47.f5304f3e3ad1a923
+ 769ab818a5b7985e.6d08ed19fa045f84.1810cd8c109ed568.6ec34f98a2199d3c
+ 95c45b338afcb3df.b984aed62671e865.e6f21d40fc7bc013.1c4a678450562685
+ bc563e0c775bfaed.05a5c205c3659f38.8e17b17da2acb976.5d0f926ce1157eaa
+ 8b5fccbef0e1e256
+ after
+ 810bdacfab80ee3d.c5e48064a393c8e9.47a34273c10a3c47.f5304f3e3ad1a923
+ 0000000000000000.0000000000000000.c6a6000041e7c000.3f5a2000bd246000
+ 95c45b338afcb3df.b984aed62671e865.e6f21d40fc7bc013.1c4a678450562685
+ bc563e0c775bfaed.05a5c205c3659f38.8e17b17da2acb976.5d0f926ce1157eaa
+ 8b5fccbef0e1e256
+
+VCVTPH2PS_256(reg)
+ before
+ 048612e51a468e36.c51cdd8f87e12ab4.acb722146c6cbfa9.ea4a022e1d3d7dbb
+ 22cf5e4cfad1bdf5.8de2b4a9d799ff5f.0c05cb6ebd128663.d7568e3e8a3ac80e
+ 4288ae612c0dad40.f0733f448390351b.80ddba7e53e42d12.3208cf9b04b0569c
+ c1fbfd8f4d8698c2.cb9dfb4ea5d18713.6489eab2c96df363.d52c4330a7aae391
+ 9d8e66ea90352a18
+ after
+ 048612e51a468e36.c51cdd8f87e12ab4.acb722146c6cbfa9.ea4a022e1d3d7dbb
+ b75d0000bf4fc000.427c80003da24000.3e410000c1f36000.3896000042d38000
+ 4288ae612c0dad40.f0733f448390351b.80ddba7e53e42d12.3208cf9b04b0569c
+ c1fbfd8f4d8698c2.cb9dfb4ea5d18713.6489eab2c96df363.d52c4330a7aae391
+ 9d8e66ea90352a18
+VCVTPH2PS_256(mem)
+ before
+ 66fab2b3db5ce85e.f9754842f9c9ba28.f82a63b15c68b274.14575775bc3a1202
+ 0c3ca578a32bd88e.474289e7cb61501e.54e7f35bc162726a.ec91fe34c7d6c79a
+ 6b1fba2604afb8d5.08aebee85fda964f.bba02737f3c98220.4784d95987cd4ed8
+ 5f706da71bf2425f.9605e2b252c1c868.09217c310baca0c3.837be65197abe268
+ fbc4208894fdc0f5
+ after
+ 66fab2b3db5ce85e.f9754842f9c9ba28.f82a63b15c68b274.14575775bc3a1202
+ c705400044762000.438d0000be4e8000.3a8ae00042eea000.bf8740003a404000
+ 6b1fba2604afb8d5.08aebee85fda964f.bba02737f3c98220.4784d95987cd4ed8
+ 5f706da71bf2425f.9605e2b252c1c868.09217c310baca0c3.837be65197abe268
+ fbc4208894fdc0f5
+
+VCVTPH2PS_256(reg)
+ before
+ 0aaa836b194e242c.c5fc3ae904033357.4e92f1b240a12214.1a366d352714867e
+ 0e780c65c22b4ab8.778d9ed6d9eb46ea.8ca3e752c306df00.caab752f630ff07e
+ 627bb6e12d1f6d46.51ef145cb9b83843.ac82c1007a7d3cd8.f54b130cdaa89cef
+ 61ff7d4df3b6ca81.31f01866bd76c58f.0a7c7a27fe917447.77e3c0b6a9ec44fc
+ 2c3ffa1aebe6a4d2
+ after
+ 0aaa836b194e242c.c5fc3ae904033357.4e92f1b240a12214.1a366d352714867e
+ bd904000c0200000.474fa0003f9b0000.c6a960003a618000.c3550000bb9de000
+ 627bb6e12d1f6d46.51ef145cb9b83843.ac82c1007a7d3cd8.f54b130cdaa89cef
+ 61ff7d4df3b6ca81.31f01866bd76c58f.0a7c7a27fe917447.77e3c0b6a9ec44fc
+ 2c3ffa1aebe6a4d2
+VCVTPH2PS_256(mem)
+ before
+ f02b3b25bca27a9c.69505d14b27d9d16.f25b26e0042fa9fa.02dd0e32eecfc5fa
+ 9f7301c1392d8087.d4ba52a206ff21b1.70fbbab6a7f19faf.f0f1798fe3c1699c
+ 15e3c8dc7e9273bf.0088596389c893fd.879d51d4c5c764db.3004b7a97cf69dda
+ 2d460a61a5dd0f6f.47086cc3da642fa7.130d662777beb4a9.1e61c5ec52f79c60
+ 16559ec50352a3d9
+ after
+ f02b3b25bca27a9c.69505d14b27d9d16.f25b26e0042fa9fa.02dd0e32eecfc5fa
+ c64b60003cdc0000.3885e000bd3f4000.3837400039c64000.c5d9e000c0bf4000
+ 15e3c8dc7e9273bf.0088596389c893fd.879d51d4c5c764db.3004b7a97cf69dda
+ 2d460a61a5dd0f6f.47086cc3da642fa7.130d662777beb4a9.1e61c5ec52f79c60
+ 16559ec50352a3d9
+
+VCVTPH2PS_256(reg)
+ before
+ 742c3e9e2b92eef2.c569453ccd1b0fc4.0784892e9360315b.f0177599dbe14b46
+ 9432a2e46543b956.b819f459105730e9.9a49ac115048d4c4.f987fa170d3ce4dd
+ d2b3c4044ef23fb2.e22093a48a9d2e0b.5da3cfd6aea6558e.0c28728e28dc3c9c
+ 89fba268812abdb2.1e4a9e0958fac555.adddf0eb4808f067.04c857e949cc0fac
+ bc3127138b19183c
+ after
+ 742c3e9e2b92eef2.c569453ccd1b0fc4.0784892e9360315b.f0177599dbe14b46
+ 43b46000c1fac000.bdd4c00042b1c000.398500004651c000.3d1b80003f938000
+ d2b3c4044ef23fb2.e22093a48a9d2e0b.5da3cfd6aea6558e.0c28728e28dc3c9c
+ 89fba268812abdb2.1e4a9e0958fac555.adddf0eb4808f067.04c857e949cc0fac
+ bc3127138b19183c
+VCVTPH2PS_256(mem)
+ before
+ 12305efa0acd1475.1755377e9a786f01.4a6592749579b0f4.e4450ababbfae0f9
+ e1917689e3f6bf86.d70f7fb13667914c.413cead25e27ac14.5f2619b1a20662f0
+ 0420edac31a0d599.2573776df1835e3e.de9a220dce0e75e0.7acb193b9abab2f9
+ 59a93d4f11d611db.5cce191e65591384.ff4cb613013cc685.918107c43ea20cc0
+ 0194ddb82b49abf0
+ after
+ 12305efa0acd1475.1755377e9a786f01.4a6592749579b0f4.e4450ababbfae0f9
+ 414ca000ba4e8000.baaf2000be1e8000.c488a00039574000.bf7f4000c41f2000
+ 0420edac31a0d599.2573776df1835e3e.de9a220dce0e75e0.7acb193b9abab2f9
+ 59a93d4f11d611db.5cce191e65591384.ff4cb613013cc685.918107c43ea20cc0
+ 0194ddb82b49abf0
+
+VCVTPS2PH_128_0(reg)
+ before
+ 24509983fc3bcc36.baf7e45e9fa43077.da6c63303173ecc9.7e1e22cf15bd5c2f
+ 570037914d04ab3d.05d75ec6f616ee9a.fa99500fef6024ba.39dce32c239cf309
+ f6f2b14fbb3184b2.141625713239066f.17a0dc273ba9f803.0a52741849e54740
+ 5d700527e24d9241.c57eb74d70183523.8fcf04e5b2dca44f.cf4c517ea3a413ff
+ 6295f64a4ce61473
+ after
+ 24509983fc3bcc36.baf7e45e9fa43077.da6c63303173ecc9.7e1e22cf15bd5c2f
+ 570037914d04ab3d.05d75ec6f616ee9a.fa99500fef6024ba.39dce32c239cf309
+ 0000000000000000.0000000000000000.0000000000000000.fc00fc000ee70000
+ 5d700527e24d9241.c57eb74d70183523.8fcf04e5b2dca44f.cf4c517ea3a413ff
+ 6295f64a4ce61473
+VCVTPS2PH_128_0(mem)
+ before
+ a0fae06860b606c7.e8c72e865de41295.f2db8f44cbbf37e2.bc70c3b3ef84644b
+ 66478ac4fc21a428.f34428d9c8833f5b.78fb29445f3bc8d7.fcd015ff8f2e73a3
+ 8e48704b3c31abc2.da30ef8bc0b5573e.34a901384a97a32f.a93bf6332d650e02
+ f5c90ee73af5d7c0.f9da7f07e00794eb.00b0940ba5e08516.20fd62bd65b57115
+ be625608d5abd787
+ after
+ a0fae06860b606c7.e8c72e865de41295.f2db8f44cbbf37e2.7c007c00fc008000
+ 66478ac4fc21a428.f34428d9c8833f5b.78fb29445f3bc8d7.fcd015ff8f2e73a3
+ 8e48704b3c31abc2.da30ef8bc0b5573e.34a901384a97a32f.a93bf6332d650e02
+ f5c90ee73af5d7c0.f9da7f07e00794eb.00b0940ba5e08516.20fd62bd65b57115
+ be625608d5abd787
+
+VCVTPS2PH_128_0(reg)
+ before
+ ddb5cd8016d27d05.7796e0861576e44f.ac8dd5bbc503330e.b9dd5dab8e212ab7
+ db43c391c6b69f3a.f17a6312e7c28d9a.4e94ec120b386f52.3bfcd80321664d3e
+ 125934a781e479d3.3d431279cce48fce.3d3cc0784c2f8563.63d9810079bbabd9
+ df411d2ee2e7467c.38bb69a6e1e9a617.d4d14e592776b1ef.0b40d58cb22d00b1
+ 10fd4e94e9c808f5
+ after
+ ddb5cd8016d27d05.7796e0861576e44f.ac8dd5bbc503330e.b9dd5dab8e212ab7
+ db43c391c6b69f3a.f17a6312e7c28d9a.4e94ec120b386f52.3bfcd80321664d3e
+ 0000000000000000.0000000000000000.0000000000000000.7c0000001fe70000
+ df411d2ee2e7467c.38bb69a6e1e9a617.d4d14e592776b1ef.0b40d58cb22d00b1
+ 10fd4e94e9c808f5
+VCVTPS2PH_128_0(mem)
+ before
+ 4edb6a053a967ecf.9e477892854b43e0.beafe48541dc8da0.6f9f902235982fa0
+ a1a7a4c9c0a51f6b.acb9433f079dacac.abeb000208c90296.69f2843d15223a22
+ e52e79ce9700a7f7.63e279a20368bc8b.db3b370954bcbf24.20162517609f0f22
+ f63a63fedcb4d29c.200d17261638b12a.2a6a07863ec28077.ef56701db49bea4c
+ 31005fb9ada2074b
+ after
+ 4edb6a053a967ecf.9e477892854b43e0.beafe48541dc8da0.800000007c000000
+ a1a7a4c9c0a51f6b.acb9433f079dacac.abeb000208c90296.69f2843d15223a22
+ e52e79ce9700a7f7.63e279a20368bc8b.db3b370954bcbf24.20162517609f0f22
+ f63a63fedcb4d29c.200d17261638b12a.2a6a07863ec28077.ef56701db49bea4c
+ 31005fb9ada2074b
+
+VCVTPS2PH_128_0(reg)
+ before
+ 445ef059e641a1cc.b097e047aacc5b89.3f871736dc9ac535.7446eb65e4e703bb
+ 83bd1e68fb03f57b.ef136b941e54ffe8.1c9c7740ef193457.959960926235021b
+ 4969e55289753f03.8f7980d1535979e5.139832afee423c3d.6930e0fad3ba39c4
+ f4ad41832c22ba11.6c949cea66e687ae.80c745ef729f1792.ccd7e987538166e1
+ 9a5af627ff97439f
+ after
+ 445ef059e641a1cc.b097e047aacc5b89.3f871736dc9ac535.7446eb65e4e703bb
+ 83bd1e68fb03f57b.ef136b941e54ffe8.1c9c7740ef193457.959960926235021b
+ 0000000000000000.0000000000000000.0000000000000000.0000fc0080007c00
+ f4ad41832c22ba11.6c949cea66e687ae.80c745ef729f1792.ccd7e987538166e1
+ 9a5af627ff97439f
+VCVTPS2PH_128_0(mem)
+ before
+ af8484c5f3078d2a.ded72f677f96a350.623139cb7207e36c.bf75aa6c1abe0103
+ e6230d4d4add00ad.6431aa6a1e5e366d.4c1cd56194c94a4e.2ced5f927f2b383c
+ 1d010fab20265755.e309aef8a605af13.0821eb96e737777e.237d5fcd3f71f6e8
+ 2feb05cb92ed4f4d.b5a9377eb31749ef.710cf757885d2728.006fa689f61c78b4
+ 1f1030333fb8fa4b
+ after
+ af8484c5f3078d2a.ded72f677f96a350.623139cb7207e36c.7c00800000007c00
+ e6230d4d4add00ad.6431aa6a1e5e366d.4c1cd56194c94a4e.2ced5f927f2b383c
+ 1d010fab20265755.e309aef8a605af13.0821eb96e737777e.237d5fcd3f71f6e8
+ 2feb05cb92ed4f4d.b5a9377eb31749ef.710cf757885d2728.006fa689f61c78b4
+ 1f1030333fb8fa4b
+
+VCVTPS2PH_256_0(reg)
+ before
+ dbacfa35b7d2b75a.f8ad6b99bb3fa4c2.385e4166df2141ad.63a8769192481679
+ 928efefdf9f5ec8d.5313bd01b82612e0.c673c91ec9aed3f8.b9c3e32f2103009d
+ 9f043af6a1aed58f.1ee978efa4b054d2.bc36ca100a4a3a7d.5127ba1c529aa0bf
+ 5e58aa8b4c88ae0d.34fa174f9ce927c4.76f140aa4182b4e7.06a17746411ab40c
+ b3fd9698098ef5b0
+ after
+ dbacfa35b7d2b75a.f8ad6b99bb3fa4c2.385e4166df2141ad.63a8769192481679
+ 928efefdf9f5ec8d.5313bd01b82612e0.c673c91ec9aed3f8.b9c3e32f2103009d
+ 0000000000000000.0000000000000000.8000fc007c008298.f39efc008e1f0000
+ 5e58aa8b4c88ae0d.34fa174f9ce927c4.76f140aa4182b4e7.06a17746411ab40c
+ b3fd9698098ef5b0
+VCVTPS2PH_256_0(mem)
+ before
+ 360794fec60222d6.2ad7482a960fb2b2.7014160ebbdb47e4.51f2275707e17ae4
+ 698bec649583f5aa.61cd123e19cf1e2b.b001f1161e946f5c.a7837c83faf3cb1d
+ 2a541ab7911c2b5a.5e86033374552e23.ce8e2455e0205c58.d5f13a9ab645e140
+ 532f9ae1d7da8010.7c4e1775412d1d47.a8872cb61d8aca05.37885d08d662faf9
+ 2993e139f7d64ff4
+ after
+ 360794fec60222d6.2ad7482a960fb2b2.7c0080007c000000.800000008000fc00
+ 698bec649583f5aa.61cd123e19cf1e2b.b001f1161e946f5c.a7837c83faf3cb1d
+ 2a541ab7911c2b5a.5e86033374552e23.ce8e2455e0205c58.d5f13a9ab645e140
+ 532f9ae1d7da8010.7c4e1775412d1d47.a8872cb61d8aca05.37885d08d662faf9
+ 2993e139f7d64ff4
+
+VCVTPS2PH_256_0(reg)
+ before
+ 0760c299b42e1fdc.c2e9e9cf82c7aff8.19714a711ce12843.18b88425f2de758f
+ 2d39fd95a9f5a45d.514c816eaff2763f.8f3a9991a2ff8bc2.fceca88e7b281821
+ f76b8d9773b81b24.de24e0a879648e11.3cf6fe426e128171.2ef114ddd37570e8
+ c1426e0dae01c0dd.433f816bfd2bb699.7af177f11da748fc.8b9145fe16d0390f
+ f099b6dd61462ec3
+ after
+ 0760c299b42e1fdc.c2e9e9cf82c7aff8.19714a711ce12843.18b88425f2de758f
+ 2d39fd95a9f5a45d.514c816eaff2763f.8f3a9991a2ff8bc2.fceca88e7b281821
+ 0000000000000000.0000000000000000.000080007c008000.80008000fc007c00
+ c1426e0dae01c0dd.433f816bfd2bb699.7af177f11da748fc.8b9145fe16d0390f
+ f099b6dd61462ec3
+VCVTPS2PH_256_0(mem)
+ before
+ 37d5e366d0e20c30.e70a9c61f55fce33.5d68e1a25652a804.a77700084a491a0e
+ 3d1148867eb08f81.c50f1401e45b82d3.086a7a39a1e6217d.1dd493f591843454
+ de18612787bc73e3.b79cd05818831869.2112ca1cf9f1dd31.3a542e238fe5d179
+ f7b8ab3708137382.19ffced22c62cba0.822c4c377b82984c.5842cbfee0f72e2a
+ d4ec68f21f468712
+ after
+ 37d5e366d0e20c30.e70a9c61f55fce33.288a7c00e879fc00.0000800000008000
+ 3d1148867eb08f81.c50f1401e45b82d3.086a7a39a1e6217d.1dd493f591843454
+ de18612787bc73e3.b79cd05818831869.2112ca1cf9f1dd31.3a542e238fe5d179
+ f7b8ab3708137382.19ffced22c62cba0.822c4c377b82984c.5842cbfee0f72e2a
+ d4ec68f21f468712
+
+VCVTPS2PH_256_0(reg)
+ before
+ 0b9c016be95f18de.62bba1a11cc04c89.478209dbbd84d925.08847c7642a20df9
+ 579f90d5d9cd1c3a.fceebf50e0d0ba24.9c727edf66767ca3.8fe6d7c56a5ff965
+ 1541139c8b1cd0d1.a11d81326f4e7880.761b274ac4c4f0c7.f31ed81010c417bc
+ a1cd852d9cd97050.2d146432e64644c9.30c9028972f8733d.11f7fa4450de2529
+ c33ebc4b44b8ddd8
+ after
+ 0b9c016be95f18de.62bba1a11cc04c89.478209dbbd84d925.08847c7642a20df9
+ 579f90d5d9cd1c3a.fceebf50e0d0ba24.9c727edf66767ca3.8fe6d7c56a5ff965
+ 0000000000000000.0000000000000000.7c00fc00fc00fc00.80007c0080007c00
+ a1cd852d9cd97050.2d146432e64644c9.30c9028972f8733d.11f7fa4450de2529
+ c33ebc4b44b8ddd8
+VCVTPS2PH_256_0(mem)
+ before
+ e6c097130b5efcf6.5791e2f2a78f3762.7c9fe23c60c5d82b.25c80a060da03fb0
+ 56470887bfdd3daf.94d7265949ca62b4.6a8a793cf9d5f0d1.b3633c2f304791cd
+ ef9f8c927c405d2f.b2ed4ecc1e172df2.d3a0a41fce854ae7.35e7926e777aa43f
+ 8d969e225f9318a0.0e1d55b9c001d4c7.93aee0cffbdea09a.06a10a317fc4b5b3
+ 84db9fe3e4b100d4
+ after
+ e6c097130b5efcf6.5791e2f2a78f3762.7c00beea80007c00.7c00fc0080010000
+ 56470887bfdd3daf.94d7265949ca62b4.6a8a793cf9d5f0d1.b3633c2f304791cd
+ ef9f8c927c405d2f.b2ed4ecc1e172df2.d3a0a41fce854ae7.35e7926e777aa43f
+ 8d969e225f9318a0.0e1d55b9c001d4c7.93aee0cffbdea09a.06a10a317fc4b5b3
+ 84db9fe3e4b100d4
+
+VCVTPS2PH_128_1(reg)
+ before
+ 09e14df041cdc14f.0bf7ba2283e22a31.04b4378bce1492e0.8680a7399beeae16
+ f30110c432a534d0.478d5d7e053a4e0c.f0fdf0aee1dda4e8.88e2774acbc13287
+ 9c86e5cb54c59402.1c25022200a7415e.2e467d8e98e7468c.75a0cbeda561e618
+ 62bbc77143b71e92.668b24fb9133bf52.1adad8978cbfb478.29861f0d48dc87f5
+ 805ff098ce3ed14b
+ after
+ 09e14df041cdc14f.0bf7ba2283e22a31.04b4378bce1492e0.8680a7399beeae16
+ f30110c432a534d0.478d5d7e053a4e0c.f0fdf0aee1dda4e8.88e2774acbc13287
+ 0000000000000000.0000000000000000.0000000000000000.fc00fc008001fc00
+ 62bbc77143b71e92.668b24fb9133bf52.1adad8978cbfb478.29861f0d48dc87f5
+ 805ff098ce3ed14b
+VCVTPS2PH_128_1(mem)
+ before
+ 55f9b97953917f46.9fedb2229a090d2c.018b42f3d3ec8415.1004ff355bf02957
+ 861ef69cf4e34e11.b168a24af5479e7b.c9f1d5f8e2de4bd3.6c11edd5a106e2d6
+ ee9b23edcc40fad9.f2789356f1fb0d2b.99885af4db13d1b7.894d9fe1f98d1aa0
+ 683cbc58f8b23fca.bf6982b029b396ea.4f1e4ed5da99d2ee.c5040fc700120f62
+ 7b813bf15120fbc8
+ after
+ 55f9b97953917f46.9fedb2229a090d2c.018b42f3d3ec8415.fc00fc007bff8001
+ 861ef69cf4e34e11.b168a24af5479e7b.c9f1d5f8e2de4bd3.6c11edd5a106e2d6
+ ee9b23edcc40fad9.f2789356f1fb0d2b.99885af4db13d1b7.894d9fe1f98d1aa0
+ 683cbc58f8b23fca.bf6982b029b396ea.4f1e4ed5da99d2ee.c5040fc700120f62
+ 7b813bf15120fbc8
+
+VCVTPS2PH_128_1(reg)
+ before
+ 07121ecd88441b7d.d2cc3eca9347d80f.74876ac63afb7562.c67d2c86fa7c09a3
+ c501b4c64209aa2e.0719232dba0b82d5.6e1d4703bf5de53f.d97270f257c73303
+ 109cfa471afbe686.e2ede96f8809f947.077815d35567232e.66c997070e860c39
+ 462deabeada60932.41150c7a1a4df892.9ce5d1a297a56adb.474e1bb03bc55073
+ 5acd7ad9f991bada
+ after
+ 07121ecd88441b7d.d2cc3eca9347d80f.74876ac63afb7562.c67d2c86fa7c09a3
+ c501b4c64209aa2e.0719232dba0b82d5.6e1d4703bf5de53f.d97270f257c73303
+ 0000000000000000.0000000000000000.0000000000000000.7bffbaf0fc007bff
+ 462deabeada60932.41150c7a1a4df892.9ce5d1a297a56adb.474e1bb03bc55073
+ 5acd7ad9f991bada
+VCVTPS2PH_128_1(mem)
+ before
+ 79112f6f64f5079c.c201829797974fdd.fe5d063c8be33ce1.89ad76dc21a1f8f1
+ 82e7b65c99fdf3e5.df63bd3c7359f634.f791559ff8d88161.2a1f00ed91e9071d
+ 4f1c8c8db3b639e1.fba1981add7938e3.067d74917c37833e.db866b418009d40a
+ be7a2cefcf2b96bb.70050d9d72825295.09eddffd330cfda3.f82db3448c8c9a65
+ de62d56351fe96da
+ after
+ 79112f6f64f5079c.c201829797974fdd.fe5d063c8be33ce1.fc00fc0000008001
+ 82e7b65c99fdf3e5.df63bd3c7359f634.f791559ff8d88161.2a1f00ed91e9071d
+ 4f1c8c8db3b639e1.fba1981add7938e3.067d74917c37833e.db866b418009d40a
+ be7a2cefcf2b96bb.70050d9d72825295.09eddffd330cfda3.f82db3448c8c9a65
+ de62d56351fe96da
+
+VCVTPS2PH_128_1(reg)
+ before
+ e76fcc086aeb0414.a9cd126c0869c6a0.9cdd1a32cd007ff7.daac12cf3a64acbd
+ 6fa194a173e020c0.ede3baf27b7b85bb.d973ba438b80fdb5.56878af3ad4a4cb8
+ d444299809682589.6787a06c436d8e39.8514e93e478d067a.5a4ac156a6cb98bf
+ 71a4885bc70f501c.f18441c67d4b9e45.fa0ba48e9db3d6f2.c0c135e244f24dfe
+ 65b86284a1cb27a3
+ after
+ e76fcc086aeb0414.a9cd126c0869c6a0.9cdd1a32cd007ff7.daac12cf3a64acbd
+ 6fa194a173e020c0.ede3baf27b7b85bb.d973ba438b80fdb5.56878af3ad4a4cb8
+ 0000000000000000.0000000000000000.0000000000000000.fc0080017bff8001
+ 71a4885bc70f501c.f18441c67d4b9e45.fa0ba48e9db3d6f2.c0c135e244f24dfe
+ 65b86284a1cb27a3
+VCVTPS2PH_128_1(mem)
+ before
+ 26fbc229d962e2d7.a20cab554a62dd24.68a718ec4422710c.95a6e59e2a7fabcb
+ dd55d3bb09c439c9.c3ca90f22dec084f.a9bca1cab4fdc2ba.b330aadc8a7cbfaf
+ 67940f284cfce9a3.3028339e0d3a0c46.8e8f584ceae94e7a.aef4eeb358364f4a
+ a082f55bbf17ae91.45738ec585d726b8.f4ecb95e02f1d179.e33fad8f313a9649
+ af5de4ddb013d258
+ after
+ 26fbc229d962e2d7.a20cab554a62dd24.68a718ec4422710c.8001800880018001
+ dd55d3bb09c439c9.c3ca90f22dec084f.a9bca1cab4fdc2ba.b330aadc8a7cbfaf
+ 67940f284cfce9a3.3028339e0d3a0c46.8e8f584ceae94e7a.aef4eeb358364f4a
+ a082f55bbf17ae91.45738ec585d726b8.f4ecb95e02f1d179.e33fad8f313a9649
+ af5de4ddb013d258
+
+VCVTPS2PH_256_1(reg)
+ before
+ 6c9a8e07714d3d22.64ecfe407d2043c1.5df79fd3324f914f.b79f41ec172107e2
+ 7742a77a11751354.8f9ea7c3a323665c.d6006035af2e8bb7.b3736be34585abe2
+ 2ca02ba32b169299.fd646dd04c2dd191.0bd9cf5599014e9d.c435b32da92a7aa5
+ e70216ec5cbcf49e.8a09cb539549408a.57d0e8a18b5417ad.c6b295b85f1c3056
+ 95b48e6f81658922
+ after
+ 6c9a8e07714d3d22.64ecfe407d2043c1.5df79fd3324f914f.b79f41ec172107e2
+ 7742a77a11751354.8f9ea7c3a323665c.d6006035af2e8bb7.b3736be34585abe2
+ 0000000000000000.0000000000000000.7bff000080018001.fc00800180016c2d
+ e70216ec5cbcf49e.8a09cb539549408a.57d0e8a18b5417ad.c6b295b85f1c3056
+ 95b48e6f81658922
+VCVTPS2PH_256_1(mem)
+ before
+ 0d07193d2e134034.05b265c33ff4760f.125b3d3899837173.182fa58322b12192
+ 0a79b210803112b9.e0fd139371495497.7124406c74e81e7a.a9430469f9a6aaf9
+ 68d5d5d393ccbadd.d52f1cc78e47c9e3.83314ed9438203c8.655800beacbeec8e
+ 03e6fe7283eff6cb.40947ccd307b129e.244ee56d2260de8c.aaba95edd88623fc
+ d2b5bf6419898df0
+ after
+ 0d07193d2e134034.05b265c33ff4760f.00008001fc007bff.7bff7bff8001fc00
+ 0a79b210803112b9.e0fd139371495497.7124406c74e81e7a.a9430469f9a6aaf9
+ 68d5d5d393ccbadd.d52f1cc78e47c9e3.83314ed9438203c8.655800beacbeec8e
+ 03e6fe7283eff6cb.40947ccd307b129e.244ee56d2260de8c.aaba95edd88623fc
+ d2b5bf6419898df0
+
+VCVTPS2PH_256_1(reg)
+ before
+ 3b947b8f0a536415.b779aada6ea680b0.7d772f10f5706b75.304780122c8b69f0
+ 3fa5c4d84771e518.605a54f56dfe15b7.e82632fc79b30f14.83e79bb67d116120
+ 3b3296ac6d6e4ba4.d95578b09e02700d.ddeb80fe57ce3c26.f9fcb34432fe8249
+ 8c88ed7717d6d466.003692f81dbe4ede.b71315802c502c58.6d5043a8665c8797
+ bdb0c6ce36392d36
+ after
+ 3b947b8f0a536415.b779aada6ea680b0.7d772f10f5706b75.304780122c8b69f0
+ 3fa5c4d84771e518.605a54f56dfe15b7.e82632fc79b30f14.83e79bb67d116120
+ 0000000000000000.0000000000000000.3d2e7b8f7bff7bff.fc007bff80017bff
+ 8c88ed7717d6d466.003692f81dbe4ede.b71315802c502c58.6d5043a8665c8797
+ bdb0c6ce36392d36
+VCVTPS2PH_256_1(mem)
+ before
+ c4e8bba2bda13050.8cf3c5a6e236ba0a.b0c81fb7053f6b55.d4eaedef93c21b55
+ 5dc49b10189f4c14.98bf1ba36919393b.c4d999db7390839e.8fbc05b829b247ca
+ 6610e404623f3cac.0d37eadc490b8fa6.1a337e4f82bd51e7.44d5584589abea63
+ ba97106bb88dbd45.45a92ebc1d99f6f6.8da34afe4ed3935a.4f80e2044f3a41cb
+ 0b0b9f6018e987ae
+ after
+ c4e8bba2bda13050.8cf3c5a6e236ba0a.7bff000080017bff.e6cd7bff80010000
+ 5dc49b10189f4c14.98bf1ba36919393b.c4d999db7390839e.8fbc05b829b247ca
+ 6610e404623f3cac.0d37eadc490b8fa6.1a337e4f82bd51e7.44d5584589abea63
+ ba97106bb88dbd45.45a92ebc1d99f6f6.8da34afe4ed3935a.4f80e2044f3a41cb
+ 0b0b9f6018e987ae
+
+VCVTPS2PH_256_1(reg)
+ before
+ d6c08bc57f47f9ba.34279d2f35968b0a.9d5fe4af824eabd8.f8f577d6f4dd0223
+ 0beca39f21ddd399.b28a073ef6656128.71a6062013b6eaf8.39f583c290e85d6f
+ 05dbe25a9a3951f7.0e8dc8821606fcca.1eca927d6d5eee01.2a6fe8ae3cfe5e6a
+ 22d9446284e6ae81.26fc5ee9b286181e.fe1783322bd1f4a0.a92e2587172ec23f
+ 90ffb3373b81451b
+ after
+ d6c08bc57f47f9ba.34279d2f35968b0a.9d5fe4af824eabd8.f8f577d6f4dd0223
+ 0beca39f21ddd399.b28a073ef6656128.71a6062013b6eaf8.39f583c290e85d6f
+ 0000000000000000.0000000000000000.000000008001fc00.7bff00000fac8001
+ 22d9446284e6ae81.26fc5ee9b286181e.fe1783322bd1f4a0.a92e2587172ec23f
+ 90ffb3373b81451b
+VCVTPS2PH_256_1(mem)
+ before
+ bdaf0fabc405a22a.bd31c5237e7128e3.d4a3445ee5f0714d.6ed9d5a9ea9b3880
+ 0a08b6d0cfc59797.3131620a2265f8c8.f64df6cdcb51c286.ca5b844f4549f54e
+ 55d7239077cddd8e.dc2316810c4e5ddd.66c8f02281b3c8f2.6eeb8d90d86668b6
+ 78e7d2d9d92a333d.1854ddf6d8b991ce.01deaf4923243fc0.b6d3ebd9407ecd63
+ fe609a94181e6002
+ after
+ bdaf0fabc405a22a.bd31c5237e7128e3.0000fc0000000000.fc00fc00fc006a4f
+ 0a08b6d0cfc59797.3131620a2265f8c8.f64df6cdcb51c286.ca5b844f4549f54e
+ 55d7239077cddd8e.dc2316810c4e5ddd.66c8f02281b3c8f2.6eeb8d90d86668b6
+ 78e7d2d9d92a333d.1854ddf6d8b991ce.01deaf4923243fc0.b6d3ebd9407ecd63
+ fe609a94181e6002
+
+VCVTPS2PH_128_2(reg)
+ before
+ a2de962ffdd15c3e.50063f9610e753cd.4210b3d32431d146.a45cad2eccb0e21a
+ fe98dc158b24fec4.bafee7b33811fa6d.b7a39486894259f1.290e68be98626e2d
+ 6ddc67b25da28240.909c451c6eb3e447.d1587d7aa579647d.6dc05be3a4469f24
+ 4df433720fd7245d.afacd5bdced9cd88.ee7d691b14613094.4d3d038a0b69312c
+ a353dba0ead5df70
+ after
+ a2de962ffdd15c3e.50063f9610e753cd.4210b3d32431d146.a45cad2eccb0e21a
+ fe98dc158b24fec4.bafee7b33811fa6d.b7a39486894259f1.290e68be98626e2d
+ 0000000000000000.0000000000000000.0000000000000000.8147800000018000
+ 4df433720fd7245d.afacd5bdced9cd88.ee7d691b14613094.4d3d038a0b69312c
+ a353dba0ead5df70
+VCVTPS2PH_128_2(mem)
+ before
+ 4ccc5e105c99661d.f92e3cc13e4f1fc8.f3fa1382738f705b.685c54d57186f6e2
+ 24750ac67ebe825f.cdd47e0b8597b02c.38527c577ae28aed.9c423a145875f514
+ 071b5bad6b52ee61.2533f6bc813a1336.5b808a28feded669.e77b184466b967d6
+ d187cbb340606850.5c979f40cdc58392.364fbbe21b8d12fc.a353e8d137de89d3
+ 2d16a827667197b8
+ after
+ 4ccc5e105c99661d.f92e3cc13e4f1fc8.f3fa1382738f705b.034a7c0080007c00
+ 24750ac67ebe825f.cdd47e0b8597b02c.38527c577ae28aed.9c423a145875f514
+ 071b5bad6b52ee61.2533f6bc813a1336.5b808a28feded669.e77b184466b967d6
+ d187cbb340606850.5c979f40cdc58392.364fbbe21b8d12fc.a353e8d137de89d3
+ 2d16a827667197b8
+
+VCVTPS2PH_128_2(reg)
+ before
+ e11053b38ffdcd30.5e88d8c318f5aa57.d04b750405c33deb.a68d8a6feefdf8d2
+ 1b8ce6e04f0e66e8.8ae9fdca101c70a3.dc9d7472c7c07dee.870474bd92394516
+ 37d75b1941319f8c.3175b6b243e17860.dbd798f8ac487f46.b581f3b7244eb4f5
+ 913db0cc02f1b3c7.2ff97f68cd517cb9.2b46de0152e87ea0.0ccf8549bf47029a
+ 68bca55e8030eb32
+ after
+ e11053b38ffdcd30.5e88d8c318f5aa57.d04b750405c33deb.a68d8a6feefdf8d2
+ 1b8ce6e04f0e66e8.8ae9fdca101c70a3.dc9d7472c7c07dee.870474bd92394516
+ 0000000000000000.0000000000000000.0000000000000000.fbfffbff80008000
+ 913db0cc02f1b3c7.2ff97f68cd517cb9.2b46de0152e87ea0.0ccf8549bf47029a
+ 68bca55e8030eb32
+VCVTPS2PH_128_2(mem)
+ before
+ a513cfe482162be8.850ae0642ddae046.6041d5d9cb7738db.263641f8552cb7a7
+ 9e9f80c6e2047dea.6f8ae74d5f7960b4.a01933ef595f6af1.2af3bd4b509e6608
+ 312d32f1bb069e61.ab09c2f3335970be.cb4d15989216cc28.91c94f65dfccc66f
+ 3989634f2a294a7c.95d26cc246074b10.bda9f7bf92a71bac.b903f1b29f411487
+ fcefa19f2c8a8cfd
+ after
+ a513cfe482162be8.850ae0642ddae046.6041d5d9cb7738db.80007c0000017c00
+ 9e9f80c6e2047dea.6f8ae74d5f7960b4.a01933ef595f6af1.2af3bd4b509e6608
+ 312d32f1bb069e61.ab09c2f3335970be.cb4d15989216cc28.91c94f65dfccc66f
+ 3989634f2a294a7c.95d26cc246074b10.bda9f7bf92a71bac.b903f1b29f411487
+ fcefa19f2c8a8cfd
+
+VCVTPS2PH_128_2(reg)
+ before
+ b8d75a9620326a7d.927f8ecd4a783d65.8932e026330d2e55.52f8564f761e13a8
+ 470818041ac5e9b2.18db305838ff3248.e3761d8b97fa553a.6508ac365a886f48
+ 06ced856b4d04648.a668c3da0fcbe652.ffe81c5e0d57fc6a.d4a3775f58f0ecba
+ 527594f68adebded.1af4c541ebe715af.39d4db0931b25e92.7a9632b68f624628
+ 32ad5a2818eb39be
+ after
+ b8d75a9620326a7d.927f8ecd4a783d65.8932e026330d2e55.52f8564f761e13a8
+ 470818041ac5e9b2.18db305838ff3248.e3761d8b97fa553a.6508ac365a886f48
+ 0000000000000000.0000000000000000.0000000000000000.fbff80007c007c00
+ 527594f68adebded.1af4c541ebe715af.39d4db0931b25e92.7a9632b68f624628
+ 32ad5a2818eb39be
+VCVTPS2PH_128_2(mem)
+ before
+ dab3699f129680a9.85484a52397b894a.4f49b178e95f7a8a.ed8854faa096b85e
+ 4d76dd08966fd815.fc95f5d55c34e70e.2034036b2540d210.764f859cf68f4679
+ 66c03150c383fd2d.13a692ea909413e3.6b813705ba95d96d.32746a5ace2a448f
+ 035ee161b2ddaa1e.27c81bff70274976.0afcca34c46a4acc.7c44fda2c4f3ed4e
+ b00b3cdf75747e60
+ after
+ dab3699f129680a9.85484a52397b894a.4f49b178e95f7a8a.000100017c00fbff
+ 4d76dd08966fd815.fc95f5d55c34e70e.2034036b2540d210.764f859cf68f4679
+ 66c03150c383fd2d.13a692ea909413e3.6b813705ba95d96d.32746a5ace2a448f
+ 035ee161b2ddaa1e.27c81bff70274976.0afcca34c46a4acc.7c44fda2c4f3ed4e
+ b00b3cdf75747e60
+
+VCVTPS2PH_256_2(reg)
+ before
+ 2915227d7d3b3371.fe1c6a2981899c14.92478e7f987ac472.db7137e460cce35a
+ 45aeabe876d3472e.35c647934c948f3a.b16fe6d6a518c184.b9abfaffa9c65e42
+ 5e21d38dffc9f743.8228f38b2e093fbc.e08c1f71338e7c57.7f778f72bc6577b1
+ d2bc96d6b1a87f5b.c30eedfc43f567c8.7be936badd663098.0aa27329b5b3ecd2
+ 37f62011aebf77d2
+ after
+ 2915227d7d3b3371.fe1c6a2981899c14.92478e7f987ac472.db7137e460cce35a
+ 45aeabe876d3472e.35c647934c948f3a.b16fe6d6a518c184.b9abfaffa9c65e42
+ 0000000000000000.0000000000000000.6d767c0000197c00.800080008d5f8000
+ d2bc96d6b1a87f5b.c30eedfc43f567c8.7be936badd663098.0aa27329b5b3ecd2
+ 37f62011aebf77d2
+VCVTPS2PH_256_2(mem)
+ before
+ e0401415c692d5dd.fbc9f1302bfc1b23.d243aed4a862c488.ded3251e3f2e1bf3
+ e34bca20163ac21e.3795df0806520647.8d94b3ff795f1228.6597ea0af6727713
+ 1ae5bf20bcc2f9c2.e06c5cc8e1357d72.cece7967d1f50cd5.6c7f80e89ebd80a5
+ 62d86d00d43737f5.549a65de5531bc50.72bb7bf9cc326fbb.4fd7e326d29b7454
+ 6aed102f2e988dcd
+ after
+ e0401415c692d5dd.fbc9f1302bfc1b23.fbff0001012c0001.80007c007c00fbff
+ e34bca20163ac21e.3795df0806520647.8d94b3ff795f1228.6597ea0af6727713
+ 1ae5bf20bcc2f9c2.e06c5cc8e1357d72.cece7967d1f50cd5.6c7f80e89ebd80a5
+ 62d86d00d43737f5.549a65de5531bc50.72bb7bf9cc326fbb.4fd7e326d29b7454
+ 6aed102f2e988dcd
+
+VCVTPS2PH_256_2(reg)
+ before
+ 190c026f4f4108bb.97f152ac79a338e2.ed6bf4b500d2fe8f.552735a28721f705
+ b87fb552d02120cc.96fce910c815b7b5.082a07b97ea580d9.54e0244c1dcf60e0
+ a29325444ec512a9.39c5af18dc96719b.022499566a367eda.49b0c2e5ab476577
+ 35954eb164b81a01.5d181eb0d13422c0.35a6a7f8600f343f.11658d574d95c3f7
+ 88c900901dc5368c
+ after
+ 190c026f4f4108bb.97f152ac79a338e2.ed6bf4b500d2fe8f.552735a28721f705
+ b87fb552d02120cc.96fce910c815b7b5.082a07b97ea580d9.54e0244c1dcf60e0
+ 0000000000000000.0000000000000000.83fefbff8000fbff.00017c007c000001
+ 35954eb164b81a01.5d181eb0d13422c0.35a6a7f8600f343f.11658d574d95c3f7
+ 88c900901dc5368c
+VCVTPS2PH_256_2(mem)
+ before
+ 8aa9987b39e47961.cccf2d05af86747e.dec1b4c5c4fa8650.fefa2b0bfdbeddb4
+ f3d1cf04bdfd4aa3.c38dbdaccabb5bcc.988bec41d1f55876.ba6d23fbddcfb6e4
+ 9f0d2317c41d637d.751dfa1352e40c98.674442111330555e.c34a8a359bcdfb7c
+ 6a280fa06b4f801c.40c9e0a4e28cc38e.27b63222a6b73935.76df5c23d344e727
+ 2f76953322c0b892
+ after
+ 8aa9987b39e47961.cccf2d05af86747e.fbffafeadc6dfbff.8000fbff9369fbff
+ f3d1cf04bdfd4aa3.c38dbdaccabb5bcc.988bec41d1f55876.ba6d23fbddcfb6e4
+ 9f0d2317c41d637d.751dfa1352e40c98.674442111330555e.c34a8a359bcdfb7c
+ 6a280fa06b4f801c.40c9e0a4e28cc38e.27b63222a6b73935.76df5c23d344e727
+ 2f76953322c0b892
+
+VCVTPS2PH_256_2(reg)
+ before
+ 4a5c32cf23cea869.30f00f8bcd9f5fac.7fdf6bcd1740bc59.b4ae395fa797c027
+ 23de2e6573f9f357.cd2f9fc5071aba58.8da998f88c8b32a6.eaf8d1b431daa560
+ 16458560adcdd709.1db23c3834cb4d4d.c8746293ddf96221.a55f780d618fa50b
+ 7fe0332c6ed78e2a.fc4561d270bed6b6.8a8cc509a7178875.c1b1aa5552bf7b54
+ 1ab8e17b2178e568
+ after
+ 4a5c32cf23cea869.30f00f8bcd9f5fac.7fdf6bcd1740bc59.b4ae395fa797c027
+ 23de2e6573f9f357.cd2f9fc5071aba58.8da998f88c8b32a6.eaf8d1b431daa560
+ 0000000000000000.0000000000000000.00017c00fbff0001.80008000fbff0001
+ 7fe0332c6ed78e2a.fc4561d270bed6b6.8a8cc509a7178875.c1b1aa5552bf7b54
+ 1ab8e17b2178e568
+VCVTPS2PH_256_2(mem)
+ before
+ 8c419b68e9c69d73.bafa353551a25467.46b48a7dd8000fc0.313cbec68670df4e
+ f3185309c7b360a0.23de85e7f3ba676c.d7ca3327879cb597.17d247361590a45a
+ 2a09854ad64de91c.16da21aeefac01e4.8b55d9bb9a9e8466.9a985ec5f0031343
+ 0fe0ad1832a0f513.ef3804f7e2035f7c.3d1ff6252d13375a.14dcfee0b45668b5
+ e2e823f1fc15de5d
+ after
+ 8c419b68e9c69d73.bafa353551a25467.fbfffbff0001fbff.fbff800000010001
+ f3185309c7b360a0.23de85e7f3ba676c.d7ca3327879cb597.17d247361590a45a
+ 2a09854ad64de91c.16da21aeefac01e4.8b55d9bb9a9e8466.9a985ec5f0031343
+ 0fe0ad1832a0f513.ef3804f7e2035f7c.3d1ff6252d13375a.14dcfee0b45668b5
+ e2e823f1fc15de5d
+
+VCVTPS2PH_128_3(reg)
+ before
+ 6109ca6565cab2e7.7d69475df9b640b0.0a452b2c674cbddf.cbf508515b068b9e
+ eb8aeda98a0320fe.506fd007449d8620.c34d90bb1a1256ba.10a38a2b40833c5f
+ ddb98a28084c634f.63bfc3013161828e.759b310e98e167b9.e8f5f99ff99706c8
+ 94e09c4d7a2fb985.94259c37dc0df227.7e7d09937d452c87.2eb7cf99a14da407
+ c0b48a0655b1d345
+ after
+ 6109ca6565cab2e7.7d69475df9b640b0.0a452b2c674cbddf.cbf508515b068b9e
+ eb8aeda98a0320fe.506fd007449d8620.c34d90bb1a1256ba.10a38a2b40833c5f
+ 0000000000000000.0000000000000000.0000000000000000.da6c000000004419
+ 94e09c4d7a2fb985.94259c37dc0df227.7e7d09937d452c87.2eb7cf99a14da407
+ c0b48a0655b1d345
+VCVTPS2PH_128_3(mem)
+ before
+ 78baa5d030d04fb1.6a4d20867d3a5b4d.bd6dd8955fad8f17.393d14b564cbe1d0
+ 34939ce54eb5d374.bc4a103eacf98853.bc63f107d94d1889.02284fdfe9fec142
+ ce0cec2fcc6d1cbd.e6246ae1a4f77a42.6cd3657964fa47a9.348ab47fa96b0987
+ 24f10f9cc602e6b6.442729db00c06ec7.a888afd71cbfd9a5.2daf41013f9df44b
+ 48e3f1cf4820c03b
+ after
+ 78baa5d030d04fb1.6a4d20867d3a5b4d.bd6dd8955fad8f17.a31ffbff0000fbff
+ 34939ce54eb5d374.bc4a103eacf98853.bc63f107d94d1889.02284fdfe9fec142
+ ce0cec2fcc6d1cbd.e6246ae1a4f77a42.6cd3657964fa47a9.348ab47fa96b0987
+ 24f10f9cc602e6b6.442729db00c06ec7.a888afd71cbfd9a5.2daf41013f9df44b
+ 48e3f1cf4820c03b
+
+VCVTPS2PH_128_3(reg)
+ before
+ e072c1566081a703.100e83175782ed8c.329e49985ce0a08d.4e504c0d1ea88aa7
+ 53a7ab02214be64e.702ec38c9cf9ec6a.0cd7c78555e44c41.38f5b60885c215db
+ fbcfad402a0ab8c9.1e1f4ce7b072a07d.2e1c9d0c8757ad8f.43446bb26e18386e
+ 3637c27a144a5b20.f8ab9814aff9c5f0.f4bac99b8dc50022.4c09e6f9f4b7ac8c
+ 2d0fa3c734a93060
+ after
+ e072c1566081a703.100e83175782ed8c.329e49985ce0a08d.4e504c0d1ea88aa7
+ 53a7ab02214be64e.702ec38c9cf9ec6a.0cd7c78555e44c41.38f5b60885c215db
+ 0000000000000000.0000000000000000.0000000000000000.00007bff07ad8000
+ 3637c27a144a5b20.f8ab9814aff9c5f0.f4bac99b8dc50022.4c09e6f9f4b7ac8c
+ 2d0fa3c734a93060
+VCVTPS2PH_128_3(mem)
+ before
+ c3241e0a49fd7e17.5e28e61e7d9809fe.d89f25ffb69a16f0.bafd469c03bb81a7
+ eb12d4ad50bc53dc.d1f115970180fe0f.9bc76e95e06250a9.dc31117d86c46bc9
+ 7fa6409c64f46bdc.e9dd4c503b8c7801.1defefc04a5c2f46.b6a224a9b26dfb35
+ 5caddec3a1b08243.033786b7c84ab17d.3be2256e10956ff4.a49c7d8b21406d97
+ 026a179172ccfc9a
+ after
+ c3241e0a49fd7e17.5e28e61e7d9809fe.d89f25ffb69a16f0.8000fbfffbff8000
+ eb12d4ad50bc53dc.d1f115970180fe0f.9bc76e95e06250a9.dc31117d86c46bc9
+ 7fa6409c64f46bdc.e9dd4c503b8c7801.1defefc04a5c2f46.b6a224a9b26dfb35
+ 5caddec3a1b08243.033786b7c84ab17d.3be2256e10956ff4.a49c7d8b21406d97
+ 026a179172ccfc9a
+
+VCVTPS2PH_128_3(reg)
+ before
+ 2c59ee263f9ae6eb.5ef02a0e24fd533c.7c4dbf374346e632.cf6e8a894c18cbde
+ 7db5feb724386535.623ea06909e69bf4.ae69f33c480a53ca.b65d9cff1df10031
+ 53ca44aebd31b525.4262bdc16b771596.f6d81f33742433f2.cc7dd6bb9c2cca19
+ 09e4bb78a8121467.db27fc0066bc7f4f.b0e63d866320c355.ed98b4a9e8d6e4c1
+ f57ab3b51afb0c56
+ after
+ 2c59ee263f9ae6eb.5ef02a0e24fd533c.7c4dbf374346e632.cf6e8a894c18cbde
+ 7db5feb724386535.623ea06909e69bf4.ae69f33c480a53ca.b65d9cff1df10031
+ 0000000000000000.0000000000000000.0000000000000000.80007bff80370000
+ 09e4bb78a8121467.db27fc0066bc7f4f.b0e63d866320c355.ed98b4a9e8d6e4c1
+ f57ab3b51afb0c56
+VCVTPS2PH_128_3(mem)
+ before
+ c0f14ecb50a5fc04.fb4f5f827e66bca6.095bd91417c2934b.39df4ba2b0883fa0
+ 2ac801d7a6e270f6.84562c36ddb9ea8e.a8c8d0e79a950eb5.eb0e45f4f7eae27e
+ 0fca48c537bd2658.02471f026197d9cd.943b5e67093fabba.23c025e6d5d2e99c
+ 4bc49f812043d857.cf6c3250a58dc60d.a980fe7f83ce2785.3d42e9e6e5cb90a6
+ 95de8b5fc4611347
+ after
+ c0f14ecb50a5fc04.fb4f5f827e66bca6.095bd91417c2934b.80008000fbfffbff
+ 2ac801d7a6e270f6.84562c36ddb9ea8e.a8c8d0e79a950eb5.eb0e45f4f7eae27e
+ 0fca48c537bd2658.02471f026197d9cd.943b5e67093fabba.23c025e6d5d2e99c
+ 4bc49f812043d857.cf6c3250a58dc60d.a980fe7f83ce2785.3d42e9e6e5cb90a6
+ 95de8b5fc4611347
+
+VCVTPS2PH_256_3(reg)
+ before
+ 87e109bc0d20ad2c.ba8283f87c7f421f.4912638e4626edfa.c3622c1b224d3e43
+ 6f975f6b5d959b00.38d06f14677d22db.cb85ad27dfef8a41.beaf642702c9ac20
+ a94b87d74f4b1970.a17adfc3fe4a32b8.b0100d870c73d98e.7631228f404d2c47
+ 914b7f6c80ce6328.d14c4ff05df12fe2.56017d1a6a3e158c.c6b5e33ff7e57be5
+ 8c072223439e5525
+ after
+ 87e109bc0d20ad2c.ba8283f87c7f421f.4912638e4626edfa.c3622c1b224d3e43
+ 6f975f6b5d959b00.38d06f14677d22db.cb85ad27dfef8a41.beaf642702c9ac20
+ 0000000000000000.0000000000000000.7bff7bff06837bff.fbfffbffb57b0000
+ 914b7f6c80ce6328.d14c4ff05df12fe2.56017d1a6a3e158c.c6b5e33ff7e57be5
+ 8c072223439e5525
+VCVTPS2PH_256_3(mem)
+ before
+ a3f35b2742837634.83e142978babb4d3.a4113b879f7ed584.17a2fb4c94dd7be8
+ e7450a380da0993e.db5accc20d6d491e.f5972073e0fedfcb.5040828927db464e
+ 33c9ac40c2027b6e.f502195aa1a15db4.a2a879a23d7c0ae2.5e270e3ebfc4b369
+ 654abc2d7db4d8f1.2c2526cce3d22e42.8611c200d10412f6.9ba39cd5f625fee5
+ 640027bc6b896370
+ after
+ a3f35b2742837634.83e142978babb4d3.fbff0000fbff0000.fbfffbff7bff0000
+ e7450a380da0993e.db5accc20d6d491e.f5972073e0fedfcb.5040828927db464e
+ 33c9ac40c2027b6e.f502195aa1a15db4.a2a879a23d7c0ae2.5e270e3ebfc4b369
+ 654abc2d7db4d8f1.2c2526cce3d22e42.8611c200d10412f6.9ba39cd5f625fee5
+ 640027bc6b896370
+
+VCVTPS2PH_256_3(reg)
+ before
+ 168aaa5db77c1eb3.5895b6ea59c26bf2.de0fee83708cf673.7d9e7877b9a3b333
+ 0b8c2426798b6a5d.e77616637239f19e.660b6deae45bf2f5.621a15f41064a8c4
+ a0538e824a418418.f0b95884e5242620.20669f6fdacc5d71.13531763f353aed5
+ 312ce5ddc92aa790.4e2af939ce90c5a5.676d807dee6a7596.6a13f9b17d7d8194
+ 452737c8cbeddc2b
+ after
+ 168aaa5db77c1eb3.5895b6ea59c26bf2.de0fee83708cf673.7d9e7877b9a3b333
+ 0b8c2426798b6a5d.e77616637239f19e.660b6deae45bf2f5.621a15f41064a8c4
+ 0000000000000000.0000000000000000.00007bfffbff7bff.7bfffbff7bff0000
+ 312ce5ddc92aa790.4e2af939ce90c5a5.676d807dee6a7596.6a13f9b17d7d8194
+ 452737c8cbeddc2b
+VCVTPS2PH_256_3(mem)
+ before
+ 7f5b4e96f8b07cc6.1a6126a38fd31173.dc95746e47878c59.996a0d80bdc2740e
+ f37bb80620d01d92.b83e4c403ac7fc6a.78c5450f6f173567.11aa41e4e25f9685
+ 7e53f304605c7bbb.651153206692a424.fc88e808604c7cfc.2a781815facd19a8
+ fdaf7a7cd45d516c.7e2538b0aec1474b.46a8d94636311f44.42228e7fa1993723
+ b3f2a08f714e2da1
+ after
+ 7f5b4e96f8b07cc6.1a6126a38fd31173.fbff000082f9163f.7bff7bff0000fbff
+ f37bb80620d01d92.b83e4c403ac7fc6a.78c5450f6f173567.11aa41e4e25f9685
+ 7e53f304605c7bbb.651153206692a424.fc88e808604c7cfc.2a781815facd19a8
+ fdaf7a7cd45d516c.7e2538b0aec1474b.46a8d94636311f44.42228e7fa1993723
+ b3f2a08f714e2da1
+
+VCVTPS2PH_256_3(reg)
+ before
+ db3648af097836cf.4a5aca5a97e15cd2.5fc5f55aaedf1f8b.30f295b30ed2d86a
+ 1438844d02a38f59.43215d8ac5f35818.643e888b03796992.9732973d033b649a
+ bc4550d3fa5c74ea.c2d1b1f87b9f006c.e9dcecb049196109.58335bce32797f02
+ 2da9a6af8d2f212e.a4344190d5f211f7.4aeb1e341b4e429f.4dc35e54b697e4cc
+ 53ac1abaaba25024
+ after
+ db3648af097836cf.4a5aca5a97e15cd2.5fc5f55aaedf1f8b.30f295b30ed2d86a
+ 1438844d02a38f59.43215d8ac5f35818.643e888b03796992.9732973d033b649a
+ 0000000000000000.0000000000000000.00000000590aef9a.7bff000080000000
+ 2da9a6af8d2f212e.a4344190d5f211f7.4aeb1e341b4e429f.4dc35e54b697e4cc
+ 53ac1abaaba25024
+VCVTPS2PH_256_3(mem)
+ before
+ 47bb0dec2ea57f37.c5af844c56a6d2d3.c616893fedf747e7.e3b7188215a149fe
+ 03bdb2d65bac2c31.dea5e516f24fc282.024505efe2bb5e68.0f8bd808d4a0b2d2
+ 647b85644dc3143d.d5d5c579fcb62eea.358c328ece4911a6.2cb55931f3d6b9c8
+ 46c4038221f7f388.078c20e1106551b5.3bb68b07cdad1dcc.957f97690fcf998c
+ a6368e1cc3188fca
+ after
+ 47bb0dec2ea57f37.c5af844c56a6d2d3.00007bfffbfffbff.0000fbff0000fbff
+ 03bdb2d65bac2c31.dea5e516f24fc282.024505efe2bb5e68.0f8bd808d4a0b2d2
+ 647b85644dc3143d.d5d5c579fcb62eea.358c328ece4911a6.2cb55931f3d6b9c8
+ 46c4038221f7f388.078c20e1106551b5.3bb68b07cdad1dcc.957f97690fcf998c
+ a6368e1cc3188fca
+
+VCVTPS2PH_128_4(reg)
+ before
+ bb263bb7ac3dd62d.8563a61df253853d.ce16f2bacbea6990.f0908c45fcf43e06
+ 2f9b99a465c8ac61.fd23ec1fdce48589.87bf3870c9d1b026.30e6b13676282f82
+ 60e0a4508b474b13.8ad25076fcb5b098.8ed3ed6fa5a46224.d78477c55858ae69
+ a9435828b945f0ef.083a4f0c6dd2c295.409d0d24fbf1bd35.c23659debd8d75ea
+ cbc7d36dc1d5402f
+ after
+ bb263bb7ac3dd62d.8563a61df253853d.ce16f2bacbea6990.f0908c45fcf43e06
+ 2f9b99a465c8ac61.fd23ec1fdce48589.87bf3870c9d1b026.30e6b13676282f82
+ 0000000000000000.0000000000000000.0000000000000000.8000fc0000007c00
+ a9435828b945f0ef.083a4f0c6dd2c295.409d0d24fbf1bd35.c23659debd8d75ea
+ cbc7d36dc1d5402f
+VCVTPS2PH_128_4(mem)
+ before
+ cf05615f813bcd64.650eb2968b4fd6a0.532863cf4c4877ad.f76b95fa6844fb06
+ a9ba7f9e19ccd6b6.f28eac089ff03bd3.47680aaab4228a0b.10877f5c87275943
+ 37b3aa17a4931751.aa0f44e98eb45934.c0c5bf89c26cb8dc.e73ec9b8f5291397
+ 54bc5db73e9c4e61.ebdd75a5f6276c6e.9a0dfe589133bc4c.d8dc6e794dd364af
+ 3fad6a0b2cb38936
+ after
+ cf05615f813bcd64.650eb2968b4fd6a0.532863cf4c4877ad.7b40800300008000
+ a9ba7f9e19ccd6b6.f28eac089ff03bd3.47680aaab4228a0b.10877f5c87275943
+ 37b3aa17a4931751.aa0f44e98eb45934.c0c5bf89c26cb8dc.e73ec9b8f5291397
+ 54bc5db73e9c4e61.ebdd75a5f6276c6e.9a0dfe589133bc4c.d8dc6e794dd364af
+ 3fad6a0b2cb38936
+
+VCVTPS2PH_128_4(reg)
+ before
+ 78fcbada2d54bed9.dca1146904f43511.0f443ca873d6b22d.b10a44033e825486
+ df175852ed423e44.ab2d4b1812a6898d.7490935e9f4d651f.e1890b76e4653ab7
+ d04842df070a4722.9d2e7eb283be0602.740c78331916c2ee.0656d19da0e92b0a
+ a7dc73ed18371320.8e6e2a227349679c.6d05e6937bbf0446.fc3d11658d19e2ac
+ 9e0a48b8c8011cc8
+ after
+ 78fcbada2d54bed9.dca1146904f43511.0f443ca873d6b22d.b10a44033e825486
+ df175852ed423e44.ab2d4b1812a6898d.7490935e9f4d651f.e1890b76e4653ab7
+ 0000000000000000.0000000000000000.0000000000000000.7c008000fc00fc00
+ a7dc73ed18371320.8e6e2a227349679c.6d05e6937bbf0446.fc3d11658d19e2ac
+ 9e0a48b8c8011cc8
+VCVTPS2PH_128_4(mem)
+ before
+ ca89f0846cae958b.bf1fe8e9bb56dee9.59baca54ff526986.b9c7d9eb61d469d4
+ 5b8587b3952b0921.765d9b3d8cf2e62a.dcdeda3442e5c8ed.b59e4ea568df2b44
+ 2ccb8833608433b6.27e28a572897658e.f2a6d6ae590f40fd.bed6402f2b6e8641
+ 1b276fefe9c6d174.2ef9b0a22bd197c3.76de3baf5fdb8ce1.2ebbabf3470db878
+ 62988b5f5746fb94
+ after
+ ca89f0846cae958b.bf1fe8e9bb56dee9.59baca54ff526986.fc00572e80147c00
+ 5b8587b3952b0921.765d9b3d8cf2e62a.dcdeda3442e5c8ed.b59e4ea568df2b44
+ 2ccb8833608433b6.27e28a572897658e.f2a6d6ae590f40fd.bed6402f2b6e8641
+ 1b276fefe9c6d174.2ef9b0a22bd197c3.76de3baf5fdb8ce1.2ebbabf3470db878
+ 62988b5f5746fb94
+
+VCVTPS2PH_128_4(reg)
+ before
+ b6badcdef8a78c42.0365b8d34bfc9c8a.e7f00989302dba72.46518421715669c6
+ 876d9bdcc5bca72e.bf51e0cba2325322.ad11927ad336084a.3ccd2df1aa8a93d7
+ 2ffdc2c55b0f8703.2db762c30b75b069.dea946e0b179bef5.361cb20c2785c541
+ 0bb64f05552e696e.2762baa7a1d0708a.d50420276581181f.0f0b8f5d0353bc2f
+ a26641cf5aff34ce
+ after
+ b6badcdef8a78c42.0365b8d34bfc9c8a.e7f00989302dba72.46518421715669c6
+ 876d9bdcc5bca72e.bf51e0cba2325322.ad11927ad336084a.3ccd2df1aa8a93d7
+ 0000000000000000.0000000000000000.0000000000000000.8000fc0026698000
+ 0bb64f05552e696e.2762baa7a1d0708a.d50420276581181f.0f0b8f5d0353bc2f
+ a26641cf5aff34ce
+VCVTPS2PH_128_4(mem)
+ before
+ ccf943504995e94a.77e43d084fa5891a.8b20646381504fb2.ed6e1ebb8cda5175
+ 6a8f11cbec2196ce.cfd2893ae6ff22b4.33bbdde4c7ff080c.84323c09c110a7a3
+ 56f6272c5eb0f887.6fce2e08b6c871e8.fc009f148dec59c5.f49e747ba1b05354
+ 6ef8025fbb4dcba1.b32227dc5a8cb261.c3bb28e1f220fb09.9bfcc47ec7469435
+ d399277fd05ca4f2
+ after
+ ccf943504995e94a.77e43d084fa5891a.8b20646381504fb2.0001fc008000c885
+ 6a8f11cbec2196ce.cfd2893ae6ff22b4.33bbdde4c7ff080c.84323c09c110a7a3
+ 56f6272c5eb0f887.6fce2e08b6c871e8.fc009f148dec59c5.f49e747ba1b05354
+ 6ef8025fbb4dcba1.b32227dc5a8cb261.c3bb28e1f220fb09.9bfcc47ec7469435
+ d399277fd05ca4f2
+
+VCVTPS2PH_256_4(reg)
+ before
+ fac199e95780c036.8c621d512005ca47.f71b72246ed821cc.62b7f4350fa9ad03
+ 6ac01727f93e24ad.8d00f4eca9e2b2a5.97050b4a8f37f9d4.b7c27dfe029229e0
+ 81e19ba751200b05.4e9e031d71f33fe9.ef2cecbc58357726.9ca64127e7e72ccc
+ 997223d4fcb4b3e7.a908c6d194412d3c.5bdb13e5665fd76e.ee30ba9cd9b572f2
+ 892d6649f507b77e
+ after
+ fac199e95780c036.8c621d512005ca47.f71b72246ed821cc.62b7f4350fa9ad03
+ 6ac01727f93e24ad.8d00f4eca9e2b2a5.97050b4a8f37f9d4.b7c27dfe029229e0
+ 0000000000000000.0000000000000000.7c00fc0080008000.8000800081850000
+ 997223d4fcb4b3e7.a908c6d194412d3c.5bdb13e5665fd76e.ee30ba9cd9b572f2
+ 892d6649f507b77e
+VCVTPS2PH_256_4(mem)
+ before
+ 4966c11a56eab69e.0fbdaa1a95855502.7b09baf22fda37cd.37607b3155405557
+ 0baa45fb18692c7b.3f0f2ce5b8203000.a06e5a6e5dc91ac6.de05200cbf652c8e
+ a9c470d95890d444.86b5a6a102107e8e.f40422303b1b9254.ae17ffe2435999be
+ 007fbd9e326c6c23.3ef813ba0fe17c57.53958e24e4db5aa2.02e2121f7aa8d894
+ 36b2573003de24bc
+ after
+ 4966c11a56eab69e.0fbdaa1a95855502.0000000038798281.80007c00fc00bb29
+ 0baa45fb18692c7b.3f0f2ce5b8203000.a06e5a6e5dc91ac6.de05200cbf652c8e
+ a9c470d95890d444.86b5a6a102107e8e.f40422303b1b9254.ae17ffe2435999be
+ 007fbd9e326c6c23.3ef813ba0fe17c57.53958e24e4db5aa2.02e2121f7aa8d894
+ 36b2573003de24bc
+
+VCVTPS2PH_256_4(reg)
+ before
+ a5d2c97f7788bae1.eca9a838c108ae44.c4276f9d7a206608.9aed1b36751530dd
+ ad90647a7432d6af.490cad91217056d5.748db6b4df58784c.a3da435209d5ce30
+ a936258b9666b4d4.f37549976fb022ff.aa75e46ddb16edd0.4b278464bc28f0c8
+ f411072078b28ffa.c6b0f635c6d5ccf1.c32cf63309e40240.6e9f5a58ac1a54d1
+ e80f3f1bf2b5b476
+ after
+ a5d2c97f7788bae1.eca9a838c108ae44.c4276f9d7a206608.9aed1b36751530dd
+ ad90647a7432d6af.490cad91217056d5.748db6b4df58784c.a3da435209d5ce30
+ 0000000000000000.0000000000000000.80007c007c000000.7c00fc0080000000
+ f411072078b28ffa.c6b0f635c6d5ccf1.c32cf63309e40240.6e9f5a58ac1a54d1
+ e80f3f1bf2b5b476
+VCVTPS2PH_256_4(mem)
+ before
+ 9444b197ac07cce3.ec6d05a4b6a1a4cd.9e88325743eb11d5.19fee7710650f247
+ 50072abaf61c5a46.eb961e83edc02ffa.57cb79e901fcadd7.0937b3956de6fb92
+ f727286eebfe18c9.4fd84b29b99a6b2d.cd4345d71d165b24.ef23560adb3157cc
+ 656f27a1a7bbc398.727e8a02b5bb9511.dbbd140db245d8e8.270ecc3cebbd43a2
+ 0d4312973a16fac0
+ after
+ 9444b197ac07cce3.ec6d05a4b6a1a4cd.7c00fc00fc00fc00.7c00000000007c00
+ 50072abaf61c5a46.eb961e83edc02ffa.57cb79e901fcadd7.0937b3956de6fb92
+ f727286eebfe18c9.4fd84b29b99a6b2d.cd4345d71d165b24.ef23560adb3157cc
+ 656f27a1a7bbc398.727e8a02b5bb9511.dbbd140db245d8e8.270ecc3cebbd43a2
+ 0d4312973a16fac0
+
+VCVTPS2PH_256_4(reg)
+ before
+ fd0f238763c9b9d1.76aaa13e475e17e0.b2d6d57a7db0e953.5f056177dd93e04f
+ 52bffb790361bc82.06a61431e6f4cfcd.692a2afdae04a39e.34e7a802b90e2f84
+ 6a9d96d7b56b3f7e.f02dfb66a188a88b.f4c785f8e443fea0.362f659862c280b3
+ a0f5f10f15717d72.120cd2c993275e44.b0f9e0d5b9fa3702.41a91527f6b99009
+ 302032998e011bb2
+ after
+ fd0f238763c9b9d1.76aaa13e475e17e0.b2d6d57a7db0e953.5f056177dd93e04f
+ 52bffb790361bc82.06a61431e6f4cfcd.692a2afdae04a39e.34e7a802b90e2f84
+ 0000000000000000.0000000000000000.7c0000000000fc00.7c00800000078871
+ a0f5f10f15717d72.120cd2c993275e44.b0f9e0d5b9fa3702.41a91527f6b99009
+ 302032998e011bb2
+VCVTPS2PH_256_4(mem)
+ before
+ e1613adc48a6dcd9.5015078bc002b309.470f1546d9dbad27.f70c3901ccb48a72
+ 2f38a8db40b290ab.d648d4b952a71df1.6a0141c98eb2505e.264b8be9b6fd329c
+ f571f9829134f354.8dd9540466eef7d3.59b0d13fcfb80416.9a04d2f816626c2c
+ 11d8a7bd5735c0ff.d31583d898627c5e.efe64192b7f7857a.ad810a9a856e74cd
+ bc0f303ba1ad862b
+ after
+ e1613adc48a6dcd9.5015078bc002b309.00004595fc007c00.7c0080000000807f
+ 2f38a8db40b290ab.d648d4b952a71df1.6a0141c98eb2505e.264b8be9b6fd329c
+ f571f9829134f354.8dd9540466eef7d3.59b0d13fcfb80416.9a04d2f816626c2c
+ 11d8a7bd5735c0ff.d31583d898627c5e.efe64192b7f7857a.ad810a9a856e74cd
+ bc0f303ba1ad862b
+
--- /dev/null
+prog: f16c
+prereq: test -x f16c && ../../../tests/x86_amd64_features amd64-f16c
+vgopts: -q
--- /dev/null
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "tests/malloc.h"
+
+typedef unsigned char UChar;
+typedef unsigned int UInt;
+typedef unsigned long int UWord;
+typedef unsigned long long int ULong;
+
+// What can we actually test here? The instructions take no input and
+// produce output which is by definition totally random. So apart from
+// not simply failing insn decode, there's nothing much to test.
+
+// Get 10 values of each size, and check that they are not all the same
+// (otherwise something's obviously wrong). Now, statistically, it's
+// highly unlikely that they are all the same. For 10 16 bit ints, the
+// probability of them being all the same is (I'd guess) (2^-16) ^ (10-1),
+// that is, 2^-144.
+/*
+ * Return one 64-bit value obtained from RDRAND.
+ *
+ * Each attempt preloads r11 with a 0x5555.. pattern and zeroes r12, runs
+ * RDRAND on the 64-bit register, then uses SETC to copy the carry flag
+ * into r12.  The loop retries until the captured flag is 1, so the
+ * function only ever returns a result for which the carry flag was set.
+ */
+ULong do_rdrand64 ( void )
+{
+ while (1) {
+ ULong res = 0;
+ ULong cflag = 0;
+ __asm__ __volatile__(
+ "movabsq $0x5555555555555555, %%r11 ; " // known pattern in the destination
+ "movq $0, %%r12 ; " // clear r12 so SETC's byte write yields 0 or 1
+ "rdrand %%r11 ; " // 64-bit operand form
+ "setc %%r12b ; " // r12 = carry flag left by RDRAND
+ "movq %%r11, %0 ; "
+ "movq %%r12, %1"
+ : "=r"(res), "=r"(cflag) : : "r11", "r12"
+ );
+ if (cflag == 1) // carry set: accept this value, otherwise retry
+ return res;
+ }
+ /*NOTREACHED*/
+}
+/*
+ * Return the full 64-bit contents of r11 after a 32-bit RDRAND.
+ *
+ * Same retry-until-carry-set loop as do_rdrand64, but the instruction
+ * targets r11d.  The whole of r11 is returned (not just the low half) so
+ * the caller can inspect what happened to the upper 32 bits; the expected
+ * output for this test shows them as zero.
+ */
+ULong do_rdrand32 ( void )
+{
+ while (1) {
+ ULong res = 0;
+ ULong cflag = 0;
+ __asm__ __volatile__(
+ "movabsq $0x5555555555555555, %%r11 ; " // known pattern in the destination
+ "movq $0, %%r12 ; " // clear r12 so SETC's byte write yields 0 or 1
+ "rdrand %%r11d ; " // 32-bit operand form
+ "setc %%r12b ; " // r12 = carry flag left by RDRAND
+ "movq %%r11, %0 ; " // copy all 64 bits, including the upper half
+ "movq %%r12, %1"
+ : "=r"(res), "=r"(cflag) : : "r11", "r12"
+ );
+ if (cflag == 1) // carry set: accept this value, otherwise retry
+ return res;
+ }
+ /*NOTREACHED*/
+}
+/*
+ * Return the full 64-bit contents of r11 after a 16-bit RDRAND.
+ *
+ * Same retry-until-carry-set loop as do_rdrand64, but the instruction
+ * targets r11w.  The whole of r11 is returned so the caller can inspect
+ * the upper 48 bits; the expected output for this test shows them still
+ * holding the 0x555555555555 preload.
+ */
+ULong do_rdrand16 ( void )
+{
+ while (1) {
+ ULong res = 0;
+ ULong cflag = 0;
+ __asm__ __volatile__(
+ "movabsq $0x5555555555555555, %%r11 ; " // known pattern in the destination
+ "movq $0, %%r12 ; " // clear r12 so SETC's byte write yields 0 or 1
+ "rdrand %%r11w ; " // 16-bit operand form
+ "setc %%r12b ; " // r12 = carry flag left by RDRAND
+ "movq %%r11, %0 ; " // copy all 64 bits, including the upper 48
+ "movq %%r12, %1"
+ : "=r"(res), "=r"(cflag) : : "r11", "r12"
+ );
+ if (cflag == 1) // carry set: accept this value, otherwise retry
+ return res;
+ }
+ /*NOTREACHED*/
+}
+/*
+ * Exercise one RDRAND wrapper.
+ *
+ * Calls fn() ten times, asserts that the ten results are not all equal,
+ * and then prints each result with the bits selected by mask cleared,
+ * one per line as 16 hex digits, followed by a blank line.
+ *
+ * fn:   wrapper to call; takes no arguments and returns a ULong.
+ * mask: 1 bits mark the random part of the result, which is removed
+ *       before printing.
+ */
+void do_test ( ULong(*fn)(void),
+ ULong mask
+ /* with 1s indicating the random bits in the result */ )
+{
+ ULong arr[10];
+ for (UInt i = 0; i < 10; i++) {
+ arr[i] = fn();
+ }
+
+ // The samples should not all be identical (to an extremely high probability);
+ // see the comment above. Only equality with arr[0] is checked here.
+ int allSame = 1/*true*/; // really, a Bool
+ for (UInt i = 1; i < 10; i++) {
+ if (arr[i] != arr[0]) {
+ allSame = 0/*false*/;
+ break;
+ }
+ }
+ assert(!allSame);
+
+ // The 0/32/48 leading bits of the result should have a particular value,
+ // depending on the insn. So print them, with the random part masked out.
+ for (UInt i = 0; i < 10; i++) {
+ printf("0x%016llx\n", arr[i] & ~mask);
+ }
+ printf("\n");
+}
+/*
+ * Run do_test once for each RDRAND operand size.  Each mask marks the
+ * bits the instruction is expected to fill with random data; do_test
+ * prints only the remaining bits.
+ */
+int main ( void )
+{
+ do_test( do_rdrand64, 0xFFFFFFFFFFFFFFFFULL ); // all 64 bits are random
+ do_test( do_rdrand32, 0x00000000FFFFFFFFULL ); // low 32 random; upper 32 printed
+ do_test( do_rdrand16, 0x000000000000FFFFULL ); // low 16 random; upper 48 printed
+ return 0;
+}
--- /dev/null
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+0x0000000000000000
+
+0x5555555555550000
+0x5555555555550000
+0x5555555555550000
+0x5555555555550000
+0x5555555555550000
+0x5555555555550000
+0x5555555555550000
+0x5555555555550000
+0x5555555555550000
+0x5555555555550000
+
--- /dev/null
+prog: rdrand
+prereq: test -x rdrand && ../../../tests/x86_amd64_features amd64-rdrand
+vgopts: -q
// This file determines x86/AMD64 features a processor supports.
//
// We return:
-// - 0 if the machine matches the asked-for feature.
-// - 1 if the machine does not.
+// - 0 if the machine has the asked-for feature.
+// - 1 if the machine doesn't have the asked-for feature.
// - 2 if the asked-for feature isn't recognised (this will be the case for
// any feature if run on a non-x86/AMD64 machine).
// - 3 if there was a usage error (it also prints an error message).
level = 0x80000001;
cmask = 1 << 16;
require_amd = True;
+ } else if (strcmp (cpu, "amd64-f16c" ) == 0) {
+ level = 1;
+ cmask = 1 << 29;
+ } else if (strcmp (cpu, "amd64-rdrand" ) == 0) {
+ level = 1;
+ cmask = 1 << 30;
#endif
} else {
return UNRECOGNISED_FEATURE;