extern void amd64g_dirtyhelper_SxDT ( void* address,
ULong op /* 0 or 1 */ );
-extern ULong amd64g_dirtyhelper_ISTRI_08 ( VexGuestAMD64State*,
- HWord, HWord );
-extern ULong amd64g_dirtyhelper_ISTRI_0C ( VexGuestAMD64State*,
- HWord, HWord );
-extern ULong amd64g_dirtyhelper_ISTRI_3A ( VexGuestAMD64State*,
- HWord, HWord );
-extern ULong amd64g_dirtyhelper_ISTRI_4A ( VexGuestAMD64State*,
- HWord, HWord );
+/* Helps with PCMP{I,E}STR{I,M}.
+
+ CALLED FROM GENERATED CODE: DIRTY HELPER(s). (Not really dirty;
+ it could be a clean helper, were it not for the fact that we can't
+ pass by value 2 x V128 to a clean helper, nor have one returned.)
+ Reads guest state, writes to guest state for the xSTRM cases,
+ makes no memory accesses, and is otherwise a pure function.
+
+ opc4_and_imm contains (the 4th byte of the opcode << 8) | the imm8
+ byte, so the callee knows which I/E and I/M variant it is dealing
+ with and what the specific operation is. The 4th opcode byte is in
+ the range 0x60 to 0x63:
+ istri 66 0F 3A 63
+ istrm 66 0F 3A 62
+ estri 66 0F 3A 61
+ estrm 66 0F 3A 60
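+
+ For example, "pcmpistri $0x0C, %xmm2, %xmm1" reaches the helper
+ with opc4_and_imm == (0x63 << 8) | 0x0C == 0x630C.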
+
+ gstOffL and gstOffR are the guest state offsets for the two XMM
+ register inputs. We never have to deal with the memory case since
+ that is handled by pre-loading the relevant value into the fake
+ XMM16 register.
+
+ For ESTRx variants, edxIN and eaxIN hold the values of those two
+ registers.
+
+ In all cases, the bottom 16 bits of the result contain the new
+ OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
+ result hold the new %ecx value. For xSTRM variants, the helper
+ writes the result directly to the guest XMM0.
+
+ Declarable side effects: in all cases, reads guest state at
+ [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
+ guest_XMM0.
+
+ Is expected to be called only with opc4_and_imm combinations which
+ have actually been validated, and will assert otherwise. The front
+ end should ensure we're only called with verified values.
+*/
+extern ULong amd64g_dirtyhelper_PCMPxSTRx (
+ VexGuestAMD64State*,
+ HWord opc4_and_imm,
+ HWord gstOffL, HWord gstOffR,
+ HWord edxIN, HWord eaxIN
+ );
+
//extern void amd64g_dirtyhelper_CPUID_sse0 ( VexGuestAMD64State* );
//extern void amd64g_dirtyhelper_CPUID_sse1 ( VexGuestAMD64State* );
/*---------------------------------------------------------------*/
/*--- Helpers for SSE4.2 PCMP{E,I}STR{I,M} ---*/
/*---------------------------------------------------------------*/
-/* CALLED FROM GENERATED CODE: DIRTY HELPER(s). (But not really,
- actually it could be a clean helper, but for the fact that we can't
- pass by value 2 x V128 to a clean helper.) Reads guest state, no
- writes to guest state, no accesses of memory, is a pure function.
- This relies on the property that the XMM regs are laid out
- consecutively in the guest state, so we can index into them here.
- Returned value (0 .. 16) is in the low 16 bits of the return value.
- Returned bits 31:16 hold the result OSZACP value.
-*/
-ULong amd64g_dirtyhelper_ISTRI_08 ( VexGuestAMD64State* gst,
- HWord gstOffL, HWord gstOffR )
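+/* Build the 16-bit zero-byte mask for a vector: bit i of the result
+ is set iff byte i of the argument is zero. This supplies the
+ implicit-length validity information for the ISTRx cases. */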
+static UInt zmask_from_V128 ( V128* arg )
{
- U128* argL = (U128*)( ((UChar*)gst) + gstOffL );
- U128* argR = (U128*)( ((UChar*)gst) + gstOffR );
- return (HWord) compute_ISTRI_08( argL, argR );
+ UInt i, res = 0;
+ for (i = 0; i < 16; i++) {
+ res |= ((arg->w8[i] == 0) ? 1 : 0) << i;
+ }
+ return res;
}
-ULong amd64g_dirtyhelper_ISTRI_0C ( VexGuestAMD64State* gst,
- HWord gstOffL, HWord gstOffR )
-{
- U128* argL = (U128*)( ((UChar*)gst) + gstOffL );
- U128* argR = (U128*)( ((UChar*)gst) + gstOffR );
- return (HWord) compute_ISTRI_0C( argL, argR );
-}
+/* Helps with PCMP{I,E}STR{I,M}.
-ULong amd64g_dirtyhelper_ISTRI_3A ( VexGuestAMD64State* gst,
- HWord gstOffL, HWord gstOffR )
+ CALLED FROM GENERATED CODE: DIRTY HELPER(s). (Not really dirty;
+ it could be a clean helper, were it not for the fact that we can't
+ pass by value 2 x V128 to a clean helper, nor have one returned.)
+ Reads guest state, writes to guest state for the xSTRM cases,
+ makes no memory accesses, and is otherwise a pure function.
+
+ opc4_and_imm contains (the 4th byte of the opcode << 8) | the imm8
+ byte, so the callee knows which I/E and I/M variant it is dealing
+ with and what the specific operation is. The 4th opcode byte is in
+ the range 0x60 to 0x63:
+ istri 66 0F 3A 63
+ istrm 66 0F 3A 62
+ estri 66 0F 3A 61
+ estrm 66 0F 3A 60
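+
+ For example, "pcmpistri $0x0C, %xmm2, %xmm1" reaches the helper
+ with opc4_and_imm == (0x63 << 8) | 0x0C == 0x630C.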
+
+ gstOffL and gstOffR are the guest state offsets for the two XMM
+ register inputs. We never have to deal with the memory case since
+ that is handled by pre-loading the relevant value into the fake
+ XMM16 register.
+
+ For ESTRx variants, edxIN and eaxIN hold the values of those two
+ registers.
+
+ In all cases, the bottom 16 bits of the result contain the new
+ OSZACP %rflags values. For xSTRI variants, bits[31:16] of the
+ result hold the new %ecx value. For xSTRM variants, the helper
+ writes the result directly to the guest XMM0.
+
+ Declarable side effects: in all cases, reads guest state at
+ [gstOffL, +16) and [gstOffR, +16). For xSTRM variants, also writes
+ guest_XMM0.
+
+ Is expected to be called only with opc4_and_imm combinations which
+ have actually been validated, and will assert otherwise. The front
+ end should ensure we're only called with verified values.
+*/
+ULong amd64g_dirtyhelper_PCMPxSTRx (
+ VexGuestAMD64State* gst,
+ HWord opc4_and_imm,
+ HWord gstOffL, HWord gstOffR,
+ HWord edxIN, HWord eaxIN
+ )
{
- U128* argL = (U128*)( ((UChar*)gst) + gstOffL );
- U128* argR = (U128*)( ((UChar*)gst) + gstOffR );
- return (HWord) compute_ISTRI_3A( argL, argR );
-}
+ HWord opc4 = (opc4_and_imm >> 8) & 0xFF;
+ HWord imm8 = opc4_and_imm & 0xFF;
+ HWord isISTRx = opc4 & 2;
+ HWord isxSTRM = (opc4 & 1) ^ 1;
+ vassert((opc4 & 0xFC) == 0x60); /* 0x60 .. 0x63 */
+ vassert((imm8 & 1) == 0); /* we support byte-size cases only */
+
+ // where the args are
+ V128* argL = (V128*)( ((UChar*)gst) + gstOffL );
+ V128* argR = (V128*)( ((UChar*)gst) + gstOffR );
+
+ /* Create the arg validity masks, either from the vectors
+ themselves or from the supplied edx/eax values. */
+ // FIXME: this is only right for the 8-bit data cases.
+ // At least that is asserted above.
+ UInt zmaskL, zmaskR;
+ if (isISTRx) {
+ zmaskL = zmask_from_V128(argL);
+ zmaskR = zmask_from_V128(argR);
+ } else {
+ Int tmp;
+ tmp = edxIN & 0xFFFFFFFF;
+ if (tmp < -16) tmp = -16;
+ if (tmp > 16) tmp = 16;
+ if (tmp < 0) tmp = -tmp;
+ vassert(tmp >= 0 && tmp <= 16);
+ zmaskL = (1 << tmp) & 0xFFFF;
+ tmp = eaxIN & 0xFFFFFFFF;
+ if (tmp < -16) tmp = -16;
+ if (tmp > 16) tmp = 16;
+ if (tmp < 0) tmp = -tmp;
+ vassert(tmp >= 0 && tmp <= 16);
+ zmaskR = (1 << tmp) & 0xFFFF;
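+ /* Example: eaxIN == 3 (or -3) gives zmaskR == 0x0008, ie. a
+ pseudo zero-terminator at lane 3, so lanes 0 .. 2 are valid. */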
+ }
-ULong amd64g_dirtyhelper_ISTRI_4A ( VexGuestAMD64State* gst,
- HWord gstOffL, HWord gstOffR )
-{
- U128* argL = (U128*)( ((UChar*)gst) + gstOffL );
- U128* argR = (U128*)( ((UChar*)gst) + gstOffR );
- return (HWord) compute_ISTRI_4A( argL, argR );
+ // temp spot for the resulting flags and vector.
+ V128 resV;
+ UInt resOSZACP;
+
+ // do the math
+ Bool ok = compute_PCMPxSTRx (
+ &resV, &resOSZACP, argL, argR,
+ zmaskL, zmaskR, imm8, (Bool)isxSTRM
+ );
+
+ // front end shouldn't pass us any imm8 variants we can't
+ // handle. Hence:
+ vassert(ok);
+
+ // So, finally we need to get the results back to the caller.
+ // In all cases, the new OSZACP value is in the lowest 16 bits
+ // of the return value.
+ if (isxSTRM) {
+ /* gst->guest_XMM0 = resV; */ // can't: guest_XMM0 is an array (U128)
+ gst->guest_XMM0[0] = resV.w32[0];
+ gst->guest_XMM0[1] = resV.w32[1];
+ gst->guest_XMM0[2] = resV.w32[2];
+ gst->guest_XMM0[3] = resV.w32[3];
+ return resOSZACP & 0x8D5;
+ } else {
+ UInt newECX = resV.w32[0] & 0xFFFF;
+ return (newECX << 16) | (resOSZACP & 0x8D5);
+ }
}
goto decode_success;
}
+ /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
+ Extract byte from r32/m8 and insert into xmm1 */
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x20 ) {
+
+ Int imm8;
+ IRTemp new8 = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ if ( epartIsReg( modrm ) ) {
+ imm8 = (Int)(insn[3+1] & 0xF);
+ assign( new8, binop(Iop_And64,
+ unop(Iop_32Uto64,
+ getIReg32(eregOfRexRM(pfx,modrm))),
+ mkU64(0xFF)));
+ delta += 3+1+1;
+ DIP( "pinsrb $%d,%s,%s\n", imm8,
+ nameIReg32( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 1 );
+ imm8 = (Int)(insn[3+alen] & 0xF);
+ assign( new8, unop(Iop_8Uto64, loadLE( Ity_I8, mkexpr(addr) )));
+ delta += 3+alen+1;
+ DIP( "pinsrb $%d,%s,%s\n",
+ imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ // Create a V128 value which has the selected byte in the
+ // specified lane, and zeroes everywhere else.
+ IRTemp tmp128 = newTemp(Ity_V128);
+ IRTemp halfshift = newTemp(Ity_I64);
+ assign(halfshift, binop(Iop_Shl64,
+ mkexpr(new8), mkU8(8 * (imm8 & 7))));
+ vassert(imm8 >= 0 && imm8 <= 15);
+ if (imm8 < 8) {
+ assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
+ } else {
+ assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
+ }
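+ // e.g. for imm8 == 9 the byte is shifted left by 8 bits and
+ // placed in the upper 64-bit half, since lane 9 is byte 1 of w64[1].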
+
+ UShort mask = ~(1 << imm8);
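+ // mkV128 takes a 16-bit immediate in which each bit covers a whole
+ // byte lane, so e.g. imm8 == 5 gives mask == 0xFFDF: keep every
+ // lane of the existing value except lane 5.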
+
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_OrV128,
+ mkexpr(tmp128),
+ binop( Iop_AndV128,
+ getXMMReg( gregOfRexRM(pfx, modrm) ),
+ mkV128(mask) ) ) );
+
+ goto decode_success;
+ }
+
/* 66 0F 38 37 = PCMPGTQ
64x2 comparison (signed, presumably; the Intel docs don't say :-)
*/
}
/* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
+ 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
+ 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
+ 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
(selected special cases that actually occur in glibc,
not by any means a complete implementation.)
*/
if (have66noF2noF3(pfx)
&& sz == 2
&& insn[0] == 0x0F && insn[1] == 0x3A
- && insn[2] == 0x63) {
+ && (insn[2] >= 0x60 && insn[2] <= 0x63)) {
+ UInt isISTRx = insn[2] & 2;
+ UInt isxSTRM = (insn[2] & 1) ^ 1;
UInt regNoL = 0;
UInt regNoR = 0;
UChar imm = 0;
}
/* Now we know the XMM reg numbers for the operands, and the
- immediate byte. Is it one we can actually handle? */
- void* fn = NULL;
- HChar* nm = NULL;
+ immediate byte. Is it one we can actually handle? Throw out
+ any cases for which the helper function has not been
+ verified. */
switch (imm) {
- case 0x08: fn = &amd64g_dirtyhelper_ISTRI_08;
- nm = "amd64g_dirtyhelper_ISTRI_08"; break;
- case 0x0C: fn = &amd64g_dirtyhelper_ISTRI_0C;
- nm = "amd64g_dirtyhelper_ISTRI_0C"; break;
- case 0x3A: fn = &amd64g_dirtyhelper_ISTRI_3A;
- nm = "amd64g_dirtyhelper_ISTRI_3A"; break;
- case 0x4A: fn = &amd64g_dirtyhelper_ISTRI_4A;
- nm = "amd64g_dirtyhelper_ISTRI_4A"; break;
- default: goto decode_failure;
- }
- vassert(fn); vassert(nm);
+ case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
+ case 0x1A: case 0x3A: case 0x44: case 0x4A:
+ break;
+ default:
+ goto decode_failure;
+ }
+
+ /* Who ya gonna call? Presumably not Ghostbusters. */
+ void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
+ HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
+ /* Round up the arguments. Note that this is a kludge -- the
+ use of mkU64 rather than mkIRExpr_HWord implies the
+ assumption that the host's word size is 64-bit. */
UInt gstOffL = regNoL == 16 ? OFFB_XMM16 : xmmGuestRegOffset(regNoL);
UInt gstOffR = xmmGuestRegOffset(regNoR);
- IRTemp resT = newTemp(Ity_I64);
- IRDirty* d
- = unsafeIRDirty_1_N( resT, 0/*regparms*/,
- nm, fn,
- mkIRExprVec_2( mkIRExpr_HWord(gstOffL),
- mkIRExpr_HWord(gstOffR)) );
+ IRExpr* opc4_and_imm = mkU64((insn[2] << 8) | (imm & 0xFF));
+ IRExpr* gstOffLe = mkU64(gstOffL);
+ IRExpr* gstOffRe = mkU64(gstOffR);
+ IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
+ IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
+ IRExpr** args
+ = mkIRExprVec_5( opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
+
+ IRTemp resT = newTemp(Ity_I64);
+ IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
/* It's not really a dirty call, but we can't use the clean
helper mechanism here for the very lame reason that we can't
- pass 2 x V128s by value to a helper. Hence this roundabout
- scheme. */
+ pass 2 x V128s by value to a helper, nor get one back. Hence
+ this roundabout scheme. */
d->needsBBP = True;
d->nFxState = 2;
d->fxState[0].fx = Ifx_Read;
d->fxState[0].offset = gstOffL;
d->fxState[0].size = sizeof(U128);
d->fxState[1].fx = Ifx_Read;
d->fxState[1].offset = gstOffR;
d->fxState[1].size = sizeof(U128);
+ if (isxSTRM) {
+ /* Declare that the helper writes XMM0. */
+ d->nFxState = 3;
+ d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].offset = xmmGuestRegOffset(0);
+ d->fxState[2].size = sizeof(U128);
+ }
+
stmt( IRStmt_Dirty(d) );
- /* Now resT[15:0] holds what the Intel docs call IntRes2, and
- resT[31:16] holds the new OSZACP values. We must park the
- resultin ECX and update the condition codes. */
- putIReg64(R_RCX, binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF)));
+ /* Now resT[15:0] holds the new OSZACP values, so the condition
+ codes must be updated. And for an xSTRI case, resT[31:16]
+ holds the new ECX value, so stash that too. */
+ if (!isxSTRM) {
+ putIReg64(R_RCX, binop(Iop_And64,
+ binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
+ mkU64(0xFFFF)));
+ }
stmt( IRStmt_Put(
OFFB_CC_DEP1,
- binop(Iop_And64, binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
- mkU64(0xFFFF))
+ binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
));
stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
if (regNoL == 16) {
- DIP("pcmpistri $%x,%s,%s\n",
+ DIP("pcmp%cstr%c $%x,%s,%s\n",
+ isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
(UInt)imm, dis_buf, nameXMMReg(regNoR));
} else {
- DIP("pcmpistri $%x,%s,%s\n",
+ DIP("pcmp%cstr%c $%x,%s,%s\n",
+ isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
(UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
}
goto decode_success;
}
+
+ /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
+ Logical compare (set ZF and CF from AND/ANDN of the operands) */
+ if (have66noF2noF3( pfx ) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x17) {
+ modrm = insn[3];
+ IRTemp vecE = newTemp(Ity_V128);
+ IRTemp vecG = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
+ delta += 3+1;
+ DIP( "ptest %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
+ delta += 3+alen;
+ DIP( "ptest %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
+
+ /* Set Z=1 iff (vecE & vecG) == 0
+ Set C=1 iff (vecE & not vecG) == 0
+ */
+
+ /* andV, andnV: vecE & vecG, vecE and not(vecG) */
+ IRTemp andV = newTemp(Ity_V128);
+ IRTemp andnV = newTemp(Ity_V128);
+ assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
+ assign(andnV, binop(Iop_AndV128,
+ mkexpr(vecE),
+ binop(Iop_XorV128, mkexpr(vecG),
+ mkV128(0xFFFF))));
+
+ /* The same, but reduced to 64-bit values, by or-ing the top
+ and bottom 64-bits together. It relies on this trick:
+
+ InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
+
+ InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
+ InterleaveHI64x2([a,b],[a,b]) == [a,a]
+
+ and so the OR of the above 2 exprs produces
+ [a OR b, a OR b], from which we simply take the lower half.
+ */
+ IRTemp and64 = newTemp(Ity_I64);
+ IRTemp andn64 = newTemp(Ity_I64);
+
+ assign(
+ and64,
+ unop(Iop_V128to64,
+ binop(Iop_OrV128,
+ binop(Iop_InterleaveLO64x2, mkexpr(andV), mkexpr(andV)),
+ binop(Iop_InterleaveHI64x2, mkexpr(andV), mkexpr(andV))
+ )
+ )
+ );
+
+ assign(
+ andn64,
+ unop(Iop_V128to64,
+ binop(Iop_OrV128,
+ binop(Iop_InterleaveLO64x2, mkexpr(andnV), mkexpr(andnV)),
+ binop(Iop_InterleaveHI64x2, mkexpr(andnV), mkexpr(andnV))
+ )
+ )
+ );
+
+ /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
+ slice out the Z and C bits conveniently. We use the standard
+ trick all-zeroes -> all-zeroes, anything-else -> all-ones
+ done by "(x | -x) >>s (word-size - 1)".
+ */
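+ /* Hence below: z64 ends up all-ones when and64 is zero (so Z will
+ be set) and all-zeroes otherwise; likewise c64 from andn64. */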
+ IRTemp z64 = newTemp(Ity_I64);
+ IRTemp c64 = newTemp(Ity_I64);
+ assign(z64,
+ unop(Iop_Not64,
+ binop(Iop_Sar64,
+ binop(Iop_Or64,
+ binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
+ mkexpr(and64)
+ ),
+ mkU8(63)))
+ );
+
+ assign(c64,
+ unop(Iop_Not64,
+ binop(Iop_Sar64,
+ binop(Iop_Or64,
+ binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
+ mkexpr(andn64)
+ ),
+ mkU8(63)))
+ );
+
+ /* And finally, slice out the Z and C flags and set the flags
+ thunk to COPY for them. O, S, A and P are set to zero. */
+ IRTemp newOSZACP = newTemp(Ity_I64);
+ assign(newOSZACP,
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
+ binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))
+ )
+ );
+
+ stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
+ stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
+ stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+ stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+
+ goto decode_success;
+ }
+
+
/* ---------------------------------------------------- */
/* --- end of the SSE4 decoder --- */
/* ---------------------------------------------------- */
fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16;
/* This is a Core-2-like machine */
- /* fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16"; */
- /* fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16; */
+ //fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
+ //fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
/* This is a Core-i5-like machine */
}
else {
/* We need the definitions for OSZACP eflags/rflags offsets.
#including guest_{amd64,x86}_defs.h causes chaos, so just copy the
- require values directly. They are not going to change in the
- future :-)
+ required values directly. They are not going to change in the
+ foreseeable future :-)
*/
+
#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
return 32 - clz32((~x) & (x-1));
}
-
-/* Do the computations for SSE4.2 ISTRI_XX. Not called directly from
- generated code. Pure function, reads *argLU and *argRU, returned
- value (0 .. 16) is in the low 16 bits of the return value.
- Returned bits 31:16 hold the result OSZACP value.
-*/
-UInt compute_ISTRI_08 ( U128* argLU, U128* argRU )
+/* Convert a 4-bit value to a 32-bit value by cloning each bit 8
+ times. There's surely a better way to do this, but I don't know
+ what it is. */
+static UInt bits4_to_bytes4 ( UInt bits4 )
{
- /* unsigned bytes (also works for unsigned)
- equal each (straightforward parallel compare)
- polarity + (IntRes2 = IntRes1)
- index 0 (want index of ls 1 bit)
- */
- Int i;
- UChar* argL = (UChar*)argLU;
- UChar* argR = (UChar*)argRU;
- UInt boolResII = 0, zmaskL = 0, zmaskR = 0;
- for (i = 15; i >= 0; i--) {
- UChar cL = argL[i];
- UChar cR = argR[i];
- zmaskL = (zmaskL << 1) | (cL == 0 ? 1 : 0);
- zmaskR = (zmaskR << 1) | (cR == 0 ? 1 : 0);
- boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
- }
- UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
- UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
-
- // do invalidation, common to all equal-each cases
- UInt intRes1
- = (boolResII & validL & validR) // if both valid, use cmpres
- | (~ (validL | validR)); // if both invalid, force 1
- // else force 0
- intRes1 &= 0xFFFF;
-
- // polarity: +
- UInt intRes2 = intRes1;
-
- // generate ecx value, common to all index-of-ls-1-bit cases
- UInt newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
-
- // generate new flags, common to all ISTRI and ISTRM cases
- UInt newFlags // A, P are zero
- = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
- | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
- | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
- | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
-
- return (newFlags << 16) | newECX;
+ UInt r = 0;
+ r |= (bits4 & 1) ? 0x000000FF : 0;
+ r |= (bits4 & 2) ? 0x0000FF00 : 0;
+ r |= (bits4 & 4) ? 0x00FF0000 : 0;
+ r |= (bits4 & 8) ? 0xFF000000 : 0;
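+ /* e.g. bits4 == 0x5 gives r == 0x00FF00FF */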
+ return r;
}
-UInt compute_ISTRI_0C ( U128* argLU, U128* argRU )
+/* Given partial results from a pcmpXstrX operation (intRes1,
+ basically), generate an I- or M-format output value, also the new
+ OSZACP flags. */
+static
+void compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ UInt intRes1,
+ UInt zmaskL, UInt zmaskR,
+ UInt validL,
+ UInt pol, UInt idx,
+ Bool isxSTRM )
{
- /* unsigned bytes
- equal ordered (substring search)
- polarity + (IntRes2 = IntRes1)
- index 0 (want index of ls 1 bit)
+ vassert((pol >> 2) == 0);
+ vassert((idx >> 1) == 0);
+
+ UInt intRes2 = 0;
+ switch (pol) {
+ case 0: intRes2 = intRes1; break; // pol +
+ case 1: intRes2 = ~intRes1; break; // pol -
+ case 2: intRes2 = intRes1; break; // pol m+
+ case 3: intRes2 = intRes1 ^ validL; break; // pol m-
+ }
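+ // Plain "-" inverts all 32 bits (re-trimmed just below), whereas
+ // "masked-" inverts only the lanes that are valid in argL.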
+ intRes2 &= 0xFFFF;
+
+ if (isxSTRM) {
+
+ // generate M-format output (a bit or byte mask in XMM0)
+ if (idx) {
+ resV->w32[0] = bits4_to_bytes4( (intRes2 >> 0) & 0xF );
+ resV->w32[1] = bits4_to_bytes4( (intRes2 >> 4) & 0xF );
+ resV->w32[2] = bits4_to_bytes4( (intRes2 >> 8) & 0xF );
+ resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF );
+ } else {
+ resV->w32[0] = intRes2 & 0xFFFF;
+ resV->w32[1] = 0;
+ resV->w32[2] = 0;
+ resV->w32[3] = 0;
+ }
- argL: haystack, argR: needle
- */
- UInt i, hi, ni;
- UChar* argL = (UChar*)argLU;
- UChar* argR = (UChar*)argRU;
- UInt boolRes = 0, zmaskL = 0, zmaskR = 0;
- UInt keepSearching = 1;
- for (i = 0; i < 16; i++) {
- UChar cL = argL[i];
- UChar cR = argR[i];
- zmaskL = (zmaskL >> 1) | (cL == 0 ? (1 << 15) : 0);
- zmaskR = (zmaskR >> 1) | (cR == 0 ? (1 << 15) : 0);
-
- if (argL[i] == 0) {
- // run off the end of the haystack.
- keepSearching = 0;
- }
-
- UInt m = 1;
- if (keepSearching) {
- for (ni = 0; ni < 16; ni++) {
- if (argR[ni] == 0) break;
- hi = ni + i;
- if (hi >= 16) break;
- if (argL[hi] != argR[ni]) { m = 0; break; }
- }
+ } else {
+
+ // generate I-format output: an index, destined for ECX
+ UInt newECX = 0;
+ if (idx) {
+ // index of ms-1-bit
+ newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
} else {
- m = 0;
+ // index of ls-1-bit
+ newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
}
- boolRes = (boolRes >> 1) | (m << 15);
+
+ resV->w32[0] = newECX;
+ resV->w32[1] = 0;
+ resV->w32[2] = 0;
+ resV->w32[3] = 0;
}
- // boolRes is "pre-invalidated"
- UInt intRes1 = boolRes & 0xFFFF;
+ // generate new flags, common to all ISTRI and ISTRM cases
+ *resOSZACP // A, P are zero
+ = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
+ | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
+ | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
+ | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
+}
+
- // polarity: +
- UInt intRes2 = intRes1;
+/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
+ variants.
- // generate ecx value, common to all index-of-ls-1-bit cases
- UInt newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
+ For xSTRI variants, the new ECX value is placed in the lowest 32
+ bits of *resV, and the top 96 bits are zeroed. For xSTRM
+ variants, the result is a 128 bit value and is placed at *resV in
+ the obvious way.
- // generate new flags, common to all ISTRI and ISTRM cases
- UInt newFlags // A, P are zero
- = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
- | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
- | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
- | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
+ For all variants, the new OSZACP value is placed at *resOSZACP.
- return (newFlags << 16) | newECX;
-}
+ argLV and argRV are the vector args. The caller must prepare a
+ 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
+ must be 1 for each zero byte of the respective arg. For ESTRx
+ variants this is derived from the explicit length indication, and
+ must be 0 in all places except at the bit index corresponding to
+ the valid length (0 .. 16). If the valid length is 16 then the
+ mask must be all zeroes. In all cases, bits 31:16 must be zero.
+ imm8 is the original immediate from the instruction. isxSTRM
+ indicates whether this is an xSTRM or xSTRI variant, which
+ controls how much of *resV is written.
-UInt compute_ISTRI_3A ( U128* argLU, U128* argRU )
+ If the given imm8 case can be handled, the return value is True.
+ If not, False is returned, and neither *resV nor *resOSZACP is
+ altered.
+*/
+
+Bool compute_PCMPxSTRx ( /*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ V128* argLV, V128* argRV,
+ UInt zmaskL, UInt zmaskR,
+ UInt imm8, Bool isxSTRM )
{
- /* signed bytes (also works for unsigned)
- equal each (straightforward parallel compare)
- polarity Masked- (IntRes2 = IntRes1 ^ validL)
- index 0 (want index of ls 1 bit)
- */
- Int i;
- UChar* argL = (UChar*)argLU;
- UChar* argR = (UChar*)argRU;
- UInt boolResII = 0, zmaskL = 0, zmaskR = 0;
- for (i = 15; i >= 0; i--) {
- UChar cL = argL[i];
- UChar cR = argR[i];
- zmaskL = (zmaskL << 1) | (cL == 0 ? 1 : 0);
- zmaskR = (zmaskR << 1) | (cR == 0 ? 1 : 0);
- boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
+ vassert(imm8 < 0x80);
+ vassert((zmaskL >> 16) == 0);
+ vassert((zmaskR >> 16) == 0);
+
+ /* Explicitly reject any imm8 values that haven't been validated,
+ even if they would probably work. Life is too short to have
+ unvalidated cases in the code base. */
+ switch (imm8) {
+ case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
+ case 0x1A: case 0x3A: case 0x44: case 0x4A:
+ break;
+ default:
+ return False;
}
- UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
- UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
- // do invalidation, common to all equal-each cases
- UInt intRes1
- = (boolResII & validL & validR) // if both valid, use cmpres
- | (~ (validL | validR)); // if both invalid, force 1
- // else force 0
- intRes1 &= 0xFFFF;
+ UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
+ UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
+ UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
+ UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
+
+ /*----------------------------------------*/
+ /*-- strcmp on byte data --*/
+ /*----------------------------------------*/
+
+ if (agg == 2/*equal each, aka strcmp*/
+ && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
+ Int i;
+ UChar* argL = (UChar*)argLV;
+ UChar* argR = (UChar*)argRV;
+ UInt boolResII = 0;
+ for (i = 15; i >= 0; i--) {
+ UChar cL = argL[i];
+ UChar cR = argR[i];
+ boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
+ }
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
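+ // -zmask sets every bit at and above zmask's lowest 1-bit, so
+ // ~(zmask | -zmask) keeps exactly the lanes below the first zero
+ // byte; a zmask of 0 makes all lanes valid.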
+
+ // do invalidation, common to all equal-each cases
+ UInt intRes1
+ = (boolResII & validL & validR) // if both valid, use cmpres
+ | (~ (validL | validR)); // if both invalid, force 1
+ // else force 0
+ intRes1 &= 0xFFFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
+ }
- // polarity: Masked-
- UInt intRes2 = (intRes1 ^ validL) & 0xFFFF;
+ /*----------------------------------------*/
+ /*-- set membership on byte data --*/
+ /*----------------------------------------*/
+
+ if (agg == 0/*equal any, aka find chars in a set*/
+ && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
+ /* argL: the string, argR: charset */
+ UInt si, ci;
+ UChar* argL = (UChar*)argLV;
+ UChar* argR = (UChar*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+
+ for (si = 0; si < 16; si++) {
+ if ((validL & (1 << si)) == 0)
+ // run off the end of the string.
+ break;
+ UInt m = 0;
+ for (ci = 0; ci < 16; ci++) {
+ if ((validR & (1 << ci)) == 0) break;
+ if (argR[ci] == argL[si]) { m = 1; break; }
+ }
+ boolRes |= (m << si);
+ }
- // generate ecx value, common to all index-of-ls-1-bit cases
- UInt newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFFFF;
+
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
- // generate new flags, common to all ISTRI and ISTRM cases
- UInt newFlags // A, P are zero
- = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
- | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
- | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
- | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
+ return True;
+ }
- return (newFlags << 16) | newECX;
-}
+ /*----------------------------------------*/
+ /*-- substring search on byte data --*/
+ /*----------------------------------------*/
+
+ if (agg == 3/*equal ordered, aka substring search*/
+ && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
+
+ /* argL: haystack, argR: needle */
+ UInt ni, hi;
+ UChar* argL = (UChar*)argLV;
+ UChar* argR = (UChar*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+ for (hi = 0; hi < 16; hi++) {
+ if ((validL & (1 << hi)) == 0)
+ // run off the end of the haystack
+ break;
+ UInt m = 1;
+ for (ni = 0; ni < 16; ni++) {
+ if ((validR & (1 << ni)) == 0) break;
+ UInt i = ni + hi;
+ if (i >= 16) break;
+ if (argL[i] != argR[ni]) { m = 0; break; }
+ }
+ boolRes |= (m << hi);
+ }
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFFFF;
-UInt compute_ISTRI_4A ( U128* argLU, U128* argRU )
-{
- /* signed bytes (also works for unsigned)
- equal each (straightforward parallel compare)
- polarity + (IntRes2 = IntRes1)
- index 1 (want index of ms 1 bit)
- */
- Int i;
- UChar* argL = (UChar*)argLU;
- UChar* argR = (UChar*)argRU;
- UInt boolResII = 0, zmaskL = 0, zmaskR = 0;
- for (i = 15; i >= 0; i--) {
- UChar cL = argL[i];
- UChar cR = argR[i];
- zmaskL = (zmaskL << 1) | (cL == 0 ? 1 : 0);
- zmaskR = (zmaskR << 1) | (cR == 0 ? 1 : 0);
- boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
+
+ return True;
}
- UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
- UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
- // do invalidation, common to all equal-each cases
- UInt intRes1
- = (boolResII & validL & validR) // if both valid, use cmpres
- | (~ (validL | validR)); // if both invalid, force 1
- // else force 0
- intRes1 &= 0xFFFF;
+ /*----------------------------------------*/
+ /*-- ranges, unsigned byte data --*/
+ /*----------------------------------------*/
+
+ if (agg == 1/*ranges*/
+ && fmt == 0/*ub*/) {
+
+ /* argL: string, argR: range-pairs */
+ UInt ri, si;
+ UChar* argL = (UChar*)argLV;
+ UChar* argR = (UChar*)argRV;
+ UInt boolRes = 0;
+ UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
+ UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
+ for (si = 0; si < 16; si++) {
+ if ((validL & (1 << si)) == 0)
+ // run off the end of the string
+ break;
+ UInt m = 0;
+ for (ri = 0; ri < 16; ri += 2) {
+ if ((validR & (3 << ri)) != (3 << ri)) break;
+ if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
+ m = 1; break;
+ }
+ }
+ boolRes |= (m << si);
+ }
- // polarity
- UInt intRes2 = intRes1;
+ // boolRes is "pre-invalidated"
+ UInt intRes1 = boolRes & 0xFFFF;
- // generate ecx value, common to all index-of-ms-1-bit cases
- UInt newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
+ // generate I-format output
+ compute_PCMPxSTRx_gen_output(
+ resV, resOSZACP,
+ intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
+ );
- // generate new flags, common to all ISTRI and ISTRM cases
- UInt newFlags // A, P are zero
- = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
- | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
- | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
- | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
+ return True;
+ }
- return (newFlags << 16) | newECX;
+ return False;
}
generated code. CLEAN HELPER. */
extern ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp );
-/* Do the computations for SSE4.2 ISTRI_XX. Not called directly from
- generated code. Pure function, reads *argLU and *argRU, returned
- value (0 .. 16) is in the low 16 bits of the return value.
- Returned bits 31:16 hold the result OSZACP value. */
-extern UInt compute_ISTRI_08 ( U128* argLU, U128* argRU );
-extern UInt compute_ISTRI_0C ( U128* argLU, U128* argRU );
-extern UInt compute_ISTRI_3A ( U128* argLU, U128* argRU );
-extern UInt compute_ISTRI_4A ( U128* argLU, U128* argRU );
-
+/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
+ variants. See bigger comment on implementation of this function
+ for details on call/return conventions. */
+extern Bool compute_PCMPxSTRx ( /*OUT*/V128* resV,
+ /*OUT*/UInt* resOSZACP,
+ V128* argLV, V128* argRV,
+ UInt zmaskL, UInt zmaskR,
+ UInt imm8, Bool isxSTRM );
#endif /* ndef __VEX_GUEST_GENERIC_X87_H */
}
+/* Expand the given byte into a 64-bit word, by cloning each bit
+ 8 times; e.g. 0xA5 expands to 0xFF00FF0000FF00FF. */
+static ULong bitmask8_to_bytemask64 ( UShort w8 )
+{
+ vassert(w8 == (w8 & 0xFF));
+ ULong w64 = 0;
+ Int i;
+ for (i = 0; i < 8; i++) {
+ if (w8 & (1<<i))
+ w64 |= (0xFFULL << (8 * i));
+ }
+ return w64;
+}
+
+
//.. /* Round an x87 FPU value to 53-bit-mantissa precision, to be used
//.. after most non-simple FPU operations (simple = +, -, *, / and
//.. sqrt).
switch (e->Iex.Const.con->Ico.V128) {
case 0x0000:
dst = generate_zeroes_V128(env);
- return dst;
+ break;
case 0xFFFF:
dst = generate_ones_V128(env);
- return dst;
- default:
- break;
- }
- AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
- const ULong const_z64 = 0x0000000000000000ULL;
- const ULong const_o64 = 0xFFFFFFFFFFFFFFFFULL;
- const ULong const_z32o32 = 0x00000000FFFFFFFFULL;
- const ULong const_o32z32 = 0xFFFFFFFF00000000ULL;
- switch (e->Iex.Const.con->Ico.V128) {
- case 0x0000: case 0xFFFF:
- vassert(0); /* handled just above */
- /* do push_uimm64 twice, first time for the high-order half. */
- case 0x00F0:
- push_uimm64(env, const_z64);
- push_uimm64(env, const_o32z32);
- break;
- case 0x00FF:
- push_uimm64(env, const_z64);
- push_uimm64(env, const_o64);
- break;
- case 0x000F:
- push_uimm64(env, const_z64);
- push_uimm64(env, const_z32o32);
break;
- case 0x0F00:
- push_uimm64(env, const_z32o32);
- push_uimm64(env, const_z64);
- break;
- case 0x0F0F:
- push_uimm64(env, const_z32o32);
- push_uimm64(env, const_z32o32);
- break;
- case 0x0FF0:
- push_uimm64(env, const_z32o32);
- push_uimm64(env, const_o32z32);
- break;
- case 0x0FFF:
- push_uimm64(env, const_z32o32);
- push_uimm64(env, const_o64);
- break;
- case 0xF000:
- push_uimm64(env, const_o32z32);
- push_uimm64(env, const_z64);
- break;
- case 0xF00F:
- push_uimm64(env, const_o32z32);
- push_uimm64(env, const_z32o32);
- break;
- case 0xF0F0:
- push_uimm64(env, const_o32z32);
- push_uimm64(env, const_o32z32);
- break;
- case 0xF0FF:
- push_uimm64(env, const_o32z32);
- push_uimm64(env, const_o64);
- break;
- case 0xFF00:
- push_uimm64(env, const_o64);
- push_uimm64(env, const_z64);
- break;
- case 0xFF0F:
- push_uimm64(env, const_o64);
- push_uimm64(env, const_z32o32);
- break;
- case 0xFFF0:
- push_uimm64(env, const_o64);
- push_uimm64(env, const_o32z32);
+ default: {
+ AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
+ /* do push_uimm64 twice, first time for the high-order half. */
+ push_uimm64(env, bitmask8_to_bytemask64(
+ (e->Iex.Const.con->Ico.V128 >> 8) & 0xFF
+ ));
+ push_uimm64(env, bitmask8_to_bytemask64(
+ (e->Iex.Const.con->Ico.V128 >> 0) & 0xFF
+ ));
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
+ add_to_rsp(env, 16);
break;
- default:
- goto vec_fail;
+ }
}
- addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, rsp0 ));
- add_to_rsp(env, 16);
return dst;
}
return dst;
}
- vec_fail:
+ //vec_fail:
vex_printf("iselVecExpr (amd64, subarch = %s): can't reduce\n",
LibVEX_ppVexHwCaps(VexArchAMD64, env->hwcaps));
ppIRExpr(e);
#ifndef __VEX_HOST_GENERIC_SIMD128_H
#define __VEX_HOST_GENERIC_SIMD128_H
-/* A union for doing 128-bit primitives conveniently. It is not
- public and so not placed in pub/. */
-typedef
- union {
- UChar w8[16];
- UShort w16[8];
- UInt w32[4];
- ULong w64[2];
- }
- V128;
-
-
#include "libvex_basictypes.h"
/* DO NOT MAKE THESE INTO REGPARM FNS! THIS WILL BREAK CALLING
/* Always 128 bits. */
typedef UInt U128[4];
+/* A union for doing 128-bit vector primitives conveniently. */
+typedef
+ union {
+ UChar w8[16];
+ UShort w16[8];
+ UInt w32[4];
+ ULong w64[2];
+ }
+ V128;
+/* Floating point. */
typedef float Float; /* IEEE754 single-precision (32-bit) value */
typedef double Double; /* IEEE754 double-precision (64-bit) value */