/*--- Misc integer helpers, including rotates and CPUID. ---*/
/*---------------------------------------------------------------*/
-/* Claim to be the following CPU:
- vendor_id : AuthenticAMD
- cpu family : 15
- model : 12
- model name : AMD Athlon(tm) 64 Processor 3200+
- stepping : 0
- cpu MHz : 2202.917
- cache size : 512 KB
+/* Claim to be the following CPU (2 x ...):
+ vendor_id : GenuineIntel
+ cpu family : 6
+ model : 15
+ model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
+ stepping : 6
+ cpu MHz : 2394.000
+ cache size : 4096 KB
+ physical id : 0
+ siblings : 2
+ core id : 0
+ cpu cores : 2
fpu : yes
fpu_exception : yes
- cpuid level : 1
+ cpuid level : 10
wp : yes
- flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr
- pge mca cmov pat pse36 clflush mmx fxsr sse sse2
- pni syscall nx mmxext lm 3dnowext 3dnow
- bogomips : 4308.99
- TLB size : 1088 4K pages
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep
+ mtrr pge mca cmov pat pse36 clflush dts acpi
+ mmx fxsr sse sse2 ss ht tm syscall nx lm
+ constant_tsc pni monitor ds_cpl vmx est tm2
+ cx16 xtpr lahf_lm
+ bogomips : 4798.78
clflush size : 64
cache_alignment : 64
- address sizes : 40 bits physical, 48 bits virtual
- power management: ts fid vid ttp
+ address sizes : 36 bits physical, 48 bits virtual
+ power management:
*/
void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st )
{
#  define SET_ABCD(_a,_b,_c,_d)                \
      do { st->guest_RAX = (ULong)(_a);        \
           st->guest_RBX = (ULong)(_b);        \
           st->guest_RCX = (ULong)(_c);        \
           st->guest_RDX = (ULong)(_d);        \
      } while (0)
switch (0xFFFFFFFF & st->guest_RAX) {
- case 0x0:
- SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65);
+ case 0x00000000:
+ SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
break;
- case 0x1:
- SET_ABCD(0x00000fc0, 0x00000800, 0x00000000, 0x078bfbff);
+ case 0x00000001:
+ SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
break;
- case 0x80000000:
- SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65);
+ case 0x00000002:
+ SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
break;
- case 0x80000001:
- SET_ABCD(0x00000fc0, 0x0000010a, 0x00000000, 0xe1d3fbff);
+ case 0x00000003:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
break;
- case 0x80000002:
- SET_ABCD(0x20444d41, 0x6c687441, 0x74286e6f, 0x3620296d);
+ case 0x00000004: {
+ switch (0xFFFFFFFF & st->guest_RCX) {
+ case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
+ 0x0000003f, 0x00000001); break;
+ case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
+ 0x0000003f, 0x00000001); break;
+ case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
+ 0x00000fff, 0x00000001); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ }
break;
- case 0x80000003:
- SET_ABCD(0x72502034, 0x7365636f, 0x20726f73, 0x30303233);
+ }
+ case 0x00000005:
+ SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
break;
- case 0x80000004:
- SET_ABCD(0x0000002b, 0x00000000, 0x00000000, 0x00000000);
+ case 0x00000006:
+ SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
break;
- case 0x80000005:
- SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140);
+ case 0x00000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
break;
- case 0x80000006:
- SET_ABCD(0x00000000, 0x42004200, 0x02008140, 0x00000000);
+ case 0x00000008:
+ SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
break;
- case 0x80000007:
- SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f);
+ case 0x00000009:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
break;
- case 0x80000008:
- SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000);
+ case 0x0000000a:
+ unhandled_eax_value:
+ SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
break;
- default:
- SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ case 0x80000000:
+ SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800);
+ break;
+ case 0x80000002:
+ SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
break;
+ case 0x80000004:
+ SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
+ break;
+ case 0x80000005:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ goto unhandled_eax_value;
}
# undef SET_ABCD
}
}
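For reference, the magic constants above are just ASCII packed little-endian into the CPUID result registers: leaf 0 returns the vendor id in EBX, EDX, ECX (in that order), and leaves 0x80000002..4 return the brand string in EAX, EBX, ECX, EDX. A stand-alone sketch, not part of the patch, that decodes the tables (put4 and main are illustrative names):

#include <stdio.h>
#include <stdint.h>

/* Write the four little-endian bytes of one CPUID result register. */
static void put4 ( char* out, uint32_t r )
{
   out[0] = (char)( r        & 0xFF);
   out[1] = (char)((r >>  8) & 0xFF);
   out[2] = (char)((r >> 16) & 0xFF);
   out[3] = (char)((r >> 24) & 0xFF);
}

int main ( void )
{
   char vendor[13] = {0};
   char name[49]   = {0};
   uint32_t brand[12] = {
      0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865,
      0x43203229, 0x20205550, 0x20202020, 0x20202020,
      0x30303636, 0x20402020, 0x30342e32, 0x007a4847
   };
   int i;

   /* Leaf 0: vendor id is EBX, EDX, ECX, in that order. */
   put4(vendor + 0, 0x756e6547);
   put4(vendor + 4, 0x49656e69);
   put4(vendor + 8, 0x6c65746e);
   printf("%s\n", vendor);          /* GenuineIntel */

   /* Leaves 0x80000002..4: brand string is EAX, EBX, ECX, EDX per leaf. */
   for (i = 0; i < 12; i++)
      put4(name + 4*i, brand[i]);
   printf("%s\n", name);            /* Intel(R) Core(TM)2 CPU ... 6600 @ 2.40GHz */
   return 0;
}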
+/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
+ values (aa,bb), computes, for each of the 4 16-bit lanes:
+
+ (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
+*/
+static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp bb = newTemp(Ity_I64);
+ IRTemp aahi32s = newTemp(Ity_I64);
+ IRTemp aalo32s = newTemp(Ity_I64);
+ IRTemp bbhi32s = newTemp(Ity_I64);
+ IRTemp bblo32s = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+ IRTemp one32x2 = newTemp(Ity_I64);
+ assign(aa, aax);
+ assign(bb, bbx);
+ assign( aahi32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
+ mkU8(16) ));
+ assign( aalo32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
+ mkU8(16) ));
+ assign( bbhi32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
+ mkU8(16) ));
+ assign( bblo32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
+ mkU8(16) ));
+ assign(one32x2, mkU64( (1ULL << 32) + 1 ));
+ assign(
+ rHi,
+ binop(
+ Iop_ShrN32x2,
+ binop(
+ Iop_Add32x2,
+ binop(
+ Iop_ShrN32x2,
+ binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
+ mkU8(14)
+ ),
+ mkexpr(one32x2)
+ ),
+ mkU8(1)
+ )
+ );
+ assign(
+ rLo,
+ binop(
+ Iop_ShrN32x2,
+ binop(
+ Iop_Add32x2,
+ binop(
+ Iop_ShrN32x2,
+ binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
+ mkU8(14)
+ ),
+ mkexpr(one32x2)
+ ),
+ mkU8(1)
+ )
+ );
+ return
+ binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
+}
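
As a cross-check of the interleave/shift sequence above, a scalar model of a single 16-bit PMULHRSW lane, not part of the patch (pmulhrsw_lane is an illustrative name). Whether the 32x2 shifts are logical or arithmetic does not matter, since only the low 16 bits of each lane survive the final narrowing:

#include <stdint.h>

/* One 16-bit lane: (((a *s32 b) >>u 14) + 1) >>u 1, low 16 bits kept.
   E.g. a = b = -32768 gives 0x8000, the documented corner case where
   0x8000 * 0x8000 yields 0x8000. */
static uint16_t pmulhrsw_lane ( int16_t a, int16_t b )
{
   uint32_t prod = (uint32_t)( (int32_t)a * (int32_t)b );
   return (uint16_t)( ((prod >> 14) + 1) >> 1 );
}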
+
+/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
+ values (aa,bb), computes, for each lane:
+
+ if aa_lane < 0 then - bb_lane
+ else if aa_lane > 0 then bb_lane
+ else 0
+*/
+static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp bb = newTemp(Ity_I64);
+ IRTemp zero = newTemp(Ity_I64);
+ IRTemp bbNeg = newTemp(Ity_I64);
+ IRTemp negMask = newTemp(Ity_I64);
+ IRTemp posMask = newTemp(Ity_I64);
+ IROp opSub = Iop_INVALID;
+ IROp opCmpGTS = Iop_INVALID;
+
+ switch (laneszB) {
+ case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
+ case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
+ case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
+ default: vassert(0);
+ }
+
+ assign( aa, aax );
+ assign( bb, bbx );
+ assign( zero, mkU64(0) );
+ assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
+ assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
+ assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
+
+ return
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
+ binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
+
+}
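
The And/Or/CmpGT combination above is the usual branch-free select. A scalar sketch of one PSIGNW lane, not part of the patch, showing the branchy definition next to the masked form (names are illustrative; lane bits are carried as uint16_t to keep the C well defined):

#include <stdint.h>

static uint16_t psignw_branchy ( int16_t a, uint16_t b_bits )
{
   if (a < 0) return (uint16_t)(0u - b_bits);   /* -b, two's complement */
   if (a > 0) return b_bits;
   return 0;
}

static uint16_t psignw_masked ( int16_t a, uint16_t b_bits )
{
   uint16_t negMask = (uint16_t)(0 > a ? 0xFFFF : 0);  /* CmpGT16Sx4(zero,aa) */
   uint16_t posMask = (uint16_t)(a > 0 ? 0xFFFF : 0);  /* CmpGT16Sx4(aa,zero) */
   uint16_t negated = (uint16_t)(0u - b_bits);          /* Sub16x4(zero,bb)   */
   return (uint16_t)( (b_bits & posMask) | (negated & negMask) );
}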
+
+/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
+ value aa, computes, for each lane
+
+ if aa < 0 then -aa else aa
+
+ Note that the result is interpreted as unsigned, so that the
+ absolute value of the most negative signed input can be
+ represented.
+*/
+static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp zero = newTemp(Ity_I64);
+ IRTemp aaNeg = newTemp(Ity_I64);
+ IRTemp negMask = newTemp(Ity_I64);
+ IRTemp posMask = newTemp(Ity_I64);
+ IROp opSub = Iop_INVALID;
+ IROp opSarN = Iop_INVALID;
+
+ switch (laneszB) {
+ case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
+ case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
+ case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
+ default: vassert(0);
+ }
+
+ assign( aa, aax );
+ assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
+ assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
+ assign( zero, mkU64(0) );
+ assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
+ return
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
+ binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
+}
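
Likewise for PABS: negMask is the lane's sign bit smeared across the lane, and the result selects between aa and -aa. A scalar sketch of one PABSW lane, not part of the patch (pabsw_lane is an illustrative name); note 0x8000 maps to 0x8000, which is why the result is to be read as unsigned:

#include <stdint.h>

static uint16_t pabsw_lane ( uint16_t a )   /* a holds a signed 16-bit value */
{
   uint16_t negMask = (uint16_t)((a & 0x8000) ? 0xFFFF : 0);  /* SarN16x4(aa,15)  */
   uint16_t negated = (uint16_t)(0u - a);                     /* Sub16x4(zero,aa) */
   return (uint16_t)( (a & (uint16_t)~negMask) | (negated & negMask) );
}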
+
+static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
+ IRTemp lo64, Long byteShift )
+{
+ vassert(byteShift >= 1 && byteShift <= 7);
+ return
+ binop(Iop_Or64,
+ binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
+ binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
+ );
+}
+
+/* Generate a SIGSEGV followed by a restart of the current instruction
+ if effective_addr is not 16-aligned. This is required behaviour
+ for some SSE3 instructions and all 128-bit SSSE3 instructions.
+ This assumes that guest_RIP_curr_instr is set correctly! */
+static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
+{
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE64,
+ binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)),
+ mkU64(0)),
+ Ijk_SigSEGV,
+ IRConst_U64(guest_RIP_curr_instr)
+ )
+ );
+}
+
+
/* Helper for deciding whether a given insn (starting at the opcode
byte) may validly be used with a LOCK prefix. The following insns
may be used with LOCK when their destination operand is in memory.
/* --- end of the SSE3 decoder. --- */
/* ---------------------------------------------------- */
+ /* ---------------------------------------------------- */
+ /* --- start of the SSSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
+ Unsigned Bytes (MMX) */
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ IRTemp sVoddsSX = newTemp(Ity_I64);
+ IRTemp sVevensSX = newTemp(Ity_I64);
+ IRTemp dVoddsZX = newTemp(Ity_I64);
+ IRTemp dVevensZX = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmaddubsw %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ /* compute dV unsigned x sV signed */
+ assign( sVoddsSX,
+ binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
+ assign( sVevensSX,
+ binop(Iop_SarN16x4,
+ binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
+ mkU8(8)) );
+ assign( dVoddsZX,
+ binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
+ assign( dVevensZX,
+ binop(Iop_ShrN16x4,
+ binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
+ mkU8(8)) );
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ binop(Iop_QAdd16Sx4,
+ binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
+ binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
+ )
+ );
+ goto decode_success;
+ }
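
The odd/even shift-and-mask sequence above just separates alternate byte lanes so they can be multiplied as 16-bit values. A scalar model of one 16-bit PMADDUBSW result lane, not part of the patch (illustrative name): dV supplies the unsigned bytes, sV the signed ones, and the pairwise products are summed with signed saturation:

#include <stdint.h>

static int16_t pmaddubsw_lane ( uint8_t d_even, uint8_t d_odd,
                                int8_t  s_even, int8_t  s_odd )
{
   int32_t sum = (int32_t)d_even * s_even + (int32_t)d_odd * s_odd;
   if (sum >  32767) sum =  32767;     /* QAdd16Sx4 saturation */
   if (sum < -32768) sum = -32768;
   return (int16_t)sum;
}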
+
+ /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
+ Unsigned Bytes (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sVoddsSX = newTemp(Ity_V128);
+ IRTemp sVevensSX = newTemp(Ity_V128);
+ IRTemp dVoddsZX = newTemp(Ity_V128);
+ IRTemp dVevensZX = newTemp(Ity_V128);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmaddubsw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ /* compute dV unsigned x sV signed */
+ assign( sVoddsSX,
+ binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
+ assign( sVevensSX,
+ binop(Iop_SarN16x8,
+ binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
+ mkU8(8)) );
+ assign( dVoddsZX,
+ binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
+ assign( dVevensZX,
+ binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
+ mkU8(8)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_QAdd16Sx8,
+ binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
+ binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
+ /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
+ mmx) and G to G (mmx). */
+ /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
+ mmx) and G to G (mmx). */
+ /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
+ to G (mmx). */
+
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
+ || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
+ HChar* str = "???";
+ IROp opV64 = Iop_INVALID;
+ IROp opCatO = Iop_CatOddLanes16x4;
+ IROp opCatE = Iop_CatEvenLanes16x4;
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ switch (insn[2]) {
+ case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
+ case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
+ case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
+ case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
+ case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
+ case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
+ default: vassert(0);
+ }
+ if (insn[2] == 0x02 || insn[2] == 0x06) {
+ opCatO = Iop_InterleaveHI32x2;
+ opCatE = Iop_InterleaveLO32x2;
+ }
+
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("ph%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ binop(opV64,
+ binop(opCatE,mkexpr(sV),mkexpr(dV)),
+ binop(opCatO,mkexpr(sV),mkexpr(dV))
+ )
+ );
+ goto decode_success;
+ }
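
The Cat{Even,Odd}Lanes pairing above is what makes these "horizontal": each result lane is formed from two adjacent lanes of one input, with dV feeding the low half of the result and sV the high half. A scalar sketch for PHADDW, not part of the patch (wrap-around, unlike PHADDSW; names are illustrative):

#include <stdint.h>

static void phaddw_64 ( const int16_t d[4], const int16_t s[4],
                        uint16_t out[4] )
{
   out[0] = (uint16_t)(d[0] + d[1]);   /* low half of result from dV  */
   out[1] = (uint16_t)(d[2] + d[3]);
   out[2] = (uint16_t)(s[0] + s[1]);   /* high half of result from sV */
   out[3] = (uint16_t)(s[2] + s[3]);
}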
+
+ /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
+ xmm) and G to G (xmm). */
+ /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
+ xmm) and G to G (xmm). */
+ /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
+ G to G (xmm). */
+
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
+ || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
+ HChar* str = "???";
+ IROp opV64 = Iop_INVALID;
+ IROp opCatO = Iop_CatOddLanes16x4;
+ IROp opCatE = Iop_CatEvenLanes16x4;
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ switch (insn[2]) {
+ case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
+ case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
+ case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
+ case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
+ case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
+ case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
+ default: vassert(0);
+ }
+ if (insn[2] == 0x02 || insn[2] == 0x06) {
+ opCatO = Iop_InterleaveHI32x2;
+ opCatE = Iop_InterleaveLO32x2;
+ }
+
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRexRM(pfx,modrm)) );
+ DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("ph%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ delta += 3+alen;
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ /* This isn't a particularly efficient way to compute the
+ result, but at least it avoids a proliferation of IROps,
+         hence avoids complicating all the backends. */
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,
+ binop(opV64,
+ binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
+ binop(opCatO,mkexpr(sHi),mkexpr(sLo))
+ ),
+ binop(opV64,
+ binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
+ binop(opCatO,mkexpr(dHi),mkexpr(dLo))
+ )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
+ (MMX) */
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmulhrsw %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
+ Scale (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmulhrsw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,
+ dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
+ dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
+ /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
+   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x08: laneszB = 1; str = "b"; break;
+ case 0x09: laneszB = 2; str = "w"; break;
+ case 0x0A: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("psign%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
+ /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
+   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x08: laneszB = 1; str = "b"; break;
+ case 0x09: laneszB = 2; str = "w"; break;
+ case 0x0A: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("psign%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,
+ dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
+ dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
+ /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
+ /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
+ IRTemp sV = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x1C: laneszB = 1; str = "b"; break;
+ case 0x1D: laneszB = 2; str = "w"; break;
+ case 0x1E: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ do_MMX_preamble();
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pabs%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ dis_PABS_helper( mkexpr(sV), laneszB )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
+ /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
+ /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x1C: laneszB = 1; str = "b"; break;
+ case 0x1D: laneszB = 2; str = "w"; break;
+ case 0x1E: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pabs%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128,
+ dis_PABS_helper( mkexpr(sHi), laneszB ),
+ dis_PABS_helper( mkexpr(sLo), laneszB )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
+ if (haveNo66noF2noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ IRTemp res = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ d64 = (Long)insn[3+1];
+ delta += 3+1+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d64,
+ nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ d64 = (Long)insn[3+alen];
+ delta += 3+alen+1;
+ DIP("palignr $%d%s,%s\n", (Int)d64,
+ dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ if (d64 == 0) {
+ assign( res, mkexpr(sV) );
+ }
+ else if (d64 >= 1 && d64 <= 7) {
+ assign(res,
+ binop(Iop_Or64,
+ binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
+ binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
+ )));
+ }
+ else if (d64 == 8) {
+ assign( res, mkexpr(dV) );
+ }
+ else if (d64 >= 9 && d64 <= 15) {
+ assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
+ }
+ else if (d64 >= 16 && d64 <= 255) {
+ assign( res, mkU64(0) );
+ }
+ else
+ vassert(0);
+
+ putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
+ goto decode_success;
+ }
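
The immediate cases above implement a byte-window select over the 16-byte concatenation dV:sV, with the window sliding off into zeroes. A scalar model, not part of the patch (assumes a little-endian host; palignr_mmx is an illustrative name):

#include <stdint.h>
#include <string.h>

static uint64_t palignr_mmx ( uint64_t dV, uint64_t sV, unsigned imm8 )
{
   uint8_t  cat[24];
   uint64_t res = 0;
   memset(cat, 0, sizeof cat);           /* bytes 16..23 stay zero */
   memcpy(cat + 0, &sV, 8);              /* sV is the low 8 bytes  */
   memcpy(cat + 8, &dV, 8);              /* dV is the high 8 bytes */
   if (imm8 <= 16)
      memcpy(&res, cat + imm8, 8);       /* bytes imm8 .. imm8+7   */
   return res;
}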
+
+ /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ d64 = (Long)insn[3+1];
+ delta += 3+1+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d64,
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ d64 = (Long)insn[3+alen];
+ delta += 3+alen+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d64,
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (d64 == 0) {
+ assign( rHi, mkexpr(sHi) );
+ assign( rLo, mkexpr(sLo) );
+ }
+ else if (d64 >= 1 && d64 <= 7) {
+ assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d64) );
+ assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) );
+ }
+ else if (d64 == 8) {
+ assign( rHi, mkexpr(dLo) );
+ assign( rLo, mkexpr(sHi) );
+ }
+ else if (d64 >= 9 && d64 <= 15) {
+ assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d64-8) );
+ assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) );
+ }
+ else if (d64 == 16) {
+ assign( rHi, mkexpr(dHi) );
+ assign( rLo, mkexpr(dLo) );
+ }
+ else if (d64 >= 17 && d64 <= 23) {
+ assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-16))) );
+ assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) );
+ }
+ else if (d64 == 24) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, mkexpr(dHi) );
+ }
+ else if (d64 >= 25 && d64 <= 31) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-24))) );
+ }
+ else if (d64 >= 32 && d64 <= 255) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, mkU64(0) );
+ }
+ else
+ vassert(0);
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
+ if (haveNo66noF2noF3(pfx)
+ && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
+ delta += 3+1;
+ DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
+ nameMMXReg(gregLO3ofRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pshufb %s,%s\n", dis_buf,
+ nameMMXReg(gregLO3ofRM(modrm)));
+ }
+
+ putMMXReg(
+ gregLO3ofRM(modrm),
+ binop(
+ Iop_And64,
+ /* permute the lanes */
+ binop(
+ Iop_Perm8x8,
+ mkexpr(dV),
+ binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
+ ),
+ /* mask off lanes which have (index & 0x80) == 0x80 */
+ unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
+ )
+ );
+ goto decode_success;
+ }
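
The And with 0x0707..07 and the Not(SarN8x8(sV,7)) mask above encode the PSHUFB control-byte rule: the low three bits index the source byte, and a set top bit forces the lane to zero. A scalar model, not part of the patch (illustrative name):

#include <stdint.h>

static void pshufb_64 ( const uint8_t src[8], const uint8_t ctl[8],
                        uint8_t out[8] )
{
   int i;
   for (i = 0; i < 8; i++)
      out[i] = (ctl[i] & 0x80) ? 0 : src[ctl[i] & 7];
}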
+
+ /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
+ if (have66noF2noF3(pfx)
+ && (sz == 2 || /*redundant REX.W*/ sz == 8)
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+ IRTemp sevens = newTemp(Ity_I64);
+ IRTemp mask0x80hi = newTemp(Ity_I64);
+ IRTemp mask0x80lo = newTemp(Ity_I64);
+ IRTemp maskBit3hi = newTemp(Ity_I64);
+ IRTemp maskBit3lo = newTemp(Ity_I64);
+ IRTemp sAnd7hi = newTemp(Ity_I64);
+ IRTemp sAnd7lo = newTemp(Ity_I64);
+ IRTemp permdHi = newTemp(Ity_I64);
+ IRTemp permdLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 3+1;
+ DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pshufb %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ assign( sevens, mkU64(0x0707070707070707ULL) );
+
+ /*
+ mask0x80hi = Not(SarN8x8(sHi,7))
+ maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
+ sAnd7hi = And(sHi,sevens)
+ permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
+ And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
+ rHi = And(permdHi,mask0x80hi)
+ */
+ assign(
+ mask0x80hi,
+ unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
+
+ assign(
+ maskBit3hi,
+ binop(Iop_SarN8x8,
+ binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
+ mkU8(7)));
+
+ assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
+
+ assign(
+ permdHi,
+ binop(
+ Iop_Or64,
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
+ mkexpr(maskBit3hi)),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
+ unop(Iop_Not64,mkexpr(maskBit3hi))) ));
+
+ assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
+
+ /* And the same for the lower half of the result. What fun. */
+
+ assign(
+ mask0x80lo,
+ unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
+
+ assign(
+ maskBit3lo,
+ binop(Iop_SarN8x8,
+ binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
+ mkU8(7)));
+
+ assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
+
+ assign(
+ permdLo,
+ binop(
+ Iop_Or64,
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
+ mkexpr(maskBit3lo)),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
+ unop(Iop_Not64,mkexpr(maskBit3lo))) ));
+
+ assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
+
+ putXMMReg(
+ gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
+ );
+ goto decode_success;
+ }
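
The 128-bit form follows the same rule, except that the control byte's low four bits index all sixteen source bytes; bit 3 is exactly the high/low qword select that maskBit3{hi,lo} extract above. A scalar model, not part of the patch (illustrative name):

#include <stdint.h>

static void pshufb_128 ( const uint8_t src[16], const uint8_t ctl[16],
                         uint8_t out[16] )
{
   int i;
   for (i = 0; i < 16; i++)
      out[i] = (ctl[i] & 0x80) ? 0 : src[ctl[i] & 15];
}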
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
/*after_sse_decoders:*/
/* Get the primary opcode. */
d->fxState[1].fx = Ifx_Write;
d->fxState[1].offset = OFFB_RBX;
d->fxState[1].size = 8;
- d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].fx = Ifx_Modify;
d->fxState[2].offset = OFFB_RCX;
d->fxState[2].size = 8;
d->fxState[3].fx = Ifx_Write;
decode_failure:
/* All decode failures end up here. */
vex_printf("vex amd64->IR: unhandled instruction bytes: "
- "0x%x 0x%x 0x%x 0x%x\n",
+ "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
(Int)getUChar(delta_start+0),
(Int)getUChar(delta_start+1),
(Int)getUChar(delta_start+2),
- (Int)getUChar(delta_start+3) );
+ (Int)getUChar(delta_start+3),
+ (Int)getUChar(delta_start+4),
+ (Int)getUChar(delta_start+5) );
/* Tell the dispatcher that this insn cannot be decoded, and so has
not been executed, and (is currently) the next to be executed.
}
}
-/* Claim to be the following SSE2-capable CPU:
+/* Claim to be the following SSSE3-capable CPU (2 x ...):
vendor_id : GenuineIntel
- cpu family : 15
- model : 2
- model name : Intel(R) Pentium(R) 4 CPU 2.40GHz
- stepping : 7
- cpu MHz : 2394.234
- cache size : 512 KB
+ cpu family : 6
+ model : 15
+ model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz
+ stepping : 6
+ cpu MHz : 2394.000
+ cache size : 4096 KB
+ physical id : 0
+ siblings : 2
+ core id : 0
+ cpu cores : 2
+ fpu : yes
+ fpu_exception : yes
+ cpuid level : 10
+ wp : yes
+ flags : fpu vme de pse tsc msr pae mce cx8 apic sep
+ mtrr pge mca cmov pat pse36 clflush dts acpi
+ mmx fxsr sse sse2 ss ht tm syscall nx lm
+ constant_tsc pni monitor ds_cpl vmx est tm2
+ cx16 xtpr lahf_lm
+ bogomips : 4798.78
+ clflush size : 64
+ cache_alignment : 64
+ address sizes : 36 bits physical, 48 bits virtual
+ power management:
*/
void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st )
{
+# define SET_ABCD(_a,_b,_c,_d) \
+ do { st->guest_EAX = (UInt)(_a); \
+ st->guest_EBX = (UInt)(_b); \
+ st->guest_ECX = (UInt)(_c); \
+ st->guest_EDX = (UInt)(_d); \
+ } while (0)
+
switch (st->guest_EAX) {
- case 0:
- st->guest_EAX = 0x00000002;
- st->guest_EBX = 0x756e6547;
- st->guest_ECX = 0x6c65746e;
- st->guest_EDX = 0x49656e69;
+ case 0x00000000:
+ SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69);
break;
- case 1:
- st->guest_EAX = 0x00000f27;
- st->guest_EBX = 0x00010809;
- st->guest_ECX = 0x00004400;
- st->guest_EDX = 0xbfebfbff;
+ case 0x00000001:
+ SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff);
break;
- default:
- st->guest_EAX = 0x665b5101;
- st->guest_EBX = 0x00000000;
- st->guest_ECX = 0x00000000;
- st->guest_EDX = 0x007b7040;
+ case 0x00000002:
+ SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049);
+ break;
+ case 0x00000003:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000004: {
+ switch (st->guest_ECX) {
+ case 0x00000000: SET_ABCD(0x04000121, 0x01c0003f,
+ 0x0000003f, 0x00000001); break;
+ case 0x00000001: SET_ABCD(0x04000122, 0x01c0003f,
+ 0x0000003f, 0x00000001); break;
+ case 0x00000002: SET_ABCD(0x04004143, 0x03c0003f,
+ 0x00000fff, 0x00000001); break;
+ default: SET_ABCD(0x00000000, 0x00000000,
+ 0x00000000, 0x00000000); break;
+ }
+ break;
+ }
+ case 0x00000005:
+ SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020);
+ break;
+ case 0x00000006:
+ SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000);
+ break;
+ case 0x00000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000008:
+ SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x00000009:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x0000000a:
+ unhandled_eax_value:
+ SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000000:
+ SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000001:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000);
break;
+ case 0x80000002:
+ SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865);
+ break;
+ case 0x80000003:
+ SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020);
+ break;
+ case 0x80000004:
+ SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847);
+ break;
+ case 0x80000005:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000006:
+ SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000);
+ break;
+ case 0x80000007:
+ SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ case 0x80000008:
+ SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000);
+ break;
+ default:
+ goto unhandled_eax_value;
}
+# undef SET_ABCD
}
}
+/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
+ values (aa,bb), computes, for each of the 4 16-bit lanes:
+
+ (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
+*/
+static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp bb = newTemp(Ity_I64);
+ IRTemp aahi32s = newTemp(Ity_I64);
+ IRTemp aalo32s = newTemp(Ity_I64);
+ IRTemp bbhi32s = newTemp(Ity_I64);
+ IRTemp bblo32s = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+ IRTemp one32x2 = newTemp(Ity_I64);
+ assign(aa, aax);
+ assign(bb, bbx);
+ assign( aahi32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
+ mkU8(16) ));
+ assign( aalo32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
+ mkU8(16) ));
+ assign( bbhi32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
+ mkU8(16) ));
+ assign( bblo32s,
+ binop(Iop_SarN32x2,
+ binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
+ mkU8(16) ));
+ assign(one32x2, mkU64( (1ULL << 32) + 1 ));
+ assign(
+ rHi,
+ binop(
+ Iop_ShrN32x2,
+ binop(
+ Iop_Add32x2,
+ binop(
+ Iop_ShrN32x2,
+ binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
+ mkU8(14)
+ ),
+ mkexpr(one32x2)
+ ),
+ mkU8(1)
+ )
+ );
+ assign(
+ rLo,
+ binop(
+ Iop_ShrN32x2,
+ binop(
+ Iop_Add32x2,
+ binop(
+ Iop_ShrN32x2,
+ binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
+ mkU8(14)
+ ),
+ mkexpr(one32x2)
+ ),
+ mkU8(1)
+ )
+ );
+ return
+ binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
+}
+
+/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
+ values (aa,bb), computes, for each lane:
+
+ if aa_lane < 0 then - bb_lane
+ else if aa_lane > 0 then bb_lane
+ else 0
+*/
+static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp bb = newTemp(Ity_I64);
+ IRTemp zero = newTemp(Ity_I64);
+ IRTemp bbNeg = newTemp(Ity_I64);
+ IRTemp negMask = newTemp(Ity_I64);
+ IRTemp posMask = newTemp(Ity_I64);
+ IROp opSub = Iop_INVALID;
+ IROp opCmpGTS = Iop_INVALID;
+
+ switch (laneszB) {
+ case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
+ case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
+ case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
+ default: vassert(0);
+ }
+
+ assign( aa, aax );
+ assign( bb, bbx );
+ assign( zero, mkU64(0) );
+ assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
+ assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
+ assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
+
+ return
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
+ binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
+
+}
+
+/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
+ value aa, computes, for each lane
+
+ if aa < 0 then -aa else aa
+
+ Note that the result is interpreted as unsigned, so that the
+ absolute value of the most negative signed input can be
+ represented.
+*/
+static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB )
+{
+ IRTemp aa = newTemp(Ity_I64);
+ IRTemp zero = newTemp(Ity_I64);
+ IRTemp aaNeg = newTemp(Ity_I64);
+ IRTemp negMask = newTemp(Ity_I64);
+ IRTemp posMask = newTemp(Ity_I64);
+ IROp opSub = Iop_INVALID;
+ IROp opSarN = Iop_INVALID;
+
+ switch (laneszB) {
+ case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
+ case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
+ case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
+ default: vassert(0);
+ }
+
+ assign( aa, aax );
+ assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
+ assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
+ assign( zero, mkU64(0) );
+ assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
+ return
+ binop(Iop_Or64,
+ binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
+ binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) );
+}
+
+static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
+ IRTemp lo64, Int byteShift )
+{
+ vassert(byteShift >= 1 && byteShift <= 7);
+ return
+ binop(Iop_Or64,
+ binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
+ binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
+ );
+}
+
+/* Generate a SIGSEGV followed by a restart of the current instruction
+ if effective_addr is not 16-aligned. This is required behaviour
+ for some SSE3 instructions and all 128-bit SSSE3 instructions.
+ This assumes that guest_RIP_curr_instr is set correctly! */
+static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr )
+{
+ stmt(
+ IRStmt_Exit(
+ binop(Iop_CmpNE32,
+ binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
+ mkU32(0)),
+ Ijk_SigSEGV,
+ IRConst_U32(guest_EIP_curr_instr)
+ )
+ );
+}
+
+
/* Helper for deciding whether a given insn (starting at the opcode
byte) may validly be used with a LOCK prefix. The following insns
may be used with LOCK when their destination operand is in memory.
/* --- end of the SSE3 decoder. --- */
/* ---------------------------------------------------- */
+ /* ---------------------------------------------------- */
+ /* --- start of the SSSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
+ /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
+ Unsigned Bytes (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ IRTemp sVoddsSX = newTemp(Ity_I64);
+ IRTemp sVevensSX = newTemp(Ity_I64);
+ IRTemp dVoddsZX = newTemp(Ity_I64);
+ IRTemp dVevensZX = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmaddubsw %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ /* compute dV unsigned x sV signed */
+ assign( sVoddsSX,
+ binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
+ assign( sVevensSX,
+ binop(Iop_SarN16x4,
+ binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
+ mkU8(8)) );
+ assign( dVoddsZX,
+ binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
+ assign( dVevensZX,
+ binop(Iop_ShrN16x4,
+ binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
+ mkU8(8)) );
+
+ putMMXReg(
+ gregOfRM(modrm),
+ binop(Iop_QAdd16Sx4,
+ binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
+ binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
+ Unsigned Bytes (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sVoddsSX = newTemp(Ity_V128);
+ IRTemp sVevensSX = newTemp(Ity_V128);
+ IRTemp dVoddsZX = newTemp(Ity_V128);
+ IRTemp dVevensZX = newTemp(Ity_V128);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmaddubsw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ /* compute dV unsigned x sV signed */
+ assign( sVoddsSX,
+ binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
+ assign( sVevensSX,
+ binop(Iop_SarN16x8,
+ binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
+ mkU8(8)) );
+ assign( dVoddsZX,
+ binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
+ assign( dVevensZX,
+ binop(Iop_ShrN16x8,
+ binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
+ mkU8(8)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_QAdd16Sx8,
+ binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
+ binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
+ /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
+ mmx) and G to G (mmx). */
+ /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
+ mmx) and G to G (mmx). */
+ /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
+ to G (mmx). */
+ /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
+ to G (mmx). */
+
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
+ || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
+ HChar* str = "???";
+ IROp opV64 = Iop_INVALID;
+ IROp opCatO = Iop_CatOddLanes16x4;
+ IROp opCatE = Iop_CatEvenLanes16x4;
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ switch (insn[2]) {
+ case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
+ case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
+ case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
+ case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
+ case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
+ case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
+ default: vassert(0);
+ }
+ if (insn[2] == 0x02 || insn[2] == 0x06) {
+ opCatO = Iop_InterleaveHI32x2;
+ opCatE = Iop_InterleaveLO32x2;
+ }
+
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("ph%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ binop(opV64,
+ binop(opCatE,mkexpr(sV),mkexpr(dV)),
+ binop(opCatO,mkexpr(sV),mkexpr(dV))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
+ xmm) and G to G (xmm). */
+ /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
+ xmm) and G to G (xmm). */
+ /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
+ G to G (xmm). */
+ /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
+ G to G (xmm). */
+
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01
+ || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) {
+ HChar* str = "???";
+ IROp opV64 = Iop_INVALID;
+ IROp opCatO = Iop_CatOddLanes16x4;
+ IROp opCatE = Iop_CatEvenLanes16x4;
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+
+ switch (insn[2]) {
+ case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
+ case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
+ case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
+ case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
+ case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
+ case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
+ default: vassert(0);
+ }
+ if (insn[2] == 0x02 || insn[2] == 0x06) {
+ opCatO = Iop_InterleaveHI32x2;
+ opCatE = Iop_InterleaveLO32x2;
+ }
+
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg( eregOfRM(modrm)) );
+ DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("ph%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ /* This isn't a particularly efficient way to compute the
+ result, but at least it avoids a proliferation of IROps,
+         hence avoids complicating all the backends. */
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128,
+ binop(opV64,
+ binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
+ binop(opCatO,mkexpr(sHi),mkexpr(sLo))
+ ),
+ binop(opV64,
+ binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
+ binop(opCatO,mkexpr(dHi),mkexpr(dLo))
+ )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
+ (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmulhrsw %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
+ Scale (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pmulhrsw %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128,
+ dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
+ dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
+ /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
+   /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x08: laneszB = 1; str = "b"; break;
+ case 0x09: laneszB = 2; str = "w"; break;
+ case 0x0A: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("psign%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
+ /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
+   /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x08: laneszB = 1; str = "b"; break;
+ case 0x09: laneszB = 2; str = "w"; break;
+ case 0x0A: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("psign%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128,
+ dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
+ dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
+ /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
+ /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
+ IRTemp sV = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x1C: laneszB = 1; str = "b"; break;
+ case 0x1D: laneszB = 2; str = "w"; break;
+ case 0x1E: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+ do_MMX_preamble();
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pabs%s %s,%s\n", str, dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ putMMXReg(
+ gregOfRM(modrm),
+ dis_PABS_helper( mkexpr(sV), laneszB )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
+ /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
+ /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38
+ && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ HChar* str = "???";
+ Int laneszB = 0;
+
+ switch (insn[2]) {
+ case 0x1C: laneszB = 1; str = "b"; break;
+ case 0x1D: laneszB = 2; str = "w"; break;
+ case 0x1E: laneszB = 4; str = "d"; break;
+ default: vassert(0);
+ }
+
+ modrm = insn[3];
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pabs%s %s,%s\n", str, dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128,
+ dis_PABS_helper( mkexpr(sHi), laneszB ),
+ dis_PABS_helper( mkexpr(sLo), laneszB )
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+ IRTemp res = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ d32 = (UInt)insn[3+1];
+ delta += 3+1+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d32,
+ nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ d32 = (UInt)insn[3+alen];
+ delta += 3+alen+1;
+ DIP("palignr $%d%s,%s\n", (Int)d32,
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
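+      /* PALIGNR (MMX) concatenates dV:sV into a 128-bit value, shifts it
+         right by 8*imm8 bits, and keeps the low 64 bits.  The cascade
+         below computes that directly for each range of imm8. */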
+ if (d32 == 0) {
+ assign( res, mkexpr(sV) );
+ }
+ else if (d32 >= 1 && d32 <= 7) {
+ assign(res,
+ binop(Iop_Or64,
+ binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)),
+ binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32))
+ )));
+ }
+ else if (d32 == 8) {
+ assign( res, mkexpr(dV) );
+ }
+ else if (d32 >= 9 && d32 <= 15) {
+ assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) );
+ }
+ else if (d32 >= 16 && d32 <= 255) {
+ assign( res, mkU64(0) );
+ }
+ else
+ vassert(0);
+
+ putMMXReg( gregOfRM(modrm), mkexpr(res) );
+ goto decode_success;
+ }
+
+ /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ d32 = (UInt)insn[3+1];
+ delta += 3+1+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d32,
+ nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ d32 = (UInt)insn[3+alen];
+ delta += 3+alen+1;
+ DIP("palignr $%d,%s,%s\n", (Int)d32,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
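+      /* The result is the low 128 bits of the 256-bit value dV:sV shifted
+         right by 8*imm8 bits.  It is built 64 bits at a time: each case
+         below feeds dis_PALIGNR_XMM_helper the pair of 64-bit halves that
+         straddle the byte boundary selected by imm8. */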
+ if (d32 == 0) {
+ assign( rHi, mkexpr(sHi) );
+ assign( rLo, mkexpr(sLo) );
+ }
+ else if (d32 >= 1 && d32 <= 7) {
+ assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) );
+ assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) );
+ }
+ else if (d32 == 8) {
+ assign( rHi, mkexpr(dLo) );
+ assign( rLo, mkexpr(sHi) );
+ }
+ else if (d32 >= 9 && d32 <= 15) {
+ assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) );
+ assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) );
+ }
+ else if (d32 == 16) {
+ assign( rHi, mkexpr(dHi) );
+ assign( rLo, mkexpr(dLo) );
+ }
+ else if (d32 >= 17 && d32 <= 23) {
+ assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) );
+ assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) );
+ }
+ else if (d32 == 24) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, mkexpr(dHi) );
+ }
+ else if (d32 >= 25 && d32 <= 31) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) );
+ }
+ else if (d32 >= 32 && d32 <= 255) {
+ assign( rHi, mkU64(0) );
+ assign( rLo, mkU64(0) );
+ }
+ else
+ vassert(0);
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
+ );
+ goto decode_success;
+ }
+
+ /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
+ if (sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
+ IRTemp sV = newTemp(Ity_I64);
+ IRTemp dV = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ do_MMX_preamble();
+ assign( dV, getMMXReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pshufb %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
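+      /* For each byte i of the result: if bit 7 of sV[i] is set, the
+         result byte is zero; otherwise it is dV[ sV[i] & 7 ].  The
+         Perm8x8 does the lane selection and the And/Not pair applies
+         the clear-to-zero mask. */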
+ putMMXReg(
+ gregOfRM(modrm),
+ binop(
+ Iop_And64,
+ /* permute the lanes */
+ binop(
+ Iop_Perm8x8,
+ mkexpr(dV),
+ binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
+ ),
+ /* mask off lanes which have (index & 0x80) == 0x80 */
+ unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
+ )
+ );
+ goto decode_success;
+ }
+
+ /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) {
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ IRTemp sHi = newTemp(Ity_I64);
+ IRTemp sLo = newTemp(Ity_I64);
+ IRTemp dHi = newTemp(Ity_I64);
+ IRTemp dLo = newTemp(Ity_I64);
+ IRTemp rHi = newTemp(Ity_I64);
+ IRTemp rLo = newTemp(Ity_I64);
+ IRTemp sevens = newTemp(Ity_I64);
+ IRTemp mask0x80hi = newTemp(Ity_I64);
+ IRTemp mask0x80lo = newTemp(Ity_I64);
+ IRTemp maskBit3hi = newTemp(Ity_I64);
+ IRTemp maskBit3lo = newTemp(Ity_I64);
+ IRTemp sAnd7hi = newTemp(Ity_I64);
+ IRTemp sAnd7lo = newTemp(Ity_I64);
+ IRTemp permdHi = newTemp(Ity_I64);
+ IRTemp permdLo = newTemp(Ity_I64);
+
+ modrm = insn[3];
+ assign( dV, getXMMReg(gregOfRM(modrm)) );
+
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRM(modrm)) );
+ delta += 3+1;
+ DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 3+alen;
+ DIP("pshufb %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+
+ assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
+
+ assign( sevens, mkU64(0x0707070707070707ULL) );
+
+ /*
+ mask0x80hi = Not(SarN8x8(sHi,7))
+ maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
+ sAnd7hi = And(sHi,sevens)
+ permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
+ And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
+ rHi = And(permdHi,mask0x80hi)
+ */
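+      /* In the 128-bit case each control byte indexes one of 16 source
+         bytes, so bit 3 of the index selects between dHi and dLo.
+         maskBit3hi/lo is that bit replicated across the whole lane, and
+         the two Perm8x8 results are merged under it. */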
+ assign(
+ mask0x80hi,
+ unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
+
+ assign(
+ maskBit3hi,
+ binop(Iop_SarN8x8,
+ binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
+ mkU8(7)));
+
+ assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
+
+ assign(
+ permdHi,
+ binop(
+ Iop_Or64,
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
+ mkexpr(maskBit3hi)),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
+ unop(Iop_Not64,mkexpr(maskBit3hi))) ));
+
+ assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
+
+ /* And the same for the lower half of the result. What fun. */
+
+ assign(
+ mask0x80lo,
+ unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
+
+ assign(
+ maskBit3lo,
+ binop(Iop_SarN8x8,
+ binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
+ mkU8(7)));
+
+ assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
+
+ assign(
+ permdLo,
+ binop(
+ Iop_Or64,
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
+ mkexpr(maskBit3lo)),
+ binop(Iop_And64,
+ binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
+ unop(Iop_Not64,mkexpr(maskBit3lo))) ));
+
+ assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
+
+ putXMMReg(
+ gregOfRM(modrm),
+ binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo))
+ );
+ goto decode_success;
+ }
+
+ /* ---------------------------------------------------- */
+ /* --- end of the SSSE3 decoder. --- */
+ /* ---------------------------------------------------- */
+
after_sse_decoders:
/* ---------------------------------------------------- */
d->fxState[1].fx = Ifx_Write;
d->fxState[1].offset = OFFB_EBX;
d->fxState[1].size = 4;
- d->fxState[2].fx = Ifx_Write;
+ d->fxState[2].fx = Ifx_Modify;
d->fxState[2].offset = OFFB_ECX;
d->fxState[2].size = 4;
d->fxState[3].fx = Ifx_Write;
case Ijk_SigTRAP:
*p++ = 0xBD;
p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
+ case Ijk_SigSEGV:
+ *p++ = 0xBD;
+ p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
case Ijk_Ret:
case Ijk_Call:
case Ijk_Boring:
fn = (HWord)h_generic_calc_InterleaveHI32x2; break;
case Iop_InterleaveLO32x2:
fn = (HWord)h_generic_calc_InterleaveLO32x2; break;
+ case Iop_CatOddLanes16x4:
+ fn = (HWord)h_generic_calc_CatOddLanes16x4; break;
+ case Iop_CatEvenLanes16x4:
+ fn = (HWord)h_generic_calc_CatEvenLanes16x4; break;
+ case Iop_Perm8x8:
+ fn = (HWord)h_generic_calc_Perm8x8; break;
case Iop_Max8Ux8:
fn = (HWord)h_generic_calc_Max8Ux8; break;
case Iop_Mul16x4:
fn = (HWord)h_generic_calc_Mul16x4; break;
+ case Iop_Mul32x2:
+ fn = (HWord)h_generic_calc_Mul32x2; break;
case Iop_MulHi16Sx4:
fn = (HWord)h_generic_calc_MulHi16Sx4; break;
case Iop_MulHi16Ux4:
fn = (HWord)h_generic_calc_ShlN16x4;
second_is_UInt = True;
break;
+ case Iop_ShlN8x8:
+ fn = (HWord)h_generic_calc_ShlN8x8;
+ second_is_UInt = True;
+ break;
case Iop_ShrN32x2:
fn = (HWord)h_generic_calc_ShrN32x2;
second_is_UInt = True;
return toUChar(0xFF & (lo32 >> 0));
}
+static inline UChar index8x8 ( ULong w64, UChar ix ) {
+ ix &= 7;
+ return toUChar((w64 >> (8*ix)) & 0xFF);
+}
+
/* Scalar helpers. */
return (Short)t;
}
+static inline Int mul32 ( Int xx, Int yy )
+{
+ Int t = ((Int)xx) * ((Int)yy);
+ return (Int)t;
+}
+
static inline Short mulhi16S ( Short xx, Short yy )
{
Int t = ((Int)xx) * ((Int)yy);
/* shifts: we don't care about out-of-range ones, since
that is dealt with at a higher level. */
+static inline UChar shl8 ( UChar v, UInt n )
+{
+ return toUChar(v << n);
+}
+
static inline UChar sar8 ( UChar v, UInt n )
{
return toUChar(((Char)v) >> n);
);
}
+ULong h_generic_calc_Mul32x2 ( ULong xx, ULong yy )
+{
+ return mk32x2(
+ mul32( sel32x2_1(xx), sel32x2_1(yy) ),
+ mul32( sel32x2_0(xx), sel32x2_0(yy) )
+ );
+}
+
ULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy )
{
return mk16x4(
);
}
+/* ------------ Concatenation ------------ */
+
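+/* With 16-bit lane k occupying bits [16k+15 : 16k], these give
+   CatOddLanes16x4(aa,bb)  = [ aa_3, aa_1, bb_3, bb_1 ] and
+   CatEvenLanes16x4(aa,bb) = [ aa_2, aa_0, bb_2, bb_0 ],
+   listed from most to least significant lane. */
+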
+ULong h_generic_calc_CatOddLanes16x4 ( ULong aa, ULong bb )
+{
+ return mk16x4(
+ sel16x4_3(aa),
+ sel16x4_1(aa),
+ sel16x4_3(bb),
+ sel16x4_1(bb)
+ );
+}
+
+ULong h_generic_calc_CatEvenLanes16x4 ( ULong aa, ULong bb )
+{
+ return mk16x4(
+ sel16x4_2(aa),
+ sel16x4_0(aa),
+ sel16x4_2(bb),
+ sel16x4_0(bb)
+ );
+}
+
+/* ------------ Permutation ------------ */
+
+/* Result byte i is aa[ bb[i] & 7 ]; only the low three bits of each
+   control byte are used (via index8x8 above). */
+ULong h_generic_calc_Perm8x8 ( ULong aa, ULong bb )
+{
+ return mk8x8(
+ index8x8(aa, sel8x8_7(bb)),
+ index8x8(aa, sel8x8_6(bb)),
+ index8x8(aa, sel8x8_5(bb)),
+ index8x8(aa, sel8x8_4(bb)),
+ index8x8(aa, sel8x8_3(bb)),
+ index8x8(aa, sel8x8_2(bb)),
+ index8x8(aa, sel8x8_1(bb)),
+ index8x8(aa, sel8x8_0(bb))
+ );
+}
/* ------------ Shifting ------------ */
/* Note that because these primops are undefined if the shift amount
);
}
+ULong h_generic_calc_ShlN8x8 ( ULong xx, UInt nn )
+{
+ /* vassert(nn < 8); */
+ nn &= 7;
+ return mk8x8(
+ shl8( sel8x8_7(xx), nn ),
+ shl8( sel8x8_6(xx), nn ),
+ shl8( sel8x8_5(xx), nn ),
+ shl8( sel8x8_4(xx), nn ),
+ shl8( sel8x8_3(xx), nn ),
+ shl8( sel8x8_2(xx), nn ),
+ shl8( sel8x8_1(xx), nn ),
+ shl8( sel8x8_0(xx), nn )
+ );
+}
+
ULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn )
{
/* vassert(nn < 32); */
extern ULong h_generic_calc_QSub8Ux8 ( ULong, ULong );
extern ULong h_generic_calc_Mul16x4 ( ULong, ULong );
+extern ULong h_generic_calc_Mul32x2 ( ULong, ULong );
extern ULong h_generic_calc_MulHi16Sx4 ( ULong, ULong );
extern ULong h_generic_calc_MulHi16Ux4 ( ULong, ULong );
extern ULong h_generic_calc_InterleaveHI32x2 ( ULong, ULong );
extern ULong h_generic_calc_InterleaveLO32x2 ( ULong, ULong );
+extern ULong h_generic_calc_CatOddLanes16x4 ( ULong, ULong );
+extern ULong h_generic_calc_CatEvenLanes16x4 ( ULong, ULong );
+extern ULong h_generic_calc_Perm8x8 ( ULong, ULong );
+
+extern ULong h_generic_calc_ShlN8x8 ( ULong, UInt );
extern ULong h_generic_calc_ShlN16x4 ( ULong, UInt );
extern ULong h_generic_calc_ShlN32x2 ( ULong, UInt );
fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish;
case Iop_InterleaveLO32x2:
fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish;
+ case Iop_CatOddLanes16x4:
+ fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish;
+ case Iop_CatEvenLanes16x4:
+ fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish;
+ case Iop_Perm8x8:
+ fn = (HWord)h_generic_calc_Perm8x8; goto binnish;
case Iop_Max8Ux8:
fn = (HWord)h_generic_calc_Max8Ux8; goto binnish;
case Iop_Mul16x4:
fn = (HWord)h_generic_calc_Mul16x4; goto binnish;
+ case Iop_Mul32x2:
+ fn = (HWord)h_generic_calc_Mul32x2; goto binnish;
case Iop_MulHi16Sx4:
fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish;
case Iop_MulHi16Ux4:
fn = (HWord)h_generic_calc_ShlN32x2; goto shifty;
case Iop_ShlN16x4:
fn = (HWord)h_generic_calc_ShlN16x4; goto shifty;
+ case Iop_ShlN8x8:
+ fn = (HWord)h_generic_calc_ShlN8x8; goto shifty;
case Iop_ShrN32x2:
fn = (HWord)h_generic_calc_ShrN32x2; goto shifty;
case Iop_ShrN16x4:
case Iop_QSub8Sx8: vex_printf("QSub8Sx8"); return;
case Iop_QSub16Sx4: vex_printf("QSub16Sx4"); return;
case Iop_Mul16x4: vex_printf("Mul16x4"); return;
+ case Iop_Mul32x2: vex_printf("Mul32x2"); return;
case Iop_MulHi16Ux4: vex_printf("MulHi16Ux4"); return;
case Iop_MulHi16Sx4: vex_printf("MulHi16Sx4"); return;
case Iop_Avg8Ux8: vex_printf("Avg8Ux8"); return;
case Iop_CmpGT8Sx8: vex_printf("CmpGT8Sx8"); return;
case Iop_CmpGT16Sx4: vex_printf("CmpGT16Sx4"); return;
case Iop_CmpGT32Sx2: vex_printf("CmpGT32Sx2"); return;
+ case Iop_ShlN8x8: vex_printf("ShlN8x8"); return;
case Iop_ShlN16x4: vex_printf("ShlN16x4"); return;
case Iop_ShlN32x2: vex_printf("ShlN32x2"); return;
case Iop_ShrN16x4: vex_printf("ShrN16x4"); return;
case Iop_InterleaveLO8x8: vex_printf("InterleaveLO8x8"); return;
case Iop_InterleaveLO16x4: vex_printf("InterleaveLO16x4"); return;
case Iop_InterleaveLO32x2: vex_printf("InterleaveLO32x2"); return;
+ case Iop_CatOddLanes16x4: vex_printf("CatOddLanes16x4"); return;
+ case Iop_CatEvenLanes16x4: vex_printf("CatEvenLanes16x4"); return;
+ case Iop_Perm8x8: vex_printf("Iop_Perm8x8"); return;
case Iop_CmpNEZ32x2: vex_printf("CmpNEZ32x2"); return;
case Iop_CmpNEZ16x4: vex_printf("CmpNEZ16x4"); return;
case Iop_InterleaveHI8x8: case Iop_InterleaveLO8x8:
case Iop_InterleaveHI16x4: case Iop_InterleaveLO16x4:
case Iop_InterleaveHI32x2: case Iop_InterleaveLO32x2:
+ case Iop_CatOddLanes16x4: case Iop_CatEvenLanes16x4:
+ case Iop_Perm8x8:
case Iop_Max8Ux8: case Iop_Max16Sx4:
case Iop_Min8Ux8: case Iop_Min16Sx4:
- case Iop_Mul16x4: case Iop_MulHi16Sx4: case Iop_MulHi16Ux4:
+ case Iop_Mul16x4: case Iop_Mul32x2:
+ case Iop_MulHi16Sx4: case Iop_MulHi16Ux4:
case Iop_QAdd8Sx8: case Iop_QAdd16Sx4:
case Iop_QAdd8Ux8: case Iop_QAdd16Ux4:
case Iop_QNarrow32Sx2:
case Iop_QSub8Ux8: case Iop_QSub16Ux4:
BINARY(Ity_I64,Ity_I64, Ity_I64);
- case Iop_ShlN32x2: case Iop_ShlN16x4:
+ case Iop_ShlN32x2: case Iop_ShlN16x4: case Iop_ShlN8x8:
case Iop_ShrN32x2: case Iop_ShrN16x4:
case Iop_SarN32x2: case Iop_SarN16x4: case Iop_SarN8x8:
BINARY(Ity_I64,Ity_I8, Ity_I64);
Iop_QSub8Sx8, Iop_QSub16Sx4,
/* MULTIPLICATION (normal / high half of signed/unsigned) */
- Iop_Mul16x4,
+ Iop_Mul16x4, Iop_Mul32x2,
Iop_MulHi16Ux4,
Iop_MulHi16Sx4,
Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
/* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
- Iop_ShlN16x4, Iop_ShlN32x2,
+ Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
Iop_ShrN16x4, Iop_ShrN32x2,
Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
+ /* CONCATENATION -- build a new value by concatenating either
+ the even or odd lanes of both operands. Note that
+ Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
+ and so are omitted. */
+ Iop_CatOddLanes16x4, Iop_CatEvenLanes16x4,
+
+ /* PERMUTING -- copy src bytes to dst,
+ as indexed by control vector bytes:
+ for i in 0 .. 7 . result[i] = argL[ argR[i] ]
+ argR[i] values may only be in the range 0 .. 7, else behaviour
+ is undefined. */
+ Iop_Perm8x8,
+
/* ------------------ 128-bit SIMD FP. ------------------ */
/* --- 32x4 vector FP --- */