From: Julian Seward Date: Sat, 9 Feb 2008 01:16:02 +0000 (+0000) Subject: Finalise SSSE3 support (counterpart to r1808): X-Git-Tag: svn/VALGRIND_3_4_1^2~42 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9a38400e7866a0a5c4f6033f7fd3cecac844de81;p=thirdparty%2Fvalgrind.git Finalise SSSE3 support (counterpart to r1808): * support SSSE3 for 32-bit insns * For 128-bit variants accessing memory, generate an exception if effective address is not 128-bit aligned * Change CPUID output to be Core-2, so now it claims to be a Core 2 E6600 git-svn-id: svn://svn.valgrind.org/vex/trunk@1809 --- diff --git a/VEX/priv/guest-amd64/ghelpers.c b/VEX/priv/guest-amd64/ghelpers.c index b6aa3289dd..3526f54552 100644 --- a/VEX/priv/guest-amd64/ghelpers.c +++ b/VEX/priv/guest-amd64/ghelpers.c @@ -1758,27 +1758,32 @@ void amd64g_dirtyhelper_FSTENV ( /*IN*/VexGuestAMD64State* vex_state, /*--- Misc integer helpers, including rotates and CPUID. ---*/ /*---------------------------------------------------------------*/ -/* Claim to be the following CPU: - vendor_id : AuthenticAMD - cpu family : 15 - model : 12 - model name : AMD Athlon(tm) 64 Processor 3200+ - stepping : 0 - cpu MHz : 2202.917 - cache size : 512 KB +/* Claim to be the following CPU (2 x ...): + vendor_id : GenuineIntel + cpu family : 6 + model : 15 + model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz + stepping : 6 + cpu MHz : 2394.000 + cache size : 4096 KB + physical id : 0 + siblings : 2 + core id : 0 + cpu cores : 2 fpu : yes fpu_exception : yes - cpuid level : 1 + cpuid level : 10 wp : yes - flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr - pge mca cmov pat pse36 clflush mmx fxsr sse sse2 - pni syscall nx mmxext lm 3dnowext 3dnow - bogomips : 4308.99 - TLB size : 1088 4K pages + flags : fpu vme de pse tsc msr pae mce cx8 apic sep + mtrr pge mca cmov pat pse36 clflush dts acpi + mmx fxsr sse sse2 ss ht tm syscall nx lm + constant_tsc pni monitor ds_cpl vmx est tm2 + cx16 xtpr lahf_lm + bogomips : 4798.78 clflush size : 64 cache_alignment : 64 - address sizes : 40 bits physical, 48 bits virtual - power management: ts fid vid ttp + address sizes : 36 bits physical, 48 bits virtual + power management: */ void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st ) { @@ -1790,38 +1795,71 @@ void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st ) } while (0) switch (0xFFFFFFFF & st->guest_RAX) { - case 0x0: - SET_ABCD(0x00000001, 0x68747541, 0x444d4163, 0x69746e65); + case 0x00000000: + SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); break; - case 0x1: - SET_ABCD(0x00000fc0, 0x00000800, 0x00000000, 0x078bfbff); + case 0x00000001: + SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); break; - case 0x80000000: - SET_ABCD(0x80000018, 0x68747541, 0x444d4163, 0x69746e65); + case 0x00000002: + SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); break; - case 0x80000001: - SET_ABCD(0x00000fc0, 0x0000010a, 0x00000000, 0xe1d3fbff); + case 0x00000003: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); break; - case 0x80000002: - SET_ABCD(0x20444d41, 0x6c687441, 0x74286e6f, 0x3620296d); + case 0x00000004: + SET_ABCD(0x04000121, 0x01c0003f, 0x0000003f, 0x00000001); break; - case 0x80000003: - SET_ABCD(0x72502034, 0x7365636f, 0x20726f73, 0x30303233); + case 0x00000005: + SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); break; - case 0x80000004: - SET_ABCD(0x0000002b, 0x00000000, 0x00000000, 0x00000000); + case 0x00000006: + SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); break; - case 
0x80000005: - SET_ABCD(0xff08ff08, 0xff20ff20, 0x40020140, 0x40020140); + case 0x00000007: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); break; - case 0x80000006: - SET_ABCD(0x00000000, 0x42004200, 0x02008140, 0x00000000); + case 0x00000008: + SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); break; - case 0x80000007: - SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x0000000f); + case 0x00000009: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); break; - case 0x80000008: - SET_ABCD(0x00003028, 0x00000000, 0x00000000, 0x00000000); + case 0x0000000a: + SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000000: + SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000001: + SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100800); + break; + case 0x80000002: + SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); + break; + case 0x80000003: + SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); + break; + case 0x80000004: + SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); + break; + case 0x80000005: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000006: + SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); + break; + case 0x80000007: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000008: + SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80860000: + SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); + break; + case 0xc0000000: + SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); break; default: SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); diff --git a/VEX/priv/guest-amd64/toIR.c b/VEX/priv/guest-amd64/toIR.c index 5c8570d80b..630fccc6ea 100644 --- a/VEX/priv/guest-amd64/toIR.c +++ b/VEX/priv/guest-amd64/toIR.c @@ -8467,6 +8467,23 @@ static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, ); } +/* Generate a SIGSEGV followed by a restart of the current instruction + if effective_addr is not 16-aligned. This is required behaviour + for some SSE3 instructions and all 128-bit SSSE3 instructions. + This assumes that guest_RIP_curr_instr is set correctly! */ +static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) +{ + stmt( + IRStmt_Exit( + binop(Iop_CmpNE64, + binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)), + mkU64(0)), + Ijk_SigSEGV, + IRConst_U64(guest_RIP_curr_instr) + ) + ); +} + /* Helper for deciding whether a given insn (starting at the opcode byte) may validly be used with a LOCK prefix. 
The following insns @@ -12693,7 +12710,7 @@ DisResult disInstr_AMD64_WRK ( nameXMMReg(gregOfRexRM(pfx,modrm))); } else { addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 ); - /* FIXME: generate trap if addr is not 16-aligned */ + gen_SEGV_if_not_16_aligned( addr ); assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); delta += 3+alen; DIP("pmaddubsw %s,%s\n", dis_buf, @@ -12846,7 +12863,7 @@ DisResult disInstr_AMD64_WRK ( delta += 3+1; } else { addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 ); - /* FIXME: generate trap if addr is not 16-aligned */ + gen_SEGV_if_not_16_aligned( addr ); assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); DIP("ph%s %s,%s\n", str, dis_buf, nameXMMReg(gregOfRexRM(pfx,modrm))); @@ -12931,7 +12948,7 @@ DisResult disInstr_AMD64_WRK ( nameXMMReg(gregOfRexRM(pfx,modrm))); } else { addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 ); - /* FIXME: generate trap if addr is not 16-aligned */ + gen_SEGV_if_not_16_aligned( addr ); assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); delta += 3+alen; DIP("pmulhrsw %s,%s\n", dis_buf, @@ -13029,7 +13046,7 @@ DisResult disInstr_AMD64_WRK ( nameXMMReg(gregOfRexRM(pfx,modrm))); } else { addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 ); - /* FIXME: generate trap if addr is not 16-aligned */ + gen_SEGV_if_not_16_aligned( addr ); assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); delta += 3+alen; DIP("psign%s %s,%s\n", str, dis_buf, @@ -13121,7 +13138,7 @@ DisResult disInstr_AMD64_WRK ( nameXMMReg(gregOfRexRM(pfx,modrm))); } else { addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 ); - /* FIXME: generate trap if addr is not 16-aligned */ + gen_SEGV_if_not_16_aligned( addr ); assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); delta += 3+alen; DIP("pabs%s %s,%s\n", str, dis_buf, @@ -13220,7 +13237,7 @@ DisResult disInstr_AMD64_WRK ( nameXMMReg(gregOfRexRM(pfx,modrm))); } else { addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 ); - /* FIXME: generate trap if addr is not 16-aligned */ + gen_SEGV_if_not_16_aligned( addr ); assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); d64 = (Long)insn[3+alen]; delta += 3+alen+1; @@ -13240,7 +13257,7 @@ DisResult disInstr_AMD64_WRK ( } else if (d64 >= 1 && d64 <= 7) { assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d64) ); - assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) ); + assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d64) ); } else if (d64 == 8) { assign( rHi, mkexpr(dLo) ); @@ -13248,7 +13265,7 @@ DisResult disInstr_AMD64_WRK ( } else if (d64 >= 9 && d64 <= 15) { assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d64-8) ); - assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) ); + assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d64-8) ); } else if (d64 == 16) { assign( rHi, mkexpr(dHi) ); @@ -13256,7 +13273,7 @@ DisResult disInstr_AMD64_WRK ( } else if (d64 >= 17 && d64 <= 23) { assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d64-16))) ); - assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) ); + assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d64-16) ); } else if (d64 == 24) { assign( rHi, mkU64(0) ); @@ -13313,7 +13330,7 @@ DisResult disInstr_AMD64_WRK ( Iop_Perm8x8, mkexpr(dV), binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) - ), + ), /* mask off lanes which have (index & 0x80) == 0x80 */ unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) ) @@ -13353,7 +13370,7 @@ DisResult disInstr_AMD64_WRK ( nameXMMReg(gregOfRexRM(pfx,modrm))); } else { addr = disAMode ( &alen, pfx, delta+3, dis_buf, 0 ); - /* FIXME: generate trap if addr is not 16-aligned */ + gen_SEGV_if_not_16_aligned( addr ); assign( sV, 
loadLE(Ity_V128, mkexpr(addr)) ); delta += 3+alen; DIP("pshufb %s,%s\n", dis_buf, diff --git a/VEX/priv/guest-x86/ghelpers.c b/VEX/priv/guest-x86/ghelpers.c index c89357603c..8351120e51 100644 --- a/VEX/priv/guest-x86/ghelpers.c +++ b/VEX/priv/guest-x86/ghelpers.c @@ -2075,37 +2075,114 @@ void x86g_dirtyhelper_CPUID_sse1 ( VexGuestX86State* st ) } } -/* Claim to be the following SSE2-capable CPU: +/* Claim to be the following SSSE3-capable CPU (2 x ...): vendor_id : GenuineIntel - cpu family : 15 - model : 2 - model name : Intel(R) Pentium(R) 4 CPU 2.40GHz - stepping : 7 - cpu MHz : 2394.234 - cache size : 512 KB + cpu family : 6 + model : 15 + model name : Intel(R) Core(TM)2 CPU 6600 @ 2.40GHz + stepping : 6 + cpu MHz : 2394.000 + cache size : 4096 KB + physical id : 0 + siblings : 2 + core id : 0 + cpu cores : 2 + fpu : yes + fpu_exception : yes + cpuid level : 10 + wp : yes + flags : fpu vme de pse tsc msr pae mce cx8 apic sep + mtrr pge mca cmov pat pse36 clflush dts acpi + mmx fxsr sse sse2 ss ht tm syscall nx lm + constant_tsc pni monitor ds_cpl vmx est tm2 + cx16 xtpr lahf_lm + bogomips : 4798.78 + clflush size : 64 + cache_alignment : 64 + address sizes : 36 bits physical, 48 bits virtual + power management: */ void x86g_dirtyhelper_CPUID_sse2 ( VexGuestX86State* st ) { +# define SET_ABCD(_a,_b,_c,_d) \ + do { st->guest_EAX = (UInt)(_a); \ + st->guest_EBX = (UInt)(_b); \ + st->guest_ECX = (UInt)(_c); \ + st->guest_EDX = (UInt)(_d); \ + } while (0) + switch (st->guest_EAX) { - case 0: - st->guest_EAX = 0x00000002; - st->guest_EBX = 0x756e6547; - st->guest_ECX = 0x6c65746e; - st->guest_EDX = 0x49656e69; + case 0x00000000: + SET_ABCD(0x0000000a, 0x756e6547, 0x6c65746e, 0x49656e69); break; - case 1: - st->guest_EAX = 0x00000f27; - st->guest_EBX = 0x00010809; - st->guest_ECX = 0x00004400; - st->guest_EDX = 0xbfebfbff; + case 0x00000001: + SET_ABCD(0x000006f6, 0x00020800, 0x0000e3bd, 0xbfebfbff); break; - default: - st->guest_EAX = 0x665b5101; - st->guest_EBX = 0x00000000; - st->guest_ECX = 0x00000000; - st->guest_EDX = 0x007b7040; + case 0x00000002: + SET_ABCD(0x05b0b101, 0x005657f0, 0x00000000, 0x2cb43049); + break; + case 0x00000003: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000004: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000005: + SET_ABCD(0x00000040, 0x00000040, 0x00000003, 0x00000020); + break; + case 0x00000006: + SET_ABCD(0x00000001, 0x00000002, 0x00000001, 0x00000000); + break; + case 0x00000007: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000008: + SET_ABCD(0x00000400, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x00000009: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x0000000a: + SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000000: + SET_ABCD(0x80000008, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000001: + SET_ABCD(0x00000000, 0x00000000, 0x00000001, 0x20100000); + break; + case 0x80000002: + SET_ABCD(0x65746e49, 0x2952286c, 0x726f4320, 0x4d542865); + break; + case 0x80000003: + SET_ABCD(0x43203229, 0x20205550, 0x20202020, 0x20202020); + break; + case 0x80000004: + SET_ABCD(0x30303636, 0x20402020, 0x30342e32, 0x007a4847); + break; + case 0x80000005: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80000006: + SET_ABCD(0x00000000, 0x00000000, 0x10008040, 0x00000000); + break; + case 0x80000007: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 
0x00000000); + break; + case 0x80000008: + SET_ABCD(0x00003024, 0x00000000, 0x00000000, 0x00000000); + break; + case 0x80860000: + SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); + break; + case 0xc0000000: + SET_ABCD(0x07280202, 0x00000000, 0x00000000, 0x00000000); + break; + default: + SET_ABCD(0x00000000, 0x00000000, 0x00000000, 0x00000000); break; } +# undef SET_ABCD } diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c index 304a85cef5..86a9ac573b 100644 --- a/VEX/priv/guest-x86/toIR.c +++ b/VEX/priv/guest-x86/toIR.c @@ -7225,6 +7225,182 @@ void set_EFLAGS_from_value ( IRTemp t1, } +/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit + values (aa,bb), computes, for each of the 4 16-bit lanes: + + (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1 +*/ +static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx ) +{ + IRTemp aa = newTemp(Ity_I64); + IRTemp bb = newTemp(Ity_I64); + IRTemp aahi32s = newTemp(Ity_I64); + IRTemp aalo32s = newTemp(Ity_I64); + IRTemp bbhi32s = newTemp(Ity_I64); + IRTemp bblo32s = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = newTemp(Ity_I64); + IRTemp one32x2 = newTemp(Ity_I64); + assign(aa, aax); + assign(bb, bbx); + assign( aahi32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)), + mkU8(16) )); + assign( aalo32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)), + mkU8(16) )); + assign( bbhi32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)), + mkU8(16) )); + assign( bblo32s, + binop(Iop_SarN32x2, + binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)), + mkU8(16) )); + assign(one32x2, mkU64( (1ULL << 32) + 1 )); + assign( + rHi, + binop( + Iop_ShrN32x2, + binop( + Iop_Add32x2, + binop( + Iop_ShrN32x2, + binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)), + mkU8(14) + ), + mkexpr(one32x2) + ), + mkU8(1) + ) + ); + assign( + rLo, + binop( + Iop_ShrN32x2, + binop( + Iop_Add32x2, + binop( + Iop_ShrN32x2, + binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)), + mkU8(14) + ), + mkexpr(one32x2) + ), + mkU8(1) + ) + ); + return + binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo)); +} + +/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit + values (aa,bb), computes, for each lane: + + if aa_lane < 0 then - bb_lane + else if aa_lane > 0 then bb_lane + else 0 +*/ +static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB ) +{ + IRTemp aa = newTemp(Ity_I64); + IRTemp bb = newTemp(Ity_I64); + IRTemp zero = newTemp(Ity_I64); + IRTemp bbNeg = newTemp(Ity_I64); + IRTemp negMask = newTemp(Ity_I64); + IRTemp posMask = newTemp(Ity_I64); + IROp opSub = Iop_INVALID; + IROp opCmpGTS = Iop_INVALID; + + switch (laneszB) { + case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break; + case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break; + case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break; + default: vassert(0); + } + + assign( aa, aax ); + assign( bb, bbx ); + assign( zero, mkU64(0) ); + assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) ); + assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) ); + assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) ); + + return + binop(Iop_Or64, + binop(Iop_And64, mkexpr(bb), mkexpr(posMask)), + binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) ); + +} + +/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. 
Given a 64-bit + value aa, computes, for each lane + + if aa < 0 then -aa else aa + + Note that the result is interpreted as unsigned, so that the + absolute value of the most negative signed input can be + represented. +*/ +static IRExpr* dis_PABS_helper ( IRExpr* aax, Int laneszB ) +{ + IRTemp aa = newTemp(Ity_I64); + IRTemp zero = newTemp(Ity_I64); + IRTemp aaNeg = newTemp(Ity_I64); + IRTemp negMask = newTemp(Ity_I64); + IRTemp posMask = newTemp(Ity_I64); + IROp opSub = Iop_INVALID; + IROp opSarN = Iop_INVALID; + + switch (laneszB) { + case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break; + case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break; + case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break; + default: vassert(0); + } + + assign( aa, aax ); + assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) ); + assign( posMask, unop(Iop_Not64, mkexpr(negMask)) ); + assign( zero, mkU64(0) ); + assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) ); + return + binop(Iop_Or64, + binop(Iop_And64, mkexpr(aa), mkexpr(posMask)), + binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ); +} + +static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64, + IRTemp lo64, Int byteShift ) +{ + vassert(byteShift >= 1 && byteShift <= 7); + return + binop(Iop_Or64, + binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))), + binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift)) + ); +} + +/* Generate a SIGSEGV followed by a restart of the current instruction + if effective_addr is not 16-aligned. This is required behaviour + for some SSE3 instructions and all 128-bit SSSE3 instructions. + This assumes that guest_RIP_curr_instr is set correctly! */ +static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) +{ + stmt( + IRStmt_Exit( + binop(Iop_CmpNE32, + binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)), + mkU32(0)), + Ijk_SigSEGV, + IRConst_U32(guest_EIP_curr_instr) + ) + ); +} + + /* Helper for deciding whether a given insn (starting at the opcode byte) may validly be used with a LOCK prefix. The following insns may be used with LOCK when their destination operand is in memory. @@ -11167,6 +11343,817 @@ DisResult disInstr_X86_WRK ( /* --- end of the SSE3 decoder. --- */ /* ---------------------------------------------------- */ + /* ---------------------------------------------------- */ + /* --- start of the SSSE3 decoder. 
--- */ + /* ---------------------------------------------------- */ + + /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and + Unsigned Bytes (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + IRTemp sVoddsSX = newTemp(Ity_I64); + IRTemp sVevensSX = newTemp(Ity_I64); + IRTemp dVoddsZX = newTemp(Ity_I64); + IRTemp dVevensZX = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pmaddubsw %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmaddubsw %s,%s\n", dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + /* compute dV unsigned x sV signed */ + assign( sVoddsSX, + binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) ); + assign( sVevensSX, + binop(Iop_SarN16x4, + binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)), + mkU8(8)) ); + assign( dVoddsZX, + binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) ); + assign( dVevensZX, + binop(Iop_ShrN16x4, + binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)), + mkU8(8)) ); + + putMMXReg( + gregOfRM(modrm), + binop(Iop_QAdd16Sx4, + binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), + binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX)) + ) + ); + goto decode_success; + } + + /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and + Unsigned Bytes (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x04) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sVoddsSX = newTemp(Ity_V128); + IRTemp sVevensSX = newTemp(Ity_V128); + IRTemp dVoddsZX = newTemp(Ity_V128); + IRTemp dVevensZX = newTemp(Ity_V128); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pmaddubsw %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmaddubsw %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + /* compute dV unsigned x sV signed */ + assign( sVoddsSX, + binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) ); + assign( sVevensSX, + binop(Iop_SarN16x8, + binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)), + mkU8(8)) ); + assign( dVoddsZX, + binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) ); + assign( dVevensZX, + binop(Iop_ShrN16x8, + binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)), + mkU8(8)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_QAdd16Sx8, + binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)), + binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX)) + ) + ); + goto decode_success; + } + + /* ***--- these are MMX class insns introduced in SSSE3 ---*** */ + /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or + mmx) and G to G (mmx). */ + /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or + mmx) and G to G (mmx). */ + /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G + to G (mmx). */ + /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G + to G (mmx). */ + /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G + to G (mmx). 
*/ + /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G + to G (mmx). */ + + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 + || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { + HChar* str = "???"; + IROp opV64 = Iop_INVALID; + IROp opCatO = Iop_CatOddLanes16x4; + IROp opCatE = Iop_CatEvenLanes16x4; + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + + switch (insn[2]) { + case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; + case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; + case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; + case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; + case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; + case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; + default: vassert(0); + } + if (insn[2] == 0x02 || insn[2] == 0x06) { + opCatO = Iop_InterleaveHI32x2; + opCatE = Iop_InterleaveLO32x2; + } + + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("ph%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("ph%s %s,%s\n", str, dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + binop(opV64, + binop(opCatE,mkexpr(sV),mkexpr(dV)), + binop(opCatO,mkexpr(sV),mkexpr(dV)) + ) + ); + goto decode_success; + } + + /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or + xmm) and G to G (xmm). */ + /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or + xmm) and G to G (xmm). */ + /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and + G to G (xmm). */ + /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and + G to G (xmm). */ + /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and + G to G (xmm). */ + /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and + G to G (xmm). 
*/ + + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x03 || insn[2] == 0x07 || insn[2] == 0x01 + || insn[2] == 0x05 || insn[2] == 0x02 || insn[2] == 0x06)) { + HChar* str = "???"; + IROp opV64 = Iop_INVALID; + IROp opCatO = Iop_CatOddLanes16x4; + IROp opCatE = Iop_CatEvenLanes16x4; + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + + modrm = insn[3]; + + switch (insn[2]) { + case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break; + case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break; + case 0x01: opV64 = Iop_Add16x4; str = "addw"; break; + case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break; + case 0x02: opV64 = Iop_Add32x2; str = "addd"; break; + case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break; + default: vassert(0); + } + if (insn[2] == 0x02 || insn[2] == 0x06) { + opCatO = Iop_InterleaveHI32x2; + opCatE = Iop_InterleaveLO32x2; + } + + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg( eregOfRM(modrm)) ); + DIP("ph%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + delta += 3+1; + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + DIP("ph%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRM(modrm))); + delta += 3+alen; + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + /* This isn't a particularly efficient way to compute the + result, but at least it avoids a proliferation of IROps, + hence avoids complication all the backends. 
*/ + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, + binop(opV64, + binop(opCatE,mkexpr(sHi),mkexpr(sLo)), + binop(opCatO,mkexpr(sHi),mkexpr(sLo)) + ), + binop(opV64, + binop(opCatE,mkexpr(dHi),mkexpr(dLo)), + binop(opCatO,mkexpr(dHi),mkexpr(dLo)) + ) + ) + ); + goto decode_success; + } + + /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale + (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pmulhrsw %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmulhrsw %s,%s\n", dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) ) + ); + goto decode_success; + } + + /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and + Scale (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x0B) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmulhrsw %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, + dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ), + dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) ) + ) + ); + goto decode_success; + } + + /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */ + /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */ + /* 0F 38 09 = PSIGND -- Packed Sign 32x2 (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x08: laneszB = 1; str = "b"; break; + case 0x09: laneszB = 2; str = "w"; break; + case 0x0A: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("psign%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("psign%s %s,%s\n", str, dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB ) + ); + goto decode_success; + } + + /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */ + /* 66 0F 
38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */ + /* 66 0F 38 09 = PSIGND -- Packed Sign 32x4 (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x08 || insn[2] == 0x09 || insn[2] == 0x0A)) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x08: laneszB = 1; str = "b"; break; + case 0x09: laneszB = 2; str = "w"; break; + case 0x0A: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("psign%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, + dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ), + dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB ) + ) + ); + goto decode_success; + } + + /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */ + /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */ + /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { + IRTemp sV = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x1C: laneszB = 1; str = "b"; break; + case 0x1D: laneszB = 2; str = "w"; break; + case 0x1E: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + do_MMX_preamble(); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pabs%s %s,%s\n", str, nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pabs%s %s,%s\n", str, dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + dis_PABS_helper( mkexpr(sV), laneszB ) + ); + goto decode_success; + } + + /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */ + /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */ + /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 + && (insn[2] == 0x1C || insn[2] == 0x1D || insn[2] == 0x1E)) { + IRTemp sV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + HChar* str = "???"; + Int laneszB = 0; + + switch (insn[2]) { + case 0x1C: laneszB = 1; str = "b"; break; + case 0x1D: laneszB = 2; str = "w"; break; + case 0x1E: laneszB = 4; str = "d"; break; + default: vassert(0); + } + + modrm = insn[3]; + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( 
addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pabs%s %s,%s\n", str, dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, + dis_PABS_helper( mkexpr(sHi), laneszB ), + dis_PABS_helper( mkexpr(sLo), laneszB ) + ) + ); + goto decode_success; + } + + /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + IRTemp res = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + d32 = (UInt)insn[3+1]; + delta += 3+1+1; + DIP("palignr $%d,%s,%s\n", (Int)d32, + nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + d32 = (UInt)insn[3+alen]; + delta += 3+alen+1; + DIP("palignr $%d%s,%s\n", (Int)d32, + dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + if (d32 == 0) { + assign( res, mkexpr(sV) ); + } + else if (d32 >= 1 && d32 <= 7) { + assign(res, + binop(Iop_Or64, + binop(Iop_Shr64, mkexpr(sV), mkU8(8*d32)), + binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d32)) + ))); + } + else if (d32 == 8) { + assign( res, mkexpr(dV) ); + } + else if (d32 >= 9 && d32 <= 15) { + assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d32-8))) ); + } + else if (d32 >= 16 && d32 <= 255) { + assign( res, mkU64(0) ); + } + else + vassert(0); + + putMMXReg( gregOfRM(modrm), mkexpr(res) ); + goto decode_success; + } + + /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0F) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + d32 = (UInt)insn[3+1]; + delta += 3+1+1; + DIP("palignr $%d,%s,%s\n", (Int)d32, + nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + d32 = (UInt)insn[3+alen]; + delta += 3+alen+1; + DIP("palignr $%d,%s,%s\n", (Int)d32, + dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + if (d32 == 0) { + assign( rHi, mkexpr(sHi) ); + assign( rLo, mkexpr(sLo) ); + } + else if (d32 >= 1 && d32 <= 7) { + assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, d32) ); + assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, d32) ); + } + else if (d32 == 8) { + assign( rHi, mkexpr(dLo) ); + assign( rLo, mkexpr(sHi) ); + } + else if (d32 >= 9 && d32 <= 15) { + assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, d32-8) ); + assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, d32-8) ); + } + else if (d32 == 16) { + assign( rHi, mkexpr(dHi) ); + assign( rLo, mkexpr(dLo) ); + } + else if (d32 >= 17 && d32 <= 23) { + assign( rHi, 
binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-16))) ); + assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, d32-16) ); + } + else if (d32 == 24) { + assign( rHi, mkU64(0) ); + assign( rLo, mkexpr(dHi) ); + } + else if (d32 >= 25 && d32 <= 31) { + assign( rHi, mkU64(0) ); + assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(d32-24))) ); + } + else if (d32 >= 32 && d32 <= 255) { + assign( rHi, mkU64(0) ); + assign( rLo, mkU64(0) ); + } + else + vassert(0); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) + ); + goto decode_success; + } + + /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */ + if (sz == 4 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { + IRTemp sV = newTemp(Ity_I64); + IRTemp dV = newTemp(Ity_I64); + + modrm = insn[3]; + do_MMX_preamble(); + assign( dV, getMMXReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getMMXReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pshufb %s,%s\n", nameMMXReg(eregOfRM(modrm)), + nameMMXReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + assign( sV, loadLE(Ity_I64, mkexpr(addr)) ); + delta += 3+alen; + DIP("pshufb %s,%s\n", dis_buf, + nameMMXReg(gregOfRM(modrm))); + } + + putMMXReg( + gregOfRM(modrm), + binop( + Iop_And64, + /* permute the lanes */ + binop( + Iop_Perm8x8, + mkexpr(dV), + binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL)) + ), + /* mask off lanes which have (index & 0x80) == 0x80 */ + unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7))) + ) + ); + goto decode_success; + } + + /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x00) { + IRTemp sV = newTemp(Ity_V128); + IRTemp dV = newTemp(Ity_V128); + IRTemp sHi = newTemp(Ity_I64); + IRTemp sLo = newTemp(Ity_I64); + IRTemp dHi = newTemp(Ity_I64); + IRTemp dLo = newTemp(Ity_I64); + IRTemp rHi = newTemp(Ity_I64); + IRTemp rLo = newTemp(Ity_I64); + IRTemp sevens = newTemp(Ity_I64); + IRTemp mask0x80hi = newTemp(Ity_I64); + IRTemp mask0x80lo = newTemp(Ity_I64); + IRTemp maskBit3hi = newTemp(Ity_I64); + IRTemp maskBit3lo = newTemp(Ity_I64); + IRTemp sAnd7hi = newTemp(Ity_I64); + IRTemp sAnd7lo = newTemp(Ity_I64); + IRTemp permdHi = newTemp(Ity_I64); + IRTemp permdLo = newTemp(Ity_I64); + + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRM(modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRM(modrm)) ); + delta += 3+1; + DIP("pshufb %s,%s\n", nameXMMReg(eregOfRM(modrm)), + nameXMMReg(gregOfRM(modrm))); + } else { + addr = disAMode ( &alen, sorb, delta+3, dis_buf ); + gen_SEGV_if_not_16_aligned( addr ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pshufb %s,%s\n", dis_buf, + nameXMMReg(gregOfRM(modrm))); + } + + assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) ); + assign( dLo, unop(Iop_V128to64, mkexpr(dV)) ); + assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) ); + assign( sLo, unop(Iop_V128to64, mkexpr(sV)) ); + + assign( sevens, mkU64(0x0707070707070707ULL) ); + + /* + mask0x80hi = Not(SarN8x8(sHi,7)) + maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7) + sAnd7hi = And(sHi,sevens) + permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi), + And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) ) + rHi = And(permdHi,mask0x80hi) + */ + assign( + mask0x80hi, + unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7)))); + + assign( + maskBit3hi, + binop(Iop_SarN8x8, + binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)), + mkU8(7))); + + assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens))); + + 
assign( + permdHi, + binop( + Iop_Or64, + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)), + mkexpr(maskBit3hi)), + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)), + unop(Iop_Not64,mkexpr(maskBit3hi))) )); + + assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) ); + + /* And the same for the lower half of the result. What fun. */ + + assign( + mask0x80lo, + unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7)))); + + assign( + maskBit3lo, + binop(Iop_SarN8x8, + binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)), + mkU8(7))); + + assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens))); + + assign( + permdLo, + binop( + Iop_Or64, + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)), + mkexpr(maskBit3lo)), + binop(Iop_And64, + binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)), + unop(Iop_Not64,mkexpr(maskBit3lo))) )); + + assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) ); + + putXMMReg( + gregOfRM(modrm), + binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)) + ); + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSSE3 decoder. --- */ + /* ---------------------------------------------------- */ + after_sse_decoders: /* ---------------------------------------------------- */ diff --git a/VEX/priv/host-amd64/hdefs.c b/VEX/priv/host-amd64/hdefs.c index 2ff737f036..9caf8c2f2c 100644 --- a/VEX/priv/host-amd64/hdefs.c +++ b/VEX/priv/host-amd64/hdefs.c @@ -2692,6 +2692,9 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, case Ijk_SigTRAP: *p++ = 0xBD; p = emit32(p, VEX_TRC_JMP_SIGTRAP); break; + case Ijk_SigSEGV: + *p++ = 0xBD; + p = emit32(p, VEX_TRC_JMP_SIGSEGV); break; case Ijk_Ret: case Ijk_Call: case Ijk_Boring: diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c index c00ec049b0..af50b5394e 100644 --- a/VEX/priv/host-x86/isel.c +++ b/VEX/priv/host-x86/isel.c @@ -2324,6 +2324,12 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) fn = (HWord)h_generic_calc_InterleaveHI32x2; goto binnish; case Iop_InterleaveLO32x2: fn = (HWord)h_generic_calc_InterleaveLO32x2; goto binnish; + case Iop_CatOddLanes16x4: + fn = (HWord)h_generic_calc_CatOddLanes16x4; goto binnish; + case Iop_CatEvenLanes16x4: + fn = (HWord)h_generic_calc_CatEvenLanes16x4; goto binnish; + case Iop_Perm8x8: + fn = (HWord)h_generic_calc_Perm8x8; goto binnish; case Iop_Max8Ux8: fn = (HWord)h_generic_calc_Max8Ux8; goto binnish; @@ -2336,6 +2342,8 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) case Iop_Mul16x4: fn = (HWord)h_generic_calc_Mul16x4; goto binnish; + case Iop_Mul32x2: + fn = (HWord)h_generic_calc_Mul32x2; goto binnish; case Iop_MulHi16Sx4: fn = (HWord)h_generic_calc_MulHi16Sx4; goto binnish; case Iop_MulHi16Ux4: @@ -2401,6 +2409,8 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) fn = (HWord)h_generic_calc_ShlN32x2; goto shifty; case Iop_ShlN16x4: fn = (HWord)h_generic_calc_ShlN16x4; goto shifty; + case Iop_ShlN8x8: + fn = (HWord)h_generic_calc_ShlN8x8; goto shifty; case Iop_ShrN32x2: fn = (HWord)h_generic_calc_ShrN32x2; goto shifty; case Iop_ShrN16x4:
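
For reference, the standalone C sketch below (not part of the patch; the function names here are invented for illustration) restates in scalar form two behaviours the patch expresses in VEX IR: the 16-byte effective-address test that gen_SEGV_if_not_16_aligned() emits for 128-bit SSSE3 memory operands, and the per-lane rounding formula quoted in the comment on dis_PMULHRSW_helper(), (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1, plus the PSIGN lane rule described on dis_PSIGN_helper(). It is a minimal model of the intended semantics, not a drop-in for the IR helpers.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the predicate behind gen_SEGV_if_not_16_aligned(): the guest
   takes SIGSEGV (and the insn is restarted) iff any of the low four
   bits of the effective address are set. */
static int is_16_aligned(uint64_t effective_addr)
{
   return (effective_addr & 0xF) == 0;
}

/* One 16-bit lane of PMULHRSW: widen both operands to 32 bits,
   multiply, shift the product right by 14, add the rounding bit,
   shift right once more and keep the low 16 bits.  Only the low 17
   bits of the shifted product matter, so the signed shift used here
   gives the same lane result as the IR's logical shift. */
static int16_t pmulhrsw_lane(int16_t a, int16_t b)
{
   int32_t prod = (int32_t)a * (int32_t)b;
   int32_t t    = (prod >> 14) + 1;
   return (int16_t)(t >> 1);
}

/* One lane of PSIGN{B,W,D}, per the comment on dis_PSIGN_helper():
   if aa < 0 return -bb, if aa > 0 return bb, else 0. */
static int32_t psign_lane(int32_t aa, int32_t bb)
{
   if (aa < 0) return -bb;
   if (aa > 0) return  bb;
   return 0;
}

int main(void)
{
   printf("0x1010 16-aligned? %d\n", is_16_aligned(0x1010));   /* 1 */
   printf("0x1018 16-aligned? %d\n", is_16_aligned(0x1018));   /* 0 */
   printf("pmulhrsw(0x4000,0x4000) = 0x%04x\n",
          (uint16_t)pmulhrsw_lane(0x4000, 0x4000));            /* 0x2000 */
   printf("psign(-3,7) = %d\n", psign_lane(-3, 7));            /* -7 */
   return 0;
}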