guest subarchitecture. */
if (archinfo->hwcaps == 0/*baseline, no sse at all*/)
goto after_sse_decoders;
-
+
+ /* With mmxext only some extended MMX instructions are recognized.
+ The mmxext instructions are MASKMOVQ MOVNTQ PAVGB PAVGW PMAXSW
+ PMAXUB PMINSW PMINUB PMULHUW PSADBW PSHUFW PEXTRW PINSRW PMOVMSKB
+ PREFETCHNTA PREFETCHT0 PREFETCHT1 PREFETCHT2 SFENCE
+
+ http://support.amd.com/us/Embedded_TechDocs/22466.pdf
+ https://en.wikipedia.org/wiki/3DNow!#3DNow.21_extensions */
+
+ if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
+ goto mmxext;
+
/* Otherwise we must be doing sse1 or sse2, so we can at least try
for SSE1 here. */
goto decode_success;
}
+
+ /* mmxext sse1 subset starts here. mmxext only arches will parse
+ only this subset of the sse1 instructions. */
+ mmxext:
+
/* ***--- this is an MMX class insn introduced in SSE1 ---*** */
/* 0F F7 = MASKMOVQ -- 8x8 masked store */
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
goto decode_success;
}
- /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
- delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
- goto decode_success;
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
+ Intel manual does not say anything about the usual business of
+ the FP reg tags getting trashed whenever an MMX insn happens.
+ So we just leave them alone.
+ */
+ if (insn[0] == 0x0F && insn[1] == 0xE7) {
+ modrm = getIByte(delta+2);
+ if (sz == 4 && !epartIsReg(modrm)) {
+ /* do_MMX_preamble(); Intel docs don't specify this */
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
+ DIP("movntq %s,%s\n", dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ delta += 2+alen;
+ goto decode_success;
+ }
+ /* else fall through */
}
- /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
- vassert(sz == 4);
- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pavgb", False );
goto decode_success;
}
- /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
- delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pavgw", False );
goto decode_success;
}
- /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
- vassert(sz == 4);
- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
- goto decode_success;
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
+ zero-extend of it in ireg(G). */
+ if (insn[0] == 0x0F && insn[1] == 0xC5) {
+ modrm = insn[2];
+ if (sz == 4 && epartIsReg(modrm)) {
+ IRTemp sV = newTemp(Ity_I64);
+ t5 = newTemp(Ity_I16);
+ do_MMX_preamble();
+ assign(sV, getMMXReg(eregOfRM(modrm)));
+ breakup64to16s( sV, &t3, &t2, &t1, &t0 );
+ switch (insn[3] & 3) {
+ case 0: assign(t5, mkexpr(t0)); break;
+ case 1: assign(t5, mkexpr(t1)); break;
+ case 2: assign(t5, mkexpr(t2)); break;
+ case 3: assign(t5, mkexpr(t3)); break;
+ default: vassert(0); /*NOTREACHED*/
+ }
+ putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
+ DIP("pextrw $%d,%s,%s\n",
+ (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
+ nameIReg(4,gregOfRM(modrm)));
+ delta += 4;
+ goto decode_success;
+ }
+ /* else fall through */
}
- /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
- /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
- if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
- modrm = getIByte(delta+2);
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
+ put it into the specified lane of mmx(G). */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
+ /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
+ mmx reg. t4 is the new lane value. t5 is the original
+ mmx value. t6 is the new mmx value. */
+ Int lane;
+ t4 = newTemp(Ity_I16);
+ t5 = newTemp(Ity_I64);
+ t6 = newTemp(Ity_I64);
+ modrm = insn[2];
+ do_MMX_preamble();
+
+ assign(t5, getMMXReg(gregOfRM(modrm)));
+ breakup64to16s( t5, &t3, &t2, &t1, &t0 );
+
if (epartIsReg(modrm)) {
- putXMMReg( gregOfRM(modrm),
- getXMMReg( eregOfRM(modrm) ));
- DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
- nameXMMReg(gregOfRM(modrm)));
- delta += 2+1;
+ assign(t4, getIReg(2, eregOfRM(modrm)));
+ delta += 3+1;
+ lane = insn[3+1-1];
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ nameIReg(2,eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
} else {
addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- if (insn[1] == 0x28/*movaps*/)
- gen_SEGV_if_not_16_aligned( addr );
- putXMMReg( gregOfRM(modrm),
- loadLE(Ity_V128, mkexpr(addr)) );
- DIP("mov[ua]ps %s,%s\n", dis_buf,
- nameXMMReg(gregOfRM(modrm)));
- delta += 2+alen;
+ delta += 3+alen;
+ lane = insn[3+alen-1];
+ assign(t4, loadLE(Ity_I16, mkexpr(addr)));
+ DIP("pinsrw $%d,%s,%s\n", (Int)lane,
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+
+ switch (lane & 3) {
+ case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
+ case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
+ case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
+ case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
+ default: vassert(0); /*NOTREACHED*/
}
+ putMMXReg(gregOfRM(modrm), mkexpr(t6));
goto decode_success;
}
- /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
- /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
- if (sz == 4 && insn[0] == 0x0F
- && (insn[1] == 0x29 || insn[1] == 0x11)) {
- modrm = getIByte(delta+2);
- if (epartIsReg(modrm)) {
- /* fall through; awaiting test case */
- } else {
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- if (insn[1] == 0x29/*movaps*/)
- gen_SEGV_if_not_16_aligned( addr );
- storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
- DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
- dis_buf );
- delta += 2+alen;
- goto decode_success;
- }
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F EE = PMAXSW -- 16x4 signed max */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pmaxsw", False );
+ goto decode_success;
}
- /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
- /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
- modrm = getIByte(delta+2);
- if (epartIsReg(modrm)) {
- delta += 2+1;
- putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
- getXMMRegLane64( eregOfRM(modrm), 0 ) );
- DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
- nameXMMReg(gregOfRM(modrm)));
- } else {
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- delta += 2+alen;
- putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
- loadLE(Ity_I64, mkexpr(addr)) );
- DIP("movhps %s,%s\n", dis_buf,
- nameXMMReg( gregOfRM(modrm) ));
- }
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F DE = PMAXUB -- 8x8 unsigned max */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pmaxub", False );
goto decode_success;
}
- /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
- if (!epartIsReg(insn[2])) {
- delta += 2;
- addr = disAMode ( &alen, sorb, delta, dis_buf );
- delta += alen;
- storeLE( mkexpr(addr),
- getXMMRegLane64( gregOfRM(insn[2]),
- 1/*upper lane*/ ) );
- DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
- dis_buf);
- goto decode_success;
- }
- /* else fall through */
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F EA = PMINSW -- 16x4 signed min */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pminsw", False );
+ goto decode_success;
}
- /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
- /* OF 12 = MOVHLPS -- from from hi half to lo half of XMM. */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
- modrm = getIByte(delta+2);
- if (epartIsReg(modrm)) {
- delta += 2+1;
- putXMMRegLane64( gregOfRM(modrm),
- 0/*lower lane*/,
- getXMMRegLane64( eregOfRM(modrm), 1 ));
- DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
- nameXMMReg(gregOfRM(modrm)));
- } else {
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- delta += 2+alen;
- putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
- loadLE(Ity_I64, mkexpr(addr)) );
- DIP("movlps %s, %s\n",
- dis_buf, nameXMMReg( gregOfRM(modrm) ));
- }
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F DA = PMINUB -- 8x8 unsigned min */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pminub", False );
goto decode_success;
}
- /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
- if (!epartIsReg(insn[2])) {
- delta += 2;
- addr = disAMode ( &alen, sorb, delta, dis_buf );
- delta += alen;
- storeLE( mkexpr(addr),
- getXMMRegLane64( gregOfRM(insn[2]),
- 0/*lower lane*/ ) );
- DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
- dis_buf);
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
+ mmx(E), turn them into a byte, and put zero-extend of it in
+ ireg(G). */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
+ modrm = insn[2];
+ if (epartIsReg(modrm)) {
+ do_MMX_preamble();
+ t0 = newTemp(Ity_I64);
+ t1 = newTemp(Ity_I32);
+ assign(t0, getMMXReg(eregOfRM(modrm)));
+ assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
+ putIReg(4, gregOfRM(modrm), mkexpr(t1));
+ DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
+ nameIReg(4,gregOfRM(modrm)));
+ delta += 3;
goto decode_success;
- }
+ }
/* else fall through */
}
- /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
- to 4 lowest bits of ireg(G) */
- if (insn[0] == 0x0F && insn[1] == 0x50) {
- modrm = getIByte(delta+2);
- if (sz == 4 && epartIsReg(modrm)) {
- Int src;
- t0 = newTemp(Ity_I32);
- t1 = newTemp(Ity_I32);
- t2 = newTemp(Ity_I32);
- t3 = newTemp(Ity_I32);
- delta += 2+1;
- src = eregOfRM(modrm);
- assign( t0, binop( Iop_And32,
- binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
- mkU32(1) ));
- assign( t1, binop( Iop_And32,
- binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
- mkU32(2) ));
- assign( t2, binop( Iop_And32,
- binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
- mkU32(4) ));
- assign( t3, binop( Iop_And32,
- binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
- mkU32(8) ));
- putIReg(4, gregOfRM(modrm),
- binop(Iop_Or32,
- binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
- binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
- )
- );
- DIP("movmskps %s,%s\n", nameXMMReg(src),
- nameIReg(4, gregOfRM(modrm)));
- goto decode_success;
- }
- /* else fall through */
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "pmuluh", False );
+ goto decode_success;
}
- /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
- /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
- if (insn[0] == 0x0F && insn[1] == 0x2B) {
+ /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
+ /* 0F 18 /1 = PREFETCHT0 -- with various different hints */
+ /* 0F 18 /2 = PREFETCHT1 */
+ /* 0F 18 /3 = PREFETCHT2 */
+ if (insn[0] == 0x0F && insn[1] == 0x18
+ && !epartIsReg(insn[2])
+ && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
+ const HChar* hintstr = "??";
+
modrm = getIByte(delta+2);
- if (!epartIsReg(modrm)) {
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- gen_SEGV_if_not_16_aligned( addr );
- storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
- DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
- dis_buf,
- nameXMMReg(gregOfRM(modrm)));
- delta += 2+alen;
- goto decode_success;
+ vassert(!epartIsReg(modrm));
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ switch (gregOfRM(modrm)) {
+ case 0: hintstr = "nta"; break;
+ case 1: hintstr = "t0"; break;
+ case 2: hintstr = "t1"; break;
+ case 3: hintstr = "t2"; break;
+ default: vassert(0); /*NOTREACHED*/
}
- /* else fall through */
+
+ DIP("prefetch%s %s\n", hintstr, dis_buf);
+ goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
- Intel manual does not say anything about the usual business of
- the FP reg tags getting trashed whenever an MMX insn happens.
- So we just leave them alone.
- */
- if (insn[0] == 0x0F && insn[1] == 0xE7) {
+ /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
+ /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
+ if (insn[0] == 0x0F && insn[1] == 0x0D
+ && !epartIsReg(insn[2])
+ && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
+ const HChar* hintstr = "??";
+
modrm = getIByte(delta+2);
- if (sz == 4 && !epartIsReg(modrm)) {
- /* do_MMX_preamble(); Intel docs don't specify this */
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- storeLE( mkexpr(addr), getMMXReg(gregOfRM(modrm)) );
- DIP("movntq %s,%s\n", dis_buf,
- nameMMXReg(gregOfRM(modrm)));
- delta += 2+alen;
- goto decode_success;
+ vassert(!epartIsReg(modrm));
+
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+
+ switch (gregOfRM(modrm)) {
+ case 0: hintstr = ""; break;
+ case 1: hintstr = "w"; break;
+ default: vassert(0); /*NOTREACHED*/
}
- /* else fall through */
+
+ DIP("prefetch%s %s\n", hintstr, dis_buf);
+ goto decode_success;
}
- /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
- (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
- vassert(sz == 4);
- modrm = getIByte(delta+3);
- if (epartIsReg(modrm)) {
- putXMMRegLane32( gregOfRM(modrm), 0,
- getXMMRegLane32( eregOfRM(modrm), 0 ));
- DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
- nameXMMReg(gregOfRM(modrm)));
- delta += 3+1;
- } else {
- addr = disAMode ( &alen, sorb, delta+3, dis_buf );
- /* zero bits 127:64 */
- putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
- /* zero bits 63:32 */
- putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
- /* write bits 31:0 */
- putXMMRegLane32( gregOfRM(modrm), 0,
- loadLE(Ity_I32, mkexpr(addr)) );
- DIP("movss %s,%s\n", dis_buf,
- nameXMMReg(gregOfRM(modrm)));
- delta += 3+alen;
- }
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
+ do_MMX_preamble();
+ delta = dis_MMXop_regmem_to_reg (
+ sorb, delta+2, insn[1], "psadbw", False );
goto decode_success;
}
- /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
- or lo 1/4 xmm). */
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
- vassert(sz == 4);
- modrm = getIByte(delta+3);
+ /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+ /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
+ Int order;
+ IRTemp sV, dV, s3, s2, s1, s0;
+ s3 = s2 = s1 = s0 = IRTemp_INVALID;
+ sV = newTemp(Ity_I64);
+ dV = newTemp(Ity_I64);
+ do_MMX_preamble();
+ modrm = insn[2];
if (epartIsReg(modrm)) {
- /* fall through, we don't yet have a test case */
+ assign( sV, getMMXReg(eregOfRM(modrm)) );
+ order = (Int)insn[3];
+ delta += 2+2;
+ DIP("pshufw $%d,%s,%s\n", order,
+ nameMMXReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)));
} else {
- addr = disAMode ( &alen, sorb, delta+3, dis_buf );
- storeLE( mkexpr(addr),
- getXMMRegLane32(gregOfRM(modrm), 0) );
- DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
- dis_buf);
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
+ order = (Int)insn[2+alen];
delta += 3+alen;
- goto decode_success;
+ DIP("pshufw $%d,%s,%s\n", order,
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
}
- }
+ breakup64to16s( sV, &s3, &s2, &s1, &s0 );
- /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
- delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
+# define SEL(n) \
+ ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
+ assign(dV,
+ mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
+ SEL((order>>2)&3), SEL((order>>0)&3) )
+ );
+ putMMXReg(gregOfRM(modrm), mkexpr(dV));
+# undef SEL
goto decode_success;
}
- /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
+ /* 0F AE /7 = SFENCE -- flush pending operations to memory */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
vassert(sz == 4);
- delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
+ delta += 3;
+ /* Insert a memory fence. It's sometimes important that these
+ are carried through to the generated code. */
+ stmt( IRStmt_MBE(Imbe_Fence) );
+ DIP("sfence\n");
goto decode_success;
}
- /* 0F 56 = ORPS -- G = G and E */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
- delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
+ /* End of mmxext sse1 subset. No more sse parsing for mmxext only arches. */
+ if (archinfo->hwcaps == VEX_HWCAPS_X86_MMXEXT/*integer only sse1 subset*/)
+ goto after_sse_decoders;
+
+
+ /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE0) {
- do_MMX_preamble();
- delta = dis_MMXop_regmem_to_reg (
- sorb, delta+2, insn[1], "pavgb", False );
+ /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5F) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "maxss", Iop_Max32F0x4 );
goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE3) {
- do_MMX_preamble();
- delta = dis_MMXop_regmem_to_reg (
- sorb, delta+2, insn[1], "pavgw", False );
+ /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5D) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "minps", Iop_Min32Fx4 );
goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
- zero-extend of it in ireg(G). */
- if (insn[0] == 0x0F && insn[1] == 0xC5) {
- modrm = insn[2];
- if (sz == 4 && epartIsReg(modrm)) {
- IRTemp sV = newTemp(Ity_I64);
- t5 = newTemp(Ity_I16);
- do_MMX_preamble();
- assign(sV, getMMXReg(eregOfRM(modrm)));
- breakup64to16s( sV, &t3, &t2, &t1, &t0 );
- switch (insn[3] & 3) {
- case 0: assign(t5, mkexpr(t0)); break;
- case 1: assign(t5, mkexpr(t1)); break;
- case 2: assign(t5, mkexpr(t2)); break;
- case 3: assign(t5, mkexpr(t3)); break;
- default: vassert(0); /*NOTREACHED*/
- }
- putIReg(4, gregOfRM(modrm), unop(Iop_16Uto32, mkexpr(t5)));
- DIP("pextrw $%d,%s,%s\n",
- (Int)insn[3], nameMMXReg(eregOfRM(modrm)),
- nameIReg(4,gregOfRM(modrm)));
- delta += 4;
- goto decode_success;
- }
- /* else fall through */
+ /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5D) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "minss", Iop_Min32F0x4 );
+ goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
- put it into the specified lane of mmx(G). */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC4) {
- /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
- mmx reg. t4 is the new lane value. t5 is the original
- mmx value. t6 is the new mmx value. */
- Int lane;
- t4 = newTemp(Ity_I16);
- t5 = newTemp(Ity_I64);
- t6 = newTemp(Ity_I64);
- modrm = insn[2];
- do_MMX_preamble();
-
- assign(t5, getMMXReg(gregOfRM(modrm)));
- breakup64to16s( t5, &t3, &t2, &t1, &t0 );
-
+ /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
+ /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
+ if (sz == 4 && insn[0] == 0x0F && (insn[1] == 0x28 || insn[1] == 0x10)) {
+ modrm = getIByte(delta+2);
if (epartIsReg(modrm)) {
- assign(t4, getIReg(2, eregOfRM(modrm)));
- delta += 3+1;
- lane = insn[3+1-1];
- DIP("pinsrw $%d,%s,%s\n", (Int)lane,
- nameIReg(2,eregOfRM(modrm)),
- nameMMXReg(gregOfRM(modrm)));
+ putXMMReg( gregOfRM(modrm),
+ getXMMReg( eregOfRM(modrm) ));
+ DIP("mov[ua]ps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+1;
} else {
addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- delta += 3+alen;
- lane = insn[3+alen-1];
- assign(t4, loadLE(Ity_I16, mkexpr(addr)));
- DIP("pinsrw $%d,%s,%s\n", (Int)lane,
- dis_buf,
- nameMMXReg(gregOfRM(modrm)));
- }
-
- switch (lane & 3) {
- case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
- case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
- case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
- case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
- default: vassert(0); /*NOTREACHED*/
+ if (insn[1] == 0x28/*movaps*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ putXMMReg( gregOfRM(modrm),
+ loadLE(Ity_V128, mkexpr(addr)) );
+ DIP("mov[ua]ps %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+alen;
}
- putMMXReg(gregOfRM(modrm), mkexpr(t6));
goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F EE = PMAXSW -- 16x4 signed max */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEE) {
- do_MMX_preamble();
- delta = dis_MMXop_regmem_to_reg (
- sorb, delta+2, insn[1], "pmaxsw", False );
- goto decode_success;
+ /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
+ /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
+ if (sz == 4 && insn[0] == 0x0F
+ && (insn[1] == 0x29 || insn[1] == 0x11)) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ /* fall through; awaiting test case */
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ if (insn[1] == 0x29/*movaps*/)
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+ DIP("mov[ua]ps %s,%s\n", nameXMMReg(gregOfRM(modrm)),
+ dis_buf );
+ delta += 2+alen;
+ goto decode_success;
+ }
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F DE = PMAXUB -- 8x8 unsigned max */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDE) {
- do_MMX_preamble();
- delta = dis_MMXop_regmem_to_reg (
- sorb, delta+2, insn[1], "pmaxub", False );
+ /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
+ /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x16) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
+ getXMMRegLane64( eregOfRM(modrm), 0 ) );
+ DIP("movhps %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRM(modrm), 1/*upper lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movhps %s,%s\n", dis_buf,
+ nameXMMReg( gregOfRM(modrm) ));
+ }
goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F EA = PMINSW -- 16x4 signed min */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xEA) {
- do_MMX_preamble();
- delta = dis_MMXop_regmem_to_reg (
- sorb, delta+2, insn[1], "pminsw", False );
- goto decode_success;
+ /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x17) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRM(insn[2]),
+ 1/*upper lane*/ ) );
+ DIP("movhps %s,%s\n", nameXMMReg( gregOfRM(insn[2]) ),
+ dis_buf);
+ goto decode_success;
+ }
+ /* else fall through */
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F DA = PMINUB -- 8x8 unsigned min */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xDA) {
- do_MMX_preamble();
- delta = dis_MMXop_regmem_to_reg (
- sorb, delta+2, insn[1], "pminub", False );
+ /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
+ /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x12) {
+ modrm = getIByte(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ putXMMRegLane64( gregOfRM(modrm),
+ 0/*lower lane*/,
+ getXMMRegLane64( eregOfRM(modrm), 1 ));
+ DIP("movhlps %s, %s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ delta += 2+alen;
+ putXMMRegLane64( gregOfRM(modrm), 0/*lower lane*/,
+ loadLE(Ity_I64, mkexpr(addr)) );
+ DIP("movlps %s, %s\n",
+ dis_buf, nameXMMReg( gregOfRM(modrm) ));
+ }
goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
- mmx(E), turn them into a byte, and put zero-extend of it in
- ireg(G). */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xD7) {
- modrm = insn[2];
- if (epartIsReg(modrm)) {
- do_MMX_preamble();
- t0 = newTemp(Ity_I64);
- t1 = newTemp(Ity_I32);
- assign(t0, getMMXReg(eregOfRM(modrm)));
- assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
- putIReg(4, gregOfRM(modrm), mkexpr(t1));
- DIP("pmovmskb %s,%s\n", nameMMXReg(eregOfRM(modrm)),
- nameIReg(4,gregOfRM(modrm)));
- delta += 3;
+ /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x13) {
+ if (!epartIsReg(insn[2])) {
+ delta += 2;
+ addr = disAMode ( &alen, sorb, delta, dis_buf );
+ delta += alen;
+ storeLE( mkexpr(addr),
+ getXMMRegLane64( gregOfRM(insn[2]),
+ 0/*lower lane*/ ) );
+ DIP("movlps %s, %s\n", nameXMMReg( gregOfRM(insn[2]) ),
+ dis_buf);
goto decode_success;
- }
+ }
/* else fall through */
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xE4) {
- do_MMX_preamble();
- delta = dis_MMXop_regmem_to_reg (
- sorb, delta+2, insn[1], "pmuluh", False );
- goto decode_success;
- }
-
- /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
- /* 0F 18 /1 = PREFETCH0 -- with various different hints */
- /* 0F 18 /2 = PREFETCH1 */
- /* 0F 18 /3 = PREFETCH2 */
- if (insn[0] == 0x0F && insn[1] == 0x18
- && !epartIsReg(insn[2])
- && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 3) {
- const HChar* hintstr = "??";
-
+ /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
+ to 4 lowest bits of ireg(G) */
+ if (insn[0] == 0x0F && insn[1] == 0x50) {
modrm = getIByte(delta+2);
- vassert(!epartIsReg(modrm));
-
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- delta += 2+alen;
-
- switch (gregOfRM(modrm)) {
- case 0: hintstr = "nta"; break;
- case 1: hintstr = "t0"; break;
- case 2: hintstr = "t1"; break;
- case 3: hintstr = "t2"; break;
- default: vassert(0); /*NOTREACHED*/
+ if (sz == 4 && epartIsReg(modrm)) {
+ Int src;
+ t0 = newTemp(Ity_I32);
+ t1 = newTemp(Ity_I32);
+ t2 = newTemp(Ity_I32);
+ t3 = newTemp(Ity_I32);
+ delta += 2+1;
+ src = eregOfRM(modrm);
+ assign( t0, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,0), mkU8(31)),
+ mkU32(1) ));
+ assign( t1, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,1), mkU8(30)),
+ mkU32(2) ));
+ assign( t2, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,2), mkU8(29)),
+ mkU32(4) ));
+ assign( t3, binop( Iop_And32,
+ binop(Iop_Shr32, getXMMRegLane32(src,3), mkU8(28)),
+ mkU32(8) ));
+ putIReg(4, gregOfRM(modrm),
+ binop(Iop_Or32,
+ binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
+ binop(Iop_Or32, mkexpr(t2), mkexpr(t3))
+ )
+ );
+ DIP("movmskps %s,%s\n", nameXMMReg(src),
+ nameIReg(4, gregOfRM(modrm)));
+ goto decode_success;
}
-
- DIP("prefetch%s %s\n", hintstr, dis_buf);
- goto decode_success;
+ /* else fall through */
}
- /* 0F 0D /0 = PREFETCH m8 -- 3DNow! prefetch */
- /* 0F 0D /1 = PREFETCHW m8 -- ditto, with some other hint */
- if (insn[0] == 0x0F && insn[1] == 0x0D
- && !epartIsReg(insn[2])
- && gregOfRM(insn[2]) >= 0 && gregOfRM(insn[2]) <= 1) {
- const HChar* hintstr = "??";
-
+ /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
+ /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
+ if (insn[0] == 0x0F && insn[1] == 0x2B) {
modrm = getIByte(delta+2);
- vassert(!epartIsReg(modrm));
-
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- delta += 2+alen;
-
- switch (gregOfRM(modrm)) {
- case 0: hintstr = ""; break;
- case 1: hintstr = "w"; break;
- default: vassert(0); /*NOTREACHED*/
+ if (!epartIsReg(modrm)) {
+ addr = disAMode ( &alen, sorb, delta+2, dis_buf );
+ gen_SEGV_if_not_16_aligned( addr );
+ storeLE( mkexpr(addr), getXMMReg(gregOfRM(modrm)) );
+ DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 2+alen;
+ goto decode_success;
}
-
- DIP("prefetch%s %s\n", hintstr, dis_buf);
- goto decode_success;
+ /* else fall through */
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF6) {
- do_MMX_preamble();
- delta = dis_MMXop_regmem_to_reg (
- sorb, delta+2, insn[1], "psadbw", False );
+ /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
+ (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x10) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
+ if (epartIsReg(modrm)) {
+ putXMMRegLane32( gregOfRM(modrm), 0,
+ getXMMRegLane32( eregOfRM(modrm), 0 ));
+ DIP("movss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+1;
+ } else {
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ /* zero bits 127:64 */
+ putXMMRegLane64( gregOfRM(modrm), 1, mkU64(0) );
+ /* zero bits 63:32 */
+ putXMMRegLane32( gregOfRM(modrm), 1, mkU32(0) );
+ /* write bits 31:0 */
+ putXMMRegLane32( gregOfRM(modrm), 0,
+ loadLE(Ity_I32, mkexpr(addr)) );
+ DIP("movss %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ delta += 3+alen;
+ }
goto decode_success;
}
- /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
- /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
- if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x70) {
- Int order;
- IRTemp sV, dV, s3, s2, s1, s0;
- s3 = s2 = s1 = s0 = IRTemp_INVALID;
- sV = newTemp(Ity_I64);
- dV = newTemp(Ity_I64);
- do_MMX_preamble();
- modrm = insn[2];
+ /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
+ or lo 1/4 xmm). */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x11) {
+ vassert(sz == 4);
+ modrm = getIByte(delta+3);
if (epartIsReg(modrm)) {
- assign( sV, getMMXReg(eregOfRM(modrm)) );
- order = (Int)insn[3];
- delta += 2+2;
- DIP("pshufw $%d,%s,%s\n", order,
- nameMMXReg(eregOfRM(modrm)),
- nameMMXReg(gregOfRM(modrm)));
+ /* fall through, we don't yet have a test case */
} else {
- addr = disAMode ( &alen, sorb, delta+2, dis_buf );
- assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
- order = (Int)insn[2+alen];
+ addr = disAMode ( &alen, sorb, delta+3, dis_buf );
+ storeLE( mkexpr(addr),
+ getXMMRegLane32(gregOfRM(modrm), 0) );
+ DIP("movss %s,%s\n", nameXMMReg(gregOfRM(modrm)),
+ dis_buf);
delta += 3+alen;
- DIP("pshufw $%d,%s,%s\n", order,
- dis_buf,
- nameMMXReg(gregOfRM(modrm)));
+ goto decode_success;
}
- breakup64to16s( sV, &s3, &s2, &s1, &s0 );
+ }
-# define SEL(n) \
- ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
- assign(dV,
- mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
- SEL((order>>2)&3), SEL((order>>0)&3) )
- );
- putMMXReg(gregOfRM(modrm), mkexpr(dV));
-# undef SEL
+ /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x59) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "mulps", Iop_Mul32Fx4 );
+ goto decode_success;
+ }
+
+ /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x59) {
+ vassert(sz == 4);
+ delta = dis_SSE_E_to_G_lo32( sorb, delta+3, "mulss", Iop_Mul32F0x4 );
+ goto decode_success;
+ }
+
+ /* 0F 56 = ORPS -- G = G or E */
+ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x56) {
+ delta = dis_SSE_E_to_G_all( sorb, delta+2, "orps", Iop_OrV128 );
goto decode_success;
}
goto decode_success;
}
- /* 0F AE /7 = SFENCE -- flush pending operations to memory */
- if (insn[0] == 0x0F && insn[1] == 0xAE
- && epartIsReg(insn[2]) && gregOfRM(insn[2]) == 7) {
- vassert(sz == 4);
- delta += 3;
- /* Insert a memory fence. It's sometimes important that these
- are carried through to the generated code. */
- stmt( IRStmt_MBE(Imbe_Fence) );
- DIP("sfence\n");
- goto decode_success;
- }
-
/* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xC6) {
Int select;
fAddr = &x86g_dirtyhelper_CPUID_sse1;
}
else
+ if (archinfo->hwcaps & VEX_HWCAPS_X86_MMXEXT) {
+ fName = "x86g_dirtyhelper_CPUID_mmxext";
+ fAddr = &x86g_dirtyhelper_CPUID_mmxext;
+ }
+ else
if (archinfo->hwcaps == 0/*no SSE*/) {
fName = "x86g_dirtyhelper_CPUID_sse0";
fAddr = &x86g_dirtyhelper_CPUID_sse0;