/*--- %rflags run-time helpers. ---*/
/*---------------------------------------------------------------*/
+/* Do 64x64 -> 128 signed/unsigned multiplies, for computing flags
+ after imulq/mulq. */
+
+/* Signed 64 x 64 -> 128 bit multiply of u by v.  The upper 64 bits of
+   the product are stored in *rHi and the lower 64 bits in *rLo.
+   The high half is built from 32-bit partial products: each operand
+   is split into an unsigned low half and a signed high half.  This
+   relies on >> of a negative Long being an arithmetic shift, which is
+   implementation-defined in C. */
+static void mullS64 ( Long u, Long v, Long* rHi, Long* rLo )
+{
+   ULong u0, v0, w0;
+   Long  u1, v1, w1, w2, t;
+   u0   = u & 0xFFFFFFFF;
+   u1   = u >> 32;
+   v0   = v & 0xFFFFFFFF;
+   v1   = v >> 32;
+   w0   = u0 * v0;
+   t    = u1 * v0 + (w0 >> 32);
+   w1   = t & 0xFFFFFFFF;
+   w2   = t >> 32;
+   w1   = u0 * v1 + w1;
+   *rHi = u1 * v1 + w2 + (w1 >> 32);
+   /* The low 64 bits are the same for signed and unsigned multiply.
+      Do the multiply on ULong so that wraparound is well defined; a
+      plain Long * Long overflows for large operands, which is
+      undefined behaviour. */
+   *rLo = (Long)( (ULong)u * (ULong)v );
+}
+
+/* Unsigned 64 x 64 -> 128 bit multiply of u by v.  The upper 64 bits
+   of the product go to *rHi and the lower 64 bits to *rLo.  The high
+   half is assembled from the four 32 x 32 partial products. */
+static void mullU64 ( ULong u, ULong v, ULong* rHi, ULong* rLo )
+{
+   ULong uLo   = u & 0xFFFFFFFF;
+   ULong uHi   = u >> 32;
+   ULong vLo   = v & 0xFFFFFFFF;
+   ULong vHi   = v >> 32;
+   /* First cross term plus the carry out of the low x low product. */
+   ULong cross = uHi * vLo + ((uLo * vLo) >> 32);
+   /* Second cross term plus the low 32 bits of the first. */
+   ULong mid   = uLo * vHi + (cross & 0xFFFFFFFF);
+   *rHi = uHi * vHi + (cross >> 32) + (mid >> 32);
+   *rLo = u * v;
+}
+
+
static const UChar parity_table[256] = {
AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0,
0, AMD64G_CC_MASK_P, AMD64G_CC_MASK_P, 0, AMD64G_CC_MASK_P, 0, 0, AMD64G_CC_MASK_P,
} \
}
+/*-------------------------------------------------------------*/
+
+/* Flag computation for an unsigned 64 x 64 -> 128 bit multiply of
+   CC_DEP1 by CC_DEP2, done via mullU64.  CF (bit 0) and OF (bit 11)
+   are both set exactly when the high 64 bits of the product are
+   nonzero.  PF comes from parity_table indexed by the low byte of the
+   low half, ZF (bit 6) is set when the low half is zero, and AF is
+   left as 0.  SF is taken from the low half with
+   lshift(lo, 8 - 64) & 0x80.
+   NOTE(review): presumably lshift with a negative amount shifts
+   right, making SF bit 63 of the low half -- confirm against the
+   definition of lshift. */
+#define ACTIONS_UMULQ \
+{ \
+ PREAMBLE(64); \
+ { Long cf, pf, af, zf, sf, of; \
+ ULong lo, hi; \
+ mullU64( (ULong)CC_DEP1, (ULong)CC_DEP2, &hi, &lo ); \
+ cf = (hi != 0); \
+ pf = parity_table[(UChar)lo]; \
+ af = 0; /* undefined */ \
+ zf = (lo == 0) << 6; \
+ sf = lshift(lo, 8 - 64) & 0x80; \
+ of = cf << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
+/*-------------------------------------------------------------*/
+
+/* Flag computation for a signed 64 x 64 -> 128 bit multiply of
+   CC_DEP1 by CC_DEP2, done via mullS64.  CF (bit 0) and OF (bit 11)
+   are both set when the high half differs from the low half shifted
+   right (signed) by 63, i.e. when the high half is not just the sign
+   extension of the low half.  PF comes from parity_table indexed by
+   the low byte of the low half, ZF (bit 6) is set when the low half
+   is zero, and AF is left as 0.  SF is taken from the low half with
+   lshift(lo, 8 - 64) & 0x80.
+   NOTE(review): presumably lshift with a negative amount shifts
+   right, making SF bit 63 of the low half -- confirm against the
+   definition of lshift. */
+#define ACTIONS_SMULQ \
+{ \
+ PREAMBLE(64); \
+ { Long cf, pf, af, zf, sf, of; \
+ Long lo, hi; \
+ mullS64( (Long)CC_DEP1, (Long)CC_DEP2, &hi, &lo ); \
+ cf = (hi != (lo >>/*s*/ (64-1))); \
+ pf = parity_table[(UChar)lo]; \
+ af = 0; /* undefined */ \
+ zf = (lo == 0) << 6; \
+ sf = lshift(lo, 8 - 64) & 0x80; \
+ of = cf << 11; \
+ return cf | pf | af | zf | sf | of; \
+ } \
+}
+
#if PROFILE_EFLAGS
case AMD64G_CC_OP_SMULB: ACTIONS_SMUL( 8, Char, Short );
case AMD64G_CC_OP_SMULW: ACTIONS_SMUL( 16, Short, Int );
case AMD64G_CC_OP_SMULL: ACTIONS_SMUL( 32, Int, Long );
+ case AMD64G_CC_OP_SMULQ: ACTIONS_SMULQ;
default:
/* shouldn't really make these calls from generated code */
stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
}
-//.. static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
-//.. {
-//.. vassert(typeOfIRExpr(irbb->tyenv,e) == Ity_F64);
-//.. stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
-//.. }
-//..
+/* Emit a Put of 'e' to the guest-state offset given by
+   xmmGuestRegLane64offset(xmmreg, laneno).  'e' must have IR type
+   Ity_F64; this is checked with vassert. */
+static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
+{
+ vassert(typeOfIRExpr(irbb->tyenv,e) == Ity_F64);
+ stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
+}
+
//.. static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
//.. {
//.. vassert(typeOfIRExpr(irbb->tyenv,e) == Ity_F32);
//.. putXMMReg( eregOfRM(rm), mkexpr(e1) );
//.. return delta;
//.. }
-//..
-//..
-//.. /* Get the current SSE rounding mode. */
-//..
-//.. static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
-//.. {
-//.. return binop( Iop_And32,
-//.. IRExpr_Get( OFFB_SSEROUND, Ity_I32 ),
-//.. mkU32(3) );
-//.. }
-//..
+
+
+/* Build an expression for the current SSE rounding mode: read the
+   64-bit guest-state field at OFFB_SSEROUND, keep its two lowest
+   bits and narrow the result to 32 bits. */
+
+static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
+{
+   IRExpr* sseround64 = IRExpr_Get( OFFB_SSEROUND, Ity_I64 );
+   IRExpr* lowTwoBits = binop( Iop_And64, sseround64, mkU64(3) );
+   return unop( Iop_64to32, lowTwoBits );
+}
+
//.. static void put_sse_roundingmode ( IRExpr* sseround )
//.. {
//.. vassert(typeOfIRExpr(irbb->tyenv, sseround) == Ity_I32);
//.. delta = dis_SSE_E_to_G_all_invG( sorb, delta+2, "andnpd", Iop_And128 );
//.. goto decode_success;
//.. }
-//..
-//.. /* 66 0F 54 = ANDPD -- G = G and E */
-//.. if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x54) {
-//.. delta = dis_SSE_E_to_G_all( sorb, delta+2, "andpd", Iop_And128 );
-//.. goto decode_success;
-//.. }
-//..
+
+ /* 66 0F 54 = ANDPD -- G = G and E */
+ if (have66noF2noF3(pfx) && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x54) {
+ delta = dis_SSE_E_to_G_all( pfx, delta+2, "andpd", Iop_AndV128 );
+ goto decode_success;
+ }
+
//.. /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
//.. if (sz == 2 && insn[0] == 0x0F && insn[1] == 0xC2) {
//.. delta = dis_SSEcmp_E_to_G( sorb, delta+2, "cmppd", True, 8 );
//..
//.. /* F2 0F 2D = CVTSD2SI -- convert F64 in mem/low half xmm to
//.. I32 in ireg, according to prevailing SSE rounding mode */
-//.. /* F2 0F 2C = CVTTSD2SI -- convert F64 in mem/low half xmm to
-//.. I32 in ireg, according to prevailing SSE rounding mode */
-//.. if (insn[0] == 0xF2 && insn[1] == 0x0F
-//.. && (insn[2] == 0x2D || insn[2] == 0x2C)) {
-//.. IRTemp rmode = newTemp(Ity_I32);
-//.. IRTemp f64lo = newTemp(Ity_F64);
-//.. Bool r2zero = insn[2] == 0x2C;
-//.. vassert(sz == 4);
-//..
-//.. modrm = getUChar(delta+3);
-//.. if (epartIsReg(modrm)) {
-//.. delta += 3+1;
-//.. assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
-//.. DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
-//.. nameXMMReg(eregOfRM(modrm)),
-//.. nameIReg(4, gregOfRM(modrm)));
-//.. } else {
-//.. addr = disAMode ( &alen, sorb, delta+3, dis_buf );
-//.. assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
-//.. delta += 3+alen;
-//.. DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
-//.. dis_buf,
-//.. nameIReg(4, gregOfRM(modrm)));
-//.. }
-//..
-//.. if (r2zero) {
-//.. assign( rmode, mkU32((UInt)Irrm_ZERO) );
-//.. } else {
-//.. assign( rmode, get_sse_roundingmode() );
-//.. }
-//..
-//.. putIReg(4, gregOfRM(modrm),
-//.. binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64lo)) );
-//..
-//.. goto decode_success;
-//.. }
-//..
+ /* F2 0F 2C = CVTTSD2SI
+ when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
+ truncating towards zero
+ when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
+ truncating towards zero
+ */
+ /* The 0x2D (CVTSD2SI) opcode test is deliberately commented out, so
+ only the truncating form reaches this block and r2zero is always
+ True here. */
+ if (haveF2no66noF3(pfx)
+ && insn[0] == 0x0F
+ && ( /* insn[1] == 0x2D || */ insn[1] == 0x2C)) {
+ IRTemp rmode = newTemp(Ity_I32);
+ IRTemp f64lo = newTemp(Ity_F64);
+ Bool r2zero = insn[1] == 0x2C;
+ vassert(sz == 4 || sz == 8);
+
+ modrm = getUChar(delta+2);
+ if (epartIsReg(modrm)) {
+ delta += 2+1;
+ assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
+ /* Name the destination with the actual operand size, so the
+ sz==8 form is not printed as a 32-bit register. */
+ DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameIRegR(pfx, sz, gregOfRM(modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+ delta += 2+alen;
+ DIP("cvt%ssd2si %s,%s\n", r2zero ? "t" : "",
+ dis_buf,
+ nameIRegR(pfx, sz, gregOfRM(modrm)));
+ }
+
+ /* Truncating form forces round-to-zero; otherwise use the guest's
+ current SSE rounding mode. */
+ if (r2zero) {
+ assign( rmode, mkU32((UInt)Irrm_ZERO) );
+ } else {
+ assign( rmode, get_sse_roundingmode() );
+ }
+
+ if (sz == 4) {
+ putIRegR( pfx, 4, gregOfRM(modrm),
+ binop( Iop_F64toI32, mkexpr(rmode), mkexpr(f64lo)) );
+ } else {
+ /* The 64-bit form is intentionally rejected for now; the code
+ after the goto is unreachable and kept as the intended
+ implementation. */
+ goto decode_failure; /* awaiting test case */
+ putIRegR( pfx, 8, gregOfRM(modrm),
+ binop( Iop_F64toI64, mkexpr(rmode), mkexpr(f64lo)) );
+ }
+
+ goto decode_success;
+ }
+
//.. /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
//.. low 1/4 xmm(G), according to prevailing SSE rounding mode */
//.. if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5A) {
//.. modrm = getUChar(delta+3);
//.. if (epartIsReg(modrm)) {
//.. delta += 3+1;
-//.. assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
+//.. assign(f64lo, getXMMRegLane64F(eregOfRM(modrm), 0));
//.. DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRM(modrm)),
//.. nameXMMReg(gregOfRM(modrm)));
//.. } else {
//.. addr = disAMode ( &alen, sorb, delta+3, dis_buf );
-//.. assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
+//.. assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
//.. delta += 3+alen;
//.. DIP("cvtsd2ss %s,%s\n", dis_buf,
//.. nameXMMReg(gregOfRM(modrm)));
//..
//.. goto decode_success;
//.. }
-//..
-//.. /* F2 0F 2A = CVTSI2SD -- convert I32 in mem/ireg to F64 in low
-//.. half xmm */
-//.. if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x2A) {
-//.. IRTemp arg32 = newTemp(Ity_I32);
-//.. vassert(sz == 4);
-//..
-//.. modrm = getUChar(delta+3);
-//.. if (epartIsReg(modrm)) {
-//.. assign( arg32, getIReg(4, eregOfRM(modrm)) );
-//.. delta += 3+1;
-//.. DIP("cvtsi2sd %s,%s\n", nameIReg(4, eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr = disAMode ( &alen, sorb, delta+3, dis_buf );
-//.. assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
-//.. delta += 3+alen;
-//.. DIP("cvtsi2sd %s,%s\n", dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)) );
-//.. }
-//..
-//.. putXMMRegLane64F(
-//.. gregOfRM(modrm), 0,
-//.. unop(Iop_I32toF64, mkexpr(arg32)) );
-//..
-//.. goto decode_success;
-//.. }
-//..
+
+ /* F2 0F 2A = CVTSI2SD
+ when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
+ when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
+ */
+ /* The two sizes differ in the IR they produce: the 32-bit source
+ uses the unary Iop_I32toF64 (no rounding-mode operand), while the
+ 64-bit source uses the binary Iop_I64toF64 with the current SSE
+ rounding mode as its first operand. */
+ if (haveF2no66noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x2A) {
+ vassert(sz == 4 || sz == 8);
+ modrm = getUChar(delta+2);
+
+ if (sz == 4) {
+ IRTemp arg32 = newTemp(Ity_I32);
+ /* Source is either an integer register or a 32-bit load. */
+ if (epartIsReg(modrm)) {
+ assign( arg32, getIRegB(pfx, 4, eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtsi2sd %s,%s\n", nameIRegB(pfx, 4, eregOfRM(modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtsi2sd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ /* Only lane 0 (the low 64 bits) of the destination is written. */
+ putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
+ unop(Iop_I32toF64, mkexpr(arg32))
+ );
+ } else {
+ /* sz == 8 */
+ IRTemp arg64 = newTemp(Ity_I64);
+ /* Source is either an integer register or a 64-bit load. */
+ if (epartIsReg(modrm)) {
+ assign( arg64, getIRegB(pfx, 8, eregOfRM(modrm)) );
+ delta += 2+1;
+ DIP("cvtsi2sd %s,%s\n", nameIRegB(pfx, 8, eregOfRM(modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("cvtsi2sd %s,%s\n", dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)) );
+ }
+ /* Only lane 0 (the low 64 bits) of the destination is written. */
+ putXMMRegLane64F(
+ gregOfRexRM(pfx,modrm),
+ 0,
+ binop( Iop_I64toF64,
+ get_sse_roundingmode(),
+ mkexpr(arg64)
+ )
+ );
+
+ }
+
+ goto decode_success;
+ }
+
//.. /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
//.. low half xmm(G) */
//.. if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
//.. modrm = getUChar(delta+3);
//.. if (epartIsReg(modrm)) {
//.. delta += 3+1;
-//.. assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
+//.. assign(f32lo, getXMMRegLane32F(eregOfRM(modrm), 0));
//.. DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRM(modrm)),
//.. nameXMMReg(gregOfRM(modrm)));
//.. } else {
//.. addr = disAMode ( &alen, sorb, delta+3, dis_buf );
-//.. assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
+//.. assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
//.. delta += 3+alen;
//.. DIP("cvtss2sd %s,%s\n", dis_buf,
//.. nameXMMReg(gregOfRM(modrm)));
//.. nameXMMReg(gregOfRM(modrm)));
//.. } else {
//.. addr = disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
+//.. assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
//.. delta += 2+alen;
//.. DIP("cvttpd2dq %s,%s\n", dis_buf,
//.. nameXMMReg(gregOfRM(modrm)) );
//.. delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxpd", Iop_Max64Fx2 );
//.. goto decode_success;
//.. }
-//..
-//.. /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
-//.. if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x5F) {
-//.. vassert(sz == 4);
-//.. delta = dis_SSE_E_to_G_lo64( sorb, delta+3, "maxsd", Iop_Max64F0x2 );
-//.. goto decode_success;
-//.. }
-//..
+
+ /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
+ if (haveF2no66noF3(pfx) && sz == 4
+ && insn[0] == 0x0F && insn[1] == 0x5F) {
+ delta = dis_SSE_E_to_G_lo64( pfx, delta+2, "maxsd", Iop_Max64F0x2 );
+ goto decode_success;
+ }
+
//.. /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
//.. if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x5D) {
//.. delta = dis_SSE_E_to_G_all( sorb, delta+2, "minpd", Iop_Min64Fx2 );
goto decode_success;
}
-//.. /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
-//.. /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
-//.. /* These just appear to be special cases of SHUFPS */
-//.. if (sz == 2 && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
-//.. IRTemp s1 = newTemp(Ity_I64);
-//.. IRTemp s0 = newTemp(Ity_I64);
-//.. IRTemp d1 = newTemp(Ity_I64);
-//.. IRTemp d0 = newTemp(Ity_I64);
-//.. IRTemp sV = newTemp(Ity_V128);
-//.. IRTemp dV = newTemp(Ity_V128);
-//.. Bool hi = insn[1] == 0x15;
-//..
-//.. modrm = insn[2];
-//.. assign( dV, getXMMReg(gregOfRM(modrm)) );
-//..
-//.. if (epartIsReg(modrm)) {
-//.. assign( sV, getXMMReg(eregOfRM(modrm)) );
-//.. delta += 2+1;
-//.. DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
-//.. nameXMMReg(eregOfRM(modrm)),
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. } else {
-//.. addr = disAMode ( &alen, sorb, delta+2, dis_buf );
-//.. assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
-//.. delta += 2+alen;
-//.. DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
-//.. dis_buf,
-//.. nameXMMReg(gregOfRM(modrm)));
-//.. }
-//..
-//.. assign( d1, unop(Iop_128HIto64, mkexpr(dV)) );
-//.. assign( d0, unop(Iop_128to64, mkexpr(dV)) );
-//.. assign( s1, unop(Iop_128HIto64, mkexpr(sV)) );
-//.. assign( s0, unop(Iop_128to64, mkexpr(sV)) );
-//..
-//.. if (hi) {
-//.. putXMMReg( gregOfRM(modrm),
-//.. binop(Iop_64HLto128, mkexpr(s1), mkexpr(d1)) );
-//.. } else {
-//.. putXMMReg( gregOfRM(modrm),
-//.. binop(Iop_64HLto128, mkexpr(s0), mkexpr(d0)) );
-//.. }
-//..
-//.. goto decode_success;
-//.. }
+ /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
+ /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
+ /* These just appear to be special cases of SHUFPS */
+ /* Result is built from one 64-bit half of the source (E) placed
+ above the matching half of the destination (G): the high halves
+ for UNPCKHPD, the low halves for UNPCKLPD. */
+ if (have66noF2noF3(pfx)
+ && sz == 2 /* could be 8 if rex also present */
+ && insn[0] == 0x0F && (insn[1] == 0x15 || insn[1] == 0x14)) {
+ IRTemp s1 = newTemp(Ity_I64);
+ IRTemp s0 = newTemp(Ity_I64);
+ IRTemp d1 = newTemp(Ity_I64);
+ IRTemp d0 = newTemp(Ity_I64);
+ IRTemp sV = newTemp(Ity_V128);
+ IRTemp dV = newTemp(Ity_V128);
+ Bool hi = insn[1] == 0x15;
+
+ modrm = insn[2];
+ assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+ /* The mnemonic printed is the packed-double one (unpckhpd /
+ unpcklpd), matching the instructions decoded here. */
+ if (epartIsReg(modrm)) {
+ assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+ delta += 2+1;
+ DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
+ nameXMMReg(eregOfRexRM(pfx,modrm)),
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ } else {
+ addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 );
+ assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+ delta += 2+alen;
+ DIP("unpck%spd %s,%s\n", hi ? "h" : "l",
+ dis_buf,
+ nameXMMReg(gregOfRexRM(pfx,modrm)));
+ }
+
+ /* Split both vectors into their 64-bit halves. */
+ assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
+ assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
+ assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
+ assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
+
+ if (hi) {
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
+ } else {
+ putXMMReg( gregOfRexRM(pfx,modrm),
+ binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
+ }
+
+ goto decode_success;
+ }
/* 66 0F 57 = XORPD -- G = G xor E */
if (have66noF2noF3(pfx) && insn[0] == 0x0F && insn[1] == 0x57) {
case Asse_SUBF: return "sub";
case Asse_MULF: return "mul";
case Asse_DIVF: return "div";
-//.. case Xsse_MAXF: return "max";
-//.. case Xsse_MINF: return "min";
+ case Asse_MAXF: return "max";
+ case Asse_MINF: return "min";
//.. case Xsse_CMPEQF: return "cmpFeq";
//.. case Xsse_CMPLTF: return "cmpFlt";
//.. case Xsse_CMPLEF: return "cmpFle";
//.. vassert(cond != Xcc_ALWAYS);
//.. return i;
//.. }
-//.. AMD64Instr* AMD64Instr_FpLdStCW ( Bool isLoad, AMD64AMode* addr ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpLdStCW;
-//.. i->Xin.FpLdStCW.isLoad = isLoad;
-//.. i->Xin.FpLdStCW.addr = addr;
-//.. return i;
-//.. }
+/* Allocate an Ain_LdMXCSR instruction whose operand is the address
+   mode 'addr'. */
+AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
+   AMD64Instr* insn       = LibVEX_Alloc(sizeof(AMD64Instr));
+   insn->tag              = Ain_LdMXCSR;
+   insn->Ain.LdMXCSR.addr = addr;
+   return insn;
+}
//.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) {
//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//.. i->tag = Xin_FpStSW_AX;
vassert(sz == 4 || sz == 8);
return i;
}
-
+/* Allocate an Ain_SseSI2SF instruction.  szS is the source size and
+   szD the destination size; each must be 4 or 8, which is asserted
+   after the fields have been filled in. */
+AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
+   AMD64Instr* insn       = LibVEX_Alloc(sizeof(AMD64Instr));
+   insn->tag              = Ain_SseSI2SF;
+   insn->Ain.SseSI2SF.szS = szS;
+   insn->Ain.SseSI2SF.szD = szD;
+   insn->Ain.SseSI2SF.src = src;
+   insn->Ain.SseSI2SF.dst = dst;
+   vassert(szS == 4 || szS == 8);
+   vassert(szD == 4 || szD == 8);
+   return insn;
+}
+/* Allocate an Ain_SseSF2SI instruction.  szS is the source size and
+   szD the destination size; each must be 4 or 8, which is asserted
+   after the fields have been filled in. */
+AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
+   AMD64Instr* insn       = LibVEX_Alloc(sizeof(AMD64Instr));
+   insn->tag              = Ain_SseSF2SI;
+   insn->Ain.SseSF2SI.szS = szS;
+   insn->Ain.SseSF2SI.szD = szD;
+   insn->Ain.SseSF2SI.src = src;
+   insn->Ain.SseSF2SI.dst = dst;
+   vassert(szS == 4 || szS == 8);
+   vassert(szD == 4 || szD == 8);
+   return insn;
+}
//.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) {
//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
//.. i->tag = Xin_SseConst;
//.. case Xin_FpStSW_AX:
//.. vex_printf("fstsw %%ax");
//.. return;
+ case Ain_LdMXCSR:
+ vex_printf("ldmxcsr ");
+ ppAMD64AMode(i->Ain.LdMXCSR.addr);
+ break;
case Ain_SseUComIS:
vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
ppHRegAMD64(i->Ain.SseUComIS.srcL);
vex_printf(" ; pushfq ; popq ");
ppHRegAMD64(i->Ain.SseUComIS.dst);
break;
+ case Ain_SseSI2SF:
+ vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
+ (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.SseSI2SF.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.SseSI2SF.dst);
+ break;
+ case Ain_SseSF2SI:
+ vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
+ ppHRegAMD64(i->Ain.SseSF2SI.src);
+ vex_printf(",");
+ (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+ (i->Ain.SseSF2SI.dst);
+ break;
//.. case Xin_SseConst:
//.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
//.. ppHRegAMD64(i->Xin.SseConst.dst);
//.. addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
//.. addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
//.. return;
-//.. case Xin_FpLdStCW:
-//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdStCW.addr);
-//.. return;
+ case Ain_LdMXCSR:
+ addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
+ return;
//.. case Xin_FpStSW_AX:
//.. addHRegUse(u, HRmWrite, hregAMD64_EAX());
//.. return;
addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
return;
+ case Ain_SseSI2SF:
+ addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
+ addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
+ return;
+ case Ain_SseSF2SI:
+ addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
+ addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
+ return;
case Ain_SseLdSt:
addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
//.. mapReg(m, &i->Xin.FpCMov.src);
//.. mapReg(m, &i->Xin.FpCMov.dst);
//.. return;
-//.. case Xin_FpLdStCW:
-//.. mapRegs_AMD64AMode(m, i->Xin.FpLdStCW.addr);
-//.. return;
+ case Ain_LdMXCSR:
+ mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
+ return;
//.. case Xin_FpStSW_AX:
//.. return;
case Ain_SseUComIS:
mapReg(m, &i->Ain.SseUComIS.srcR);
mapReg(m, &i->Ain.SseUComIS.dst);
return;
+ case Ain_SseSI2SF:
+ mapReg(m, &i->Ain.SseSI2SF.src);
+ mapReg(m, &i->Ain.SseSI2SF.dst);
+ return;
+ case Ain_SseSF2SI:
+ mapReg(m, &i->Ain.SseSF2SI.src);
+ mapReg(m, &i->Ain.SseSF2SI.dst);
+ return;
//.. case Xin_SseConst:
//.. mapReg(m, &i->Xin.SseConst.dst);
//.. return;
*p++ = 0xFF;
p = doAMode_M(p, fake(6), i->Ain.Push.src->Armi.Mem.am);
goto done;
-//.. case Xrmi_Imm:
-//.. *p++ = 0x68;
-//.. p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
-//.. goto done;
-//.. case Xrmi_Reg:
-//.. *p++ = 0x50 + iregNo(i->Xin.Push.src->Xrmi.Reg.reg);
-//.. goto done;
+ case Armi_Imm:
+ *p++ = 0x68;
+ p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
+ goto done;
+ case Armi_Reg:
+ *p++ = 0x40 + (1 & iregBit3(i->Ain.Push.src->Armi.Reg.reg));
+ *p++ = 0x50 + iregNo(i->Ain.Push.src->Armi.Reg.reg);
+ goto done;
default:
goto bad;
}
//.. /* Fill in the jump offset. */
//.. *(ptmp-1) = p - ptmp;
//.. goto done;
-//..
-//.. case Xin_FpLdStCW:
-//.. if (i->Xin.FpLdStCW.isLoad) {
-//.. *p++ = 0xD9;
-//.. p = doAMode_M(p, fake(5)/*subopcode*/, i->Xin.FpLdStCW.addr);
-//.. } else {
-//.. vassert(0);
-//.. }
-//.. goto done;
-//..
+
+ case Ain_LdMXCSR:
+ /* ldmxcsr mem: REX prefix, then 0F AE with subopcode /2 in the
+ reg field of the ModRM byte.
+ NOTE(review): clearWBit presumably clears REX.W -- confirm. */
+ *p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
+ *p++ = 0x0F;
+ *p++ = 0xAE;
+ p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
+ goto done;
+
//.. case Xin_FpStSW_AX:
//.. /* note, this emits fnstsw %ax, not fstsw %ax */
//.. *p++ = 0xDF;
*p++ = 0x58 + iregNo(i->Ain.SseUComIS.dst);
goto done;
+ case Ain_SseSI2SF:
+ /* cvtsi2s[sd] %src, %dst */
+ /* Prefix F3 selects the 4-byte (single) destination form, F2 the
+ 8-byte (double) form; the opcode is 0F 2A.  The REX byte goes
+ between the F2/F3 prefix and the opcode.
+ NOTE(review): clearWBit presumably clears REX.W to select a
+ 32-bit integer source when szS==4 -- confirm. */
+ rex = rexAMode_R( vreg2ireg(i->Ain.SseSI2SF.dst),
+ i->Ain.SseSI2SF.src );
+ *p++ = i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2;
+ *p++ = i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex;
+ *p++ = 0x0F;
+ *p++ = 0x2A;
+ p = doAMode_R( p, vreg2ireg(i->Ain.SseSI2SF.dst),
+ i->Ain.SseSI2SF.src );
+ goto done;
+
+ case Ain_SseSF2SI:
+ /* cvts[sd]2si %src, %dst */
+ /* Prefix F3 selects the 4-byte (single) source form, F2 the 8-byte
+ (double) form; the opcode is 0F 2D.  The REX byte goes between
+ the F2/F3 prefix and the opcode.
+ NOTE(review): clearWBit presumably clears REX.W to select a
+ 32-bit integer destination when szD==4 -- confirm. */
+ rex = rexAMode_R( i->Ain.SseSF2SI.dst,
+ vreg2ireg(i->Ain.SseSF2SI.src) );
+ *p++ = i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2;
+ *p++ = i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex;
+ *p++ = 0x0F;
+ *p++ = 0x2D;
+ p = doAMode_R( p, i->Ain.SseSF2SI.dst,
+ vreg2ireg(i->Ain.SseSF2SI.src) );
+ goto done;
+
//..
//.. case Xin_FpCmp:
//.. /* gcmp %fL, %fR, %dst
switch (i->Ain.Sse64FLo.op) {
case Asse_ADDF: *p++ = 0x58; break;
case Asse_DIVF: *p++ = 0x5E; break;
-//.. case Xsse_MAXF: *p++ = 0x5F; break;
+ case Asse_MAXF: *p++ = 0x5F; break;
//.. case Xsse_MINF: *p++ = 0x5D; break;
case Asse_MULF: *p++ = 0x59; break;
//.. case Xsse_RCPF: *p++ = 0x53; break;
case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
//.. case Xsse_OR: XX(rex); XX(0x0F); XX(0x56); break;
case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
-//.. case Xsse_AND: XX(rex); XX(0x0F); XX(0x54); break;
+ case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
//.. case Xsse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
//.. case Xsse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
//.. case Xsse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
Asse_MOV,
/* Floating point binary */
Asse_ADDF, Asse_SUBF, Asse_MULF, Asse_DIVF,
-//.. Xsse_MAXF, Xsse_MINF,
+ Asse_MAXF, Asse_MINF,
//.. Xsse_CMPEQF, Xsse_CMPLTF, Xsse_CMPLEF, Xsse_CMPUNF,
/* Floating point unary */
Asse_RCPF, Asse_RSQRTF, Asse_SQRTF,
//.. Xin_FpLdStI, /* FP fake load/store, converting to/from Int */
//.. Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single */
//.. Xin_FpCMov, /* FP fake floating point conditional move */
-//.. Xin_FpLdStCW, /* fldcw / fstcw */
+ Ain_LdMXCSR, /* load %mxcsr */
//.. Xin_FpStSW_AX, /* fstsw %ax */
Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int
register */
+ Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */
+ Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */
//..
//.. Xin_SseConst, /* Generate restricted SSE literal */
Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
//.. HReg src;
//.. HReg dst;
//.. } FpCMov;
-//.. /* Load/store the FPU's 16-bit control word (fldcw/fstcw) */
-//.. struct {
-//.. Bool isLoad;
-//.. X86AMode* addr;
-//.. }
-//.. FpLdStCW;
+ /* Load 32 bits into %mxcsr. */
+ struct {
+ AMD64AMode* addr;
+ }
+ LdMXCSR;
//.. /* fstsw %ax */
//.. struct {
//.. /* no fields */
HReg srcR; /* xmm */
HReg dst; /* int */
} SseUComIS;
+ /* scalar 32/64 int to 32/64 float conversion */
+ struct {
+ UChar szS; /* 4 or 8 */
+ UChar szD; /* 4 or 8 */
+ HReg src; /* i class */
+ HReg dst; /* v class */
+ } SseSI2SF;
+ /* scalar 32/64 float to 32/64 int conversion */
+ struct {
+ UChar szS; /* 4 or 8 */
+ UChar szD; /* 4 or 8 */
+ HReg src; /* v class */
+ HReg dst; /* i class */
+ } SseSF2SI;
//..
//.. /* Simplistic SSE[123] */
//.. struct {
//.. extern AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* );
//.. extern AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst );
//.. extern AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode, HReg src, HReg dst );
-//.. extern AMD64Instr* AMD64Instr_FpLdStCW ( Bool isLoad, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* );
//.. extern AMD64Instr* AMD64Instr_FpStSW_AX ( void );
extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst );
+extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst );
//..
//.. extern AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst );
extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* );
#include "host-generic/h_generic_regs.h"
//.. #include "host-generic/h_generic_simd64.h"
#include "host-amd64/hdefs.h"
-//..
-//..
-//.. /*---------------------------------------------------------*/
-//.. /*--- x87 control word stuff ---*/
-//.. /*---------------------------------------------------------*/
-//..
-//.. /* Vex-generated code expects to run with the FPU set as follows: all
-//.. exceptions masked, round-to-nearest, precision = 53 bits. This
-//.. corresponds to a FPU control word value of 0x027F.
-//..
-//.. Similarly the SSE control word (%mxcsr) should be 0x1F80.
-//..
-//.. %fpucw and %mxcsr should have these values on entry to
-//.. Vex-generated code, and should those values should be
-//.. unchanged at exit.
-//.. */
-//..
-//.. #define DEFAULT_FPUCW 0x027F
-//..
-//.. /* debugging only, do not use */
-//.. /* define DEFAULT_FPUCW 0x037F */
+
+
+/*---------------------------------------------------------*/
+/*--- x87/SSE control word stuff ---*/
+/*---------------------------------------------------------*/
+
+/* Vex-generated code expects to run with the FPU set as follows: all
+ exceptions masked, round-to-nearest, precision = 53 bits. This
+ corresponds to a FPU control word value of 0x027F.
+
+ Similarly the SSE control word (%mxcsr) should be 0x1F80.
+
+ %fpucw and %mxcsr should have these values on entry to
+ Vex-generated code, and those values should be
+ unchanged at exit.
+*/
+
+#define DEFAULT_FPUCW 0x027F
+
+#define DEFAULT_MXCSR 0x1F80
+
+/* debugging only, do not use */
+/* define DEFAULT_FPUCW 0x037F */
/*---------------------------------------------------------*/
}
-
/*---------------------------------------------------------*/
/*--- ISelEnv ---*/
/*---------------------------------------------------------*/
//.. }
//..
//..
+
+/* Put the SSE unit back into its default rounding mode by reloading
+   %mxcsr with DEFAULT_MXCSR (0x1F80). */
+static
+void set_SSE_rounding_default ( ISelEnv* env )
+{
+   /* Emitted sequence:
+         pushq $DEFAULT_MXCSR
+         ldmxcsr 0(%rsp)
+         addq $8, %rsp
+   */
+   AMD64AMode* stackTop = AMD64AMode_IR(0, hregAMD64_RSP());
+
+   addInstr(env, AMD64Instr_Push(AMD64RMI_Imm(DEFAULT_MXCSR)));
+   addInstr(env, AMD64Instr_LdMXCSR(stackTop));
+   add_to_rsp(env, 8);
+}
+
//.. /* Mess with the FPU's rounding mode: set to the default rounding mode
//.. (DEFAULT_FPUCW). */
//.. static
//.. addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
//.. add_to_esp(env, 4);
//.. }
-//..
-//..
+
+
+/* Program the SSE unit's rounding mode.  'mode' is an I32-typed
+   expression whose value lies in 0 .. 3 and is encoded as per type
+   IRRoundingMode; the SSE machinery is set to round the same way.
+*/
+static
+void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+   /* The new rounding field is simply ORed into DEFAULT_MXCSR.  That
+      is only valid because DEFAULT_MXCSR has both rounding bits
+      equal to zero; otherwise ORing could not produce every mode. */
+
+   /* Emitted sequence:
+         movq $3, %reg
+         andq [[mode]], %reg    -- shouldn't be needed; paranoia
+         shlq $13, %reg
+         orq $DEFAULT_MXCSR, %reg
+         pushq %reg
+         ldmxcsr 0(%rsp)
+         addq $8, %rsp
+   */
+   HReg        rcBits   = newVRegI(env);
+   AMD64AMode* stackTop = AMD64AMode_IR(0, hregAMD64_RSP());
+
+   /* rcBits = mode & 3 */
+   addInstr(env, AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Imm(3), rcBits));
+   addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
+                                   iselIntExpr_RMI(env, mode), rcBits));
+   /* Shift the two bits up by 13 and merge them with the defaults. */
+   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 13, AMD64RM_Reg(rcBits)));
+   addInstr(env, AMD64Instr_Alu64R(
+                    Aalu_OR, AMD64RMI_Imm(DEFAULT_MXCSR), rcBits));
+   /* Load %mxcsr from a temporary stack slot, then release the slot. */
+   addInstr(env, AMD64Instr_Push(AMD64RMI_Reg(rcBits)));
+   addInstr(env, AMD64Instr_LdMXCSR(stackTop));
+   add_to_rsp(env, 8);
+}
+
+
//.. /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
//.. expression denoting a value in the range 0 .. 3, indicating a round
//.. mode encoded as per type IRRoundingMode. Set the x87 FPU to have
return dst;
}
+ if (e->Iex.Binop.op == Iop_F64toI32) {
+ HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegI(env);
+ set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, AMD64Instr_SseSF2SI( 8, 4, rf, dst ));
+ set_SSE_rounding_default(env);
+ return dst;
+ }
+
//.. if (e->Iex.Binop.op == Iop_F64toI32 || e->Iex.Binop.op == Iop_F64toI16) {
//.. Int sz = e->Iex.Binop.op == Iop_F64toI16 ? 2 : 4;
//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
//.. return dst;
//.. }
+
+ /* V128{HI}to64 */
+ /* Done via the stack: reserve 16 bytes, store the whole vector at
+ 0(%rsp), then load 64 bits from offset 0 (V128to64) or offset 8
+ (V128HIto64) into an integer register and release the space. */
+ case Iop_V128HIto64:
+ case Iop_V128to64: {
+ Int off = e->Iex.Unop.op==Iop_V128HIto64 ? 8 : 0;
+ HReg dst = newVRegI(env);
+ HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
+ AMD64AMode* rsp0 = AMD64AMode_IR(0, hregAMD64_RSP());
+ AMD64AMode* rspN = AMD64AMode_IR(off, hregAMD64_RSP());
+ sub_from_rsp(env, 16);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
+ addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
+ AMD64RMI_Mem(rspN), dst ));
+ add_to_rsp(env, 16);
+ return dst;
+ }
+
case Iop_16to8:
case Iop_32to8:
case Iop_32to16:
//.. *rLo = tLo;
//.. return;
//.. }
-//..
-//.. /* 128{HI}to64 */
-//.. case Iop_128HIto64:
-//.. case Iop_128to64: {
-//.. Int off = e->Iex.Unop.op==Iop_128HIto64 ? 8 : 0;
-//.. HReg tLo = newVRegI(env);
-//.. HReg tHi = newVRegI(env);
-//.. HReg vec = iselVecExpr(env, e->Iex.Unop.arg);
-//.. X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
-//.. X86AMode* espLO = X86AMode_IR(off, hregX86_ESP());
-//.. X86AMode* espHI = X86AMode_IR(off+4, hregX86_ESP());
-//.. sub_from_esp(env, 16);
-//.. addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
-//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV,
-//.. X86RMI_Mem(espLO), tLo ));
-//.. addInstr(env, X86Instr_Alu32R( Xalu_MOV,
-//.. X86RMI_Mem(espHI), tHi ));
-//.. add_to_esp(env, 16);
-//.. *rHi = tHi;
-//.. *rLo = tLo;
-//.. return;
-//.. }
-//..
+
//.. /* could do better than this, but for now ... */
//.. case Iop_1Sto64: {
//.. HReg tLo = newVRegI(env);
//..
//.. return dst;
//.. }
-//..
-//.. if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64toF64) {
-//.. HReg dst = newVRegF(env);
-//.. HReg rHi,rLo;
-//.. iselInt64Expr( &rHi, &rLo, env, e->Iex.Binop.arg2);
-//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rHi)));
-//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rLo)));
-//..
-//.. /* Set host rounding mode */
-//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
-//..
-//.. addInstr(env, X86Instr_FpLdStI(
-//.. True/*load*/, 8, dst,
-//.. X86AMode_IR(0, hregX86_ESP())));
-//..
-//.. /* Restore default FPU rounding. */
-//.. set_FPU_rounding_default( env );
-//..
-//.. add_to_esp(env, 8);
-//.. return dst;
-//.. }
-//..
+
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64toF64) {
+ HReg dst = newVRegV(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Binop.arg2);
+ set_SSE_rounding_mode( env, e->Iex.Binop.arg1 );
+ addInstr(env, AMD64Instr_SseSI2SF( 8, 8, src, dst ));
+ set_SSE_rounding_default( env );
+ return dst;
+ }
+
+ /* I32 -> F64 via a 4-byte-source, 8-byte-destination SseSI2SF.
+ Unlike the I64toF64 case there is no rounding-mode operand; the
+ default rounding mode is set before the conversion.
+ NOTE(review): every I32 is exactly representable as an F64, so
+ the reset is presumably defensive -- confirm. */
+ if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_I32toF64) {
+ HReg dst = newVRegV(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ set_SSE_rounding_default( env );
+ addInstr(env, AMD64Instr_SseSI2SF( 4, 8, src, dst ));
+ return dst;
+ }
+
//.. if (e->tag == Iex_Unop) {
//.. X86FpOp fpop = Xfp_INVALID;
//.. switch (e->Iex.Unop.op) {
return dst;
}
-//.. if (e->tag == Iex_LDle) {
-//.. HReg dst = newVRegV(env);
-//.. X86AMode* am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
-//.. addInstr(env, X86Instr_SseLdSt( True/*load*/, dst, am ));
-//.. return dst;
-//.. }
-//..
-//.. if (e->tag == Iex_Const) {
-//.. HReg dst = newVRegV(env);
-//.. vassert(e->Iex.Const.con->tag == Ico_V128);
-//.. addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
-//.. return dst;
-//.. }
+ if (e->tag == Iex_LDle) {
+ HReg dst = newVRegV(env);
+ AMD64AMode* am = iselIntExpr_AMode(env, e->Iex.LDle.addr);
+ addInstr(env, AMD64Instr_SseLdSt( True/*load*/, 16, dst, am ));
+ return dst;
+ }
+
+ /* V128 constants: only the all-zero constant is handled, by XORing
+ a fresh vector register with itself.  Any other value goes to
+ vec_fail; the code under #if 0 is disabled and kept only as a
+ sketch of a general implementation. */
+ if (e->tag == Iex_Const) {
+ HReg dst = newVRegV(env);
+ vassert(e->Iex.Const.con->tag == Ico_V128);
+ if (e->Iex.Const.con->Ico.V128 == 0) {
+ addInstr(env, AMD64Instr_SseReRg(Asse_XOR, dst, dst));
+ return dst;
+ } else {
+ goto vec_fail;
+#if 0
+ addInstr(env, X86Instr_SseConst(e->Iex.Const.con->Ico.V128, dst));
+ return dst;
+#endif
+ }
+ }
if (e->tag == Iex_Unop) {
switch (e->Iex.Unop.op) {
return dst;
}
-//.. case Iop_64HLto128: {
-//.. HReg r3, r2, r1, r0;
-//.. X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
-//.. X86AMode* esp4 = advance4(esp0);
-//.. X86AMode* esp8 = advance4(esp4);
-//.. X86AMode* esp12 = advance4(esp8);
-//.. HReg dst = newVRegV(env);
-//.. /* do this via the stack (easy, convenient, etc) */
-//.. sub_from_esp(env, 16);
-//.. /* Do the less significant 64 bits */
-//.. iselInt64Expr(&r1, &r0, env, e->Iex.Binop.arg2);
-//.. addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r0), esp0));
-//.. addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r1), esp4));
-//.. /* Do the more significant 64 bits */
-//.. iselInt64Expr(&r3, &r2, env, e->Iex.Binop.arg1);
-//.. addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r2), esp8));
-//.. addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r3), esp12));
-//.. /* Fetch result back from stack. */
-//.. addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
-//.. add_to_esp(env, 16);
-//.. return dst;
-//.. }
-//..
+ /* Build a V128 from two 64-bit halves: arg1 is the high half and
+ arg2 the low half.  arg1 is pushed first and arg2 second, so
+ arg2 ends up at 0(%rsp) and arg1 at 8(%rsp); a 16-byte load from
+ 0(%rsp) then yields the vector, and the 16 bytes are released. */
+ case Iop_64HLtoV128: {
+ AMD64AMode* rsp = AMD64AMode_IR(0, hregAMD64_RSP());
+ HReg dst = newVRegV(env);
+ /* do this via the stack (easy, convenient, etc) */
+ addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg1)));
+ addInstr(env, AMD64Instr_Push(iselIntExpr_RMI(env, e->Iex.Binop.arg2)));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 16, dst, rsp));
+ add_to_rsp(env, 16);
+ return dst;
+ }
+
//.. case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
//.. case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
//.. case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
//.. case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2;
case Iop_Add64F0x2: op = Asse_ADDF; goto do_64F0x2;
case Iop_Div64F0x2: op = Asse_DIVF; goto do_64F0x2;
-//.. case Iop_Max64F0x2: op = Xsse_MAXF; goto do_64F0x2;
+ case Iop_Max64F0x2: op = Asse_MAXF; goto do_64F0x2;
//.. case Iop_Min64F0x2: op = Xsse_MINF; goto do_64F0x2;
case Iop_Mul64F0x2: op = Asse_MULF; goto do_64F0x2;
case Iop_Sub64F0x2: op = Asse_SUBF; goto do_64F0x2;
//.. case Iop_InterleaveLO64x2:
//.. op = Xsse_UNPCKLQ; arg1isEReg = True; goto do_SseReRg;
//..
-//.. case Iop_And128: op = Xsse_AND; goto do_SseReRg;
+ case Iop_AndV128: op = Asse_AND; goto do_SseReRg;
//.. case Iop_Or128: op = Xsse_OR; goto do_SseReRg;
case Iop_XorV128: op = Asse_XOR; goto do_SseReRg;
//.. case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg;
//.. addInstr(env, X86Instr_FpLdSt( False/*store*/, 4, f32, am ));
//.. return;
//.. }
-//.. if (ty == Ity_F64) {
-//.. HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
-//.. X86AMode* am = X86AMode_IR(stmt->Ist.Put.offset, hregX86_EBP());
-//.. set_FPU_rounding_default(env); /* paranoia */
-//.. addInstr(env, X86Instr_FpLdSt( False/*store*/, 8, f64, am ));
-//.. return;
-//.. }
+ if (ty == Ity_F64) {
+ HReg f64 = iselDblExpr(env, stmt->Ist.Put.data);
+ AMD64AMode* am = AMD64AMode_IR( stmt->Ist.Put.offset,
+ hregAMD64_RBP() );
+ addInstr(env, AMD64Instr_SseLdSt( False/*store*/, 8, f64, am ));
+ return;
+ }
break;
}