/* Set the CR6 flags following an AltiVec compare operation. */
-static void set_AV_CR6 ( IRExpr* result )
+static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
{
/* CR6[0:3] = {all_ones, 0, all_zeros, 0}
   all_ones  = (v[0] && v[1] && v[2] && v[3])
   all_zeros = ~(v[0] || v[1] || v[2] || v[3]) */
assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
- assign( rOnes, unop(Iop_1Uto8,
- binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
- unop(Iop_V128to32,
- binop(Iop_AndV128,
- binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
- binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))))) );
-
assign( rZeros, unop(Iop_1Uto8,
          binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
                unop(Iop_Not32,
                     unop(Iop_V128to32,
                          binop(Iop_OrV128,
                                binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
                                binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))))))) );
- putCR321( 6, binop(Iop_Or8,
- binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)),
- binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
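+   /* vcmpbfp does not define the all_ones bit of CR6, so its caller
+      passes test_all_ones = False and only the all_zeros bit is set. */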
+ if (test_all_ones) {
+ assign( rOnes, unop(Iop_1Uto8,
+ binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
+ unop(Iop_V128to32,
+ binop(Iop_AndV128,
+ binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
+ binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))))) );
+ putCR321( 6, binop(Iop_Or8,
+ binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)),
+ binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
+ } else {
+ putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) );
+ }
putCR0( 6, mkU8(0) );
}
putVReg( vD_addr, mkexpr(vD) );
if (flag_Rc) {
- set_AV_CR6( mkexpr(vD) );
+ set_AV_CR6( mkexpr(vD), True );
}
return True;
}
UChar vC_addr = toUChar((theInstr >> 6) & 0x1F); /* theInstr[6:10] */
UInt opc2=0;
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vC = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+ assign( vC, getVReg(vC_addr));
+
if (opc1 != 0x4) {
vex_printf("dis_av_fp_arith(PPC32)(instr)\n");
return False;
switch (opc2) {
case 0x00A: // vaddfp (Add FP, AV p137)
DIP("vaddfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Add32Fx4, mkexpr(vA), mkexpr(vB)) );
+ return True;
case 0x04A: // vsubfp (Subtract FP, AV p261)
DIP("vsubfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Sub32Fx4, mkexpr(vA), mkexpr(vB)) );
+ return True;
case 0x40A: // vmaxfp (Maximum FP, AV p178)
DIP("vmaxfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Max32Fx4, mkexpr(vA), mkexpr(vB)) );
+ return True;
case 0x44A: // vminfp (Minimum FP, AV p187)
DIP("vminfp v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ putVReg( vD_addr, binop(Iop_Min32Fx4, mkexpr(vA), mkexpr(vB)) );
+ return True;
default:
break; // Fall through...
switch (opc2) {
case 0x10A: // vrefp (Reciprocal Estimate FP, AV p228)
DIP("vrefp v%d,v%d\n", vD_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ putVReg( vD_addr, unop(Iop_Recip32Fx4, mkexpr(vB)) );
+ return True;
case 0x14A: // vrsqrtefp (Reciprocal Square Root Estimate FP, AV p237)
DIP("vrsqrtefp v%d,v%d\n", vD_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ putVReg( vD_addr, unop(Iop_RSqrt32Fx4, mkexpr(vB)) );
+ return True;
case 0x18A: // vexptefp (2 Raised to the Exp Est FP, AV p173)
DIP("vexptefp v%d,v%d\n", vD_addr, vB_addr);
UChar flag_Rc = toUChar((theInstr >> 10) & 0x1); /* theInstr[10] */
UInt opc2 = (theInstr >> 0) & 0x3FF; /* theInstr[0:9] */
+ Bool cmp_bounds = False;
+
+ IRTemp vA = newTemp(Ity_V128);
+ IRTemp vB = newTemp(Ity_V128);
+ IRTemp vD = newTemp(Ity_V128);
+ assign( vA, getVReg(vA_addr));
+ assign( vB, getVReg(vB_addr));
+
if (opc1 != 0x4) {
vex_printf("dis_av_fp_cmp(PPC32)(instr)\n");
return False;
switch (opc2) {
case 0x0C6: // vcmpeqfp (Compare Equal-to FP, AV p159)
DIP("vcmpeqfp%s v%d,v%d,v%d\n", (flag_Rc ? ".":""), vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ assign( vD, binop(Iop_CmpEQ32Fx4, mkexpr(vA), mkexpr(vB)) );
+ break;
case 0x1C6: // vcmpgefp (Compare Greater-than-or-Equal-to FP, AV p163)
DIP("vcmpgefp%s v%d,v%d,v%d\n", (flag_Rc ? ".":""), vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ assign( vD, binop(Iop_CmpGE32Fx4, mkexpr(vA), mkexpr(vB)) );
+ break;
case 0x2C6: // vcmpgtfp (Compare Greater-than FP, AV p164)
DIP("vcmpgtfp%s v%d,v%d,v%d\n", (flag_Rc ? ".":""), vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ assign( vD, binop(Iop_CmpGT32Fx4, mkexpr(vA), mkexpr(vB)) );
+ break;
- case 0x3C6: // vcmpbfp (Compare Bounds FP, AV p157)
+ case 0x3C6: { // vcmpbfp (Compare Bounds FP, AV p157)
+ IRTemp gt = newTemp(Ity_V128);
+ IRTemp lt = newTemp(Ity_V128);
+ IRTemp zeros = newTemp(Ity_V128);
DIP("vcmpbfp%s v%d,v%d,v%d\n", (flag_Rc ? ".":""), vD_addr, vA_addr, vB_addr);
- DIP(" => not implemented\n");
- return False;
+ cmp_bounds = True;
+ assign( zeros, unop(Iop_Dup32x4, mkU32(0)) );
+
+      /* Note: making use of the fact that the ppc backend for compare insns
+         returns zero'd lanes if either of the corresponding arg lanes is a nan.
+
+ Perhaps better to have an irop Iop_isNan32Fx4, but then we'd
+ need this for the other compares too (vcmpeqfp etc)...
+ Better still, tighten down the spec for compare irops.
+ */
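+      /* gt lane = !(a <= b), lt lane = !(a >= -b): all-ones when the
+         bound is violated, and also when a nan makes the compare false. */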
+ assign( gt, unop(Iop_NotV128,
+ binop(Iop_CmpLE32Fx4, mkexpr(vA), mkexpr(vB))) );
+ assign( lt, unop(Iop_NotV128,
+ binop(Iop_CmpGE32Fx4, mkexpr(vA),
+ binop(Iop_Sub32Fx4, mkexpr(zeros), mkexpr(vB)))) );
+
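+      // each result lane keeps only its top two bits: bit 31 is set if
+      // a > b, bit 30 if a < -b (both set when either lane held a nan)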
+ // finally, just shift gt,lt to correct position
+ assign( vD, binop(Iop_ShlN32x4,
+ binop(Iop_OrV128,
+ binop(Iop_AndV128, mkexpr(gt),
+ unop(Iop_Dup32x4, mkU32(0x2))),
+ binop(Iop_AndV128, mkexpr(lt),
+ unop(Iop_Dup32x4, mkU32(0x1)))),
+ mkU8(30)) );
+ break;
+ }
default:
vex_printf("dis_av_fp_cmp(PPC32)(opc2)\n");
return False;
}
+
+ putVReg( vD_addr, mkexpr(vD) );
+
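+   /* vcmpbfp only defines the all_zeros CR6 bit, hence !cmp_bounds. */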
+ if (flag_Rc) {
+ set_AV_CR6( mkexpr(vD), !cmp_bounds );
+ }
return True;
}
case Pav_MRGHI: return "vmrgh"; // b,h,w
case Pav_MRGLO: return "vmrgl"; // b,h,w
+ default: vpanic("showPPC32AvOp");
+ }
+}
+
+HChar* showPPC32AvFpOp ( PPC32AvFpOp op ) {
+ switch (op) {
/* Floating Point Binary */
- case Pav_ADDF: return "vaddfp";
- case Pav_SUBF: return "vsubfp";
- case Pav_MULF: return "vmaddfp";
- case Pav_MAXF: return "vmaxfp";
- case Pav_MINF: return "vminfp";
- case Pav_CMPEQF: return "vcmpeqfp";
- case Pav_CMPGTF: return "vcmpgtfp";
- case Pav_CMPGEF: return "vcmpgefp";
+ case Pavfp_ADDF: return "vaddfp";
+ case Pavfp_SUBF: return "vsubfp";
+ case Pavfp_MULF: return "vmaddfp";
+ case Pavfp_MAXF: return "vmaxfp";
+ case Pavfp_MINF: return "vminfp";
+ case Pavfp_CMPEQF: return "vcmpeqfp";
+ case Pavfp_CMPGTF: return "vcmpgtfp";
+ case Pavfp_CMPGEF: return "vcmpgefp";
+ /* Floating Point Unary */
+ case Pavfp_RCPF: return "vrefp";
+ case Pavfp_RSQRTF: return "vrsqrtefp";
+
-   default: vpanic("showPPC32AvOp");
+   default: vpanic("showPPC32AvFpOp");
}
}
return i;
}
PPC32Instr* PPC32Instr_AvUnary ( PPC32AvOp op, HReg dst, HReg src ) {
- PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr));
- i->tag = Pin_AvUnary;
+ PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr));
+ i->tag = Pin_AvUnary;
i->Pin.AvUnary.op = op;
i->Pin.AvUnary.dst = dst;
i->Pin.AvUnary.src = src;
i->Pin.AvBin32Fx4.srcR = srcR;
return i;
}
+PPC32Instr* PPC32Instr_AvUn32Fx4 ( PPC32AvOp op, HReg dst, HReg src ) {
+ PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr));
+ i->tag = Pin_AvUn32Fx4;
+ i->Pin.AvUn32Fx4.op = op;
+ i->Pin.AvUn32Fx4.dst = dst;
+ i->Pin.AvUn32Fx4.src = src;
+ return i;
+}
PPC32Instr* PPC32Instr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl ) {
PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr));
i->tag = Pin_AvPerm;
vex_printf(",");
ppHRegPPC32(i->Pin.AvBin32Fx4.srcR);
return;
+ case Pin_AvUn32Fx4:
+      vex_printf("%s ", showPPC32AvFpOp(i->Pin.AvUn32Fx4.op));
+ ppHRegPPC32(i->Pin.AvUn32Fx4.dst);
+ vex_printf(",");
+ ppHRegPPC32(i->Pin.AvUn32Fx4.src);
+ return;
case Pin_AvPerm:
vex_printf("vperm ");
ppHRegPPC32(i->Pin.AvPerm.dst);
addHRegUse(u, HRmWrite, i->Pin.AvBin32x4.dst);
addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcL);
addHRegUse(u, HRmRead, i->Pin.AvBin32x4.srcR);
- if (i->Pin.AvBin32x4.op == Pav_MULF)
- addHRegUse(u, HRmWrite, hregPPC32_GPR29());
return;
case Pin_AvBin32Fx4:
addHRegUse(u, HRmWrite, i->Pin.AvBin32Fx4.dst);
addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcL);
addHRegUse(u, HRmRead, i->Pin.AvBin32Fx4.srcR);
+ if (i->Pin.AvBin32Fx4.op == Pavfp_MULF)
+ addHRegUse(u, HRmWrite, hregPPC32_GPR29());
+ return;
+ case Pin_AvUn32Fx4:
+ addHRegUse(u, HRmWrite, i->Pin.AvUn32Fx4.dst);
+ addHRegUse(u, HRmRead, i->Pin.AvUn32Fx4.src);
return;
case Pin_AvPerm:
addHRegUse(u, HRmWrite, i->Pin.AvPerm.dst);
mapReg(m, &i->Pin.AvBin32Fx4.srcL);
mapReg(m, &i->Pin.AvBin32Fx4.srcR);
return;
+ case Pin_AvUn32Fx4:
+ mapReg(m, &i->Pin.AvUn32Fx4.dst);
+ mapReg(m, &i->Pin.AvUn32Fx4.src);
+ return;
case Pin_AvPerm:
mapReg(m, &i->Pin.AvPerm.dst);
mapReg(m, &i->Pin.AvPerm.srcL);
return emit32(p, theInstr);
}
-static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2, UInt Rc,
- UInt r3, UInt opc2 )
+static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2,
+ UInt r3, UInt Rc, UInt opc2 )
{
UInt theInstr;
vassert(opc1 < 0x40);
p = mkFormVX( p, 4, v_dst, v_src, v_src, opc2 );
break;
default:
- p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
- break;
+ p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+ break;
}
goto done;
}
UInt v_srcR = vregNo(i->Pin.AvBin32Fx4.srcR);
switch (i->Pin.AvBin32Fx4.op) {
- case Pav_ADDF:
+ case Pavfp_ADDF:
p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 10 ); // vaddfp
break;
- case Pav_SUBF:
+ case Pavfp_SUBF:
p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 74 ); // vsubfp
break;
- case Pav_MAXF:
+ case Pavfp_MAXF:
p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1034 ); // vmaxfp
break;
- case Pav_MINF:
+ case Pavfp_MINF:
p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1098 ); // vminfp
break;
- case Pav_MULF: {
+ case Pavfp_MULF: {
/* Make a vmulfp from a vmaddfp:
load -0.0 (0x8000_0000) to each 32-bit word of vB;
this makes the add a noop.
*/
UInt vB = 29; // XXX: Using r29 for temp
- UInt zero_simm = 0x80000000;
+ UInt konst = 0x1F;
// Better way to load zero_imm?
// vspltisw vB,0x1F (0x1F => each word of vB)
- p = mkFormVX( p, 4, vB, zero_simm, 0, 908 );
+ p = mkFormVX( p, 4, vB, konst, 0, 908 );
// vslw vB,vB,vB (each word of vB = (0x1F << 0x1F) = 0x80000000)
p = mkFormVX( p, 4, vB, vB, vB, 388 );
p = mkFormVA( p, 4, v_dst, v_srcL, vB, v_srcR, 46 );
break;
}
- case Pav_CMPEQF:
+ case Pavfp_CMPEQF:
p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 198 ); // vcmpeqfp
break;
- case Pav_CMPGTF:
- p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 710 ); // vcmpgtfp
+ case Pavfp_CMPGTF:
+ p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 710 ); // vcmpgtfp
break;
- case Pav_CMPGEF:
- p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 454 ); // vcmpgefp
+ case Pavfp_CMPGEF:
+ p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 454 ); // vcmpgefp
break;
default:
goto done;
}
+ case Pin_AvUn32Fx4: {
+ UInt v_dst = vregNo(i->Pin.AvUn32Fx4.dst);
+ UInt v_src = vregNo(i->Pin.AvUn32Fx4.src);
+ UInt opc2;
+ switch (i->Pin.AvUn32Fx4.op) {
+ case Pavfp_RCPF: opc2 = 266; break; // vrefp
+ case Pavfp_RSQRTF: opc2 = 330; break; // vrsqrtefp
+ default:
+ goto bad;
+ }
+ p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+ goto done;
+ }
+
case Pin_AvPerm: { // vperm
UInt v_dst = vregNo(i->Pin.AvPerm.dst);
UInt v_srcL = vregNo(i->Pin.AvPerm.srcL);
Pav_UNPCKHPIX, Pav_UNPCKLPIX,
/* Integer Binary */
- Pav_AND, Pav_OR, Pav_XOR, /* Bitwise */
-
+ Pav_AND, Pav_OR, Pav_XOR, /* Bitwise */
Pav_ADDU, Pav_QADDU, Pav_QADDS,
-
Pav_SUBU, Pav_QSUBU, Pav_QSUBS,
-
Pav_OMULU, Pav_OMULS, Pav_EMULU, Pav_EMULS,
-
Pav_AVGU, Pav_AVGS,
Pav_MAXU, Pav_MAXS,
Pav_MINU, Pav_MINS,
/* Merge */
Pav_MRGHI, Pav_MRGLO,
+ }
+ PPC32AvOp;
+
+extern HChar* showPPC32AvOp ( PPC32AvOp );
+
+
+/* --------- */
+typedef
+ enum {
+ Pavfp_INVALID,
/* Floating point binary */
- Pav_ADDF, Pav_SUBF, Pav_MULF,
- Pav_MAXF, Pav_MINF,
- Pav_CMPEQF, Pav_CMPGTF, Pav_CMPGEF,
+ Pavfp_ADDF, Pavfp_SUBF, Pavfp_MULF,
+ Pavfp_MAXF, Pavfp_MINF,
+ Pavfp_CMPEQF, Pavfp_CMPGTF, Pavfp_CMPGEF,
-//.. /* Floating point unary */
-//.. Xsse_RCPF, Xsse_RSQRTF, Xsse_SQRTF,
+ /* Floating point unary */
+ Pavfp_RCPF, Pavfp_RSQRTF,
}
- PPC32AvOp;
+ PPC32AvFpOp;
-extern HChar* showPPC32AvOp ( PPC32AvOp );
+extern HChar* showPPC32AvFpOp ( PPC32AvFpOp );
/* --------- */
Pin_AvBin32x4, /* AV binary, 32x4 */
Pin_AvBin32Fx4, /* AV FP binary, 32Fx4 */
+ Pin_AvUn32Fx4, /* AV FP unary, 32Fx4 */
Pin_AvPerm, /* AV permute (shuffle) */
Pin_AvSel, /* AV select */
HReg srcR;
} AvBin32x4;
struct {
- PPC32AvOp op;
+ PPC32AvFpOp op;
HReg dst;
HReg srcL;
HReg srcR;
} AvBin32Fx4;
+ struct {
+ PPC32AvFpOp op;
+ HReg dst;
+ HReg src;
+ } AvUn32Fx4;
/* Perm,Sel,SlDbl,Splat are all weird AV permutations */
struct {
HReg dst;
extern PPC32Instr* PPC32Instr_AvBin16x8 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR );
extern PPC32Instr* PPC32Instr_AvBin32x4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR );
extern PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPC32Instr* PPC32Instr_AvUn32Fx4 ( PPC32AvOp op, HReg dst, HReg src );
extern PPC32Instr* PPC32Instr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl );
extern PPC32Instr* PPC32Instr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
extern PPC32Instr* PPC32Instr_AvShlDbl ( UChar shift, HReg dst, HReg srcL, HReg srcR );
//.. {
//.. return IRExpr_Const(IRConst_U64(i));
//.. }
-//..
-//.. static IRExpr* mkU32 ( UInt i )
-//.. {
-//.. return IRExpr_Const(IRConst_U32(i));
-//.. }
+
+static IRExpr* mkU32 ( UInt i )
+{
+ return IRExpr_Const(IRConst_U32(i));
+}
static IRExpr* bind ( Int binder )
{
}
-
-
/*---------------------------------------------------------*/
/*--- ISelEnv ---*/
/*---------------------------------------------------------*/
}
+/* for each lane of vSrc: lane == nan ? laneX = all 1's : all 0's */
+static HReg isNan ( ISelEnv* env, HReg vSrc )
+{
+ vassert(hregClass(vSrc) == HRcVec128);
+
+ HReg zeros = mk_AvDuplicateRI(env, mkU32(0));
+ HReg msk_exp = mk_AvDuplicateRI(env, mkU32(0x7F800000));
+ HReg msk_mnt = mk_AvDuplicateRI(env, mkU32(0x7FFFFF));
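+   /* 0x7F800000 masks the 8 exponent bits, 0x7FFFFF the 23 mantissa bits */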
+ HReg expt = newVRegV(env);
+ HReg mnts = newVRegV(env);
+ HReg vIsNan = newVRegV(env);
+
+   /* 32-bit float => sign(1) | exponent(8) | mantissa(23)
+ nan => exponent all ones, mantissa > 0 */
+
+ addInstr(env, PPC32Instr_AvBinary(Pav_AND, expt, vSrc, msk_exp));
+ addInstr(env, PPC32Instr_AvBin32x4(Pav_CMPEQU, expt, expt, msk_exp));
+ addInstr(env, PPC32Instr_AvBinary(Pav_AND, mnts, vSrc, msk_mnt));
+ addInstr(env, PPC32Instr_AvBin32x4(Pav_CMPGTU, mnts, mnts, zeros));
+ addInstr(env, PPC32Instr_AvBinary(Pav_AND, vIsNan, expt, mnts));
+ return vIsNan;
+}
+
+
/*---------------------------------------------------------*/
/*--- ISEL: Integer expressions (32/16/8 bit) ---*/
/*---------------------------------------------------------*/
//.. addInstr(env, X86Instr_SseReRg(Xsse_OR, tmp, dst));
//.. return dst;
//.. }
-//..
-//.. case Iop_CmpNEZ32x4: {
-//.. /* Sigh, we have to generate lousy code since this has to
-//.. work on SSE1 hosts */
-//.. /* basically, the idea is: for each lane:
-//.. movl lane, %r ; negl %r (now CF = lane==0 ? 0 : 1)
-//.. sbbl %r, %r (now %r = 1Sto32(CF))
-//.. movl %r, lane
-//.. */
-//.. Int i;
-//.. X86AMode* am;
-//.. X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
-//.. HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
-//.. HReg dst = newVRegV(env);
-//.. HReg r32 = newVRegI(env);
-//.. sub_from_esp(env, 16);
-//.. addInstr(env, X86Instr_SseLdSt(False/*store*/, arg, esp0));
-//.. for (i = 0; i < 4; i++) {
-//.. am = X86AMode_IR(i*4, hregX86_ESP());
-//.. addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), r32));
-//.. addInstr(env, X86Instr_Unary32(Xun_NEG, X86RM_Reg(r32)));
-//.. addInstr(env, X86Instr_Alu32R(Xalu_SBB, X86RMI_Reg(r32), r32));
-//.. addInstr(env, X86Instr_Alu32M(Xalu_MOV, X86RI_Reg(r32), am));
-//.. }
-//.. addInstr(env, X86Instr_SseLdSt(True/*load*/, dst, esp0));
-//.. add_to_esp(env, 16);
-//.. return dst;
-//.. }
-//..
+
case Iop_CmpNEZ8x16: {
HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
HReg zero = newVRegV(env);
//.. addInstr(env, X86Instr_SseReRg(Xsse_XOR, vec1, dst));
//.. return dst;
//.. }
-//..
-//.. case Iop_Recip32Fx4: op = Xsse_RCPF; goto do_32Fx4_unary;
-//.. case Iop_RSqrt32Fx4: op = Xsse_RSQRTF; goto do_32Fx4_unary;
+
+ case Iop_Recip32Fx4: op = Pavfp_RCPF; goto do_32Fx4_unary;
+ case Iop_RSqrt32Fx4: op = Pavfp_RSQRTF; goto do_32Fx4_unary;
//.. case Iop_Sqrt32Fx4: op = Xsse_SQRTF; goto do_32Fx4_unary;
-//.. do_32Fx4_unary:
-//.. {
-//.. HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
-//.. HReg dst = newVRegV(env);
-//.. addInstr(env, X86Instr_Sse32Fx4(op, arg, dst));
-//.. return dst;
-//.. }
-//..
+ do_32Fx4_unary:
+ {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPC32Instr_AvUn32Fx4(op, dst, arg));
+ return dst;
+ }
+
//.. case Iop_Recip64Fx2: op = Xsse_RCPF; goto do_64Fx2_unary;
//.. case Iop_RSqrt64Fx2: op = Xsse_RSQRTF; goto do_64Fx2_unary;
//.. case Iop_Sqrt64Fx2: op = Xsse_SQRTF; goto do_64Fx2_unary;
return dst;
}
-//.. case Iop_CmpEQ32Fx4: op = Xsse_CMPEQF; goto do_32Fx4;
-//.. case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
-//.. case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
-//.. case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4;
+ case Iop_Add32Fx4: op = Pavfp_ADDF; goto do_32Fx4;
+ case Iop_Sub32Fx4: op = Pavfp_SUBF; goto do_32Fx4;
+ case Iop_Max32Fx4: op = Pavfp_MAXF; goto do_32Fx4;
+ case Iop_Min32Fx4: op = Pavfp_MINF; goto do_32Fx4;
+ case Iop_Mul32Fx4: op = Pavfp_MULF; goto do_32Fx4;
//.. case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4;
-//.. case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
-//.. case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
-//.. case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4;
-//.. case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4;
-//.. do_32Fx4:
-//.. {
-//.. HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
-//.. HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
-//.. HReg dst = newVRegV(env);
-//.. addInstr(env, mk_vMOVsd_RR(argL, dst));
-//.. addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
-//.. return dst;
-//.. }
+ case Iop_CmpEQ32Fx4: op = Pavfp_CMPEQF; goto do_32Fx4;
+ case Iop_CmpGT32Fx4: op = Pavfp_CMPGTF; goto do_32Fx4;
+ case Iop_CmpGE32Fx4: op = Pavfp_CMPGEF; goto do_32Fx4;
+//.. case Iop_CmpLT32Fx4:
+ do_32Fx4:
+ {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPC32Instr_AvBin32Fx4(op, dst, argL, argR));
+ return dst;
+ }
+
+ case Iop_CmpLE32Fx4: {
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+
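+      /* AltiVec has no vcmplefp insn, so le is synthesised here from
+         gt and an explicit nan check. */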
+      /* Stay consistent with native ppc compares:
+         if either arg lane holds a nan, return zeros for that lane,
+         so: le == NOT(gt OR isNan)
+ */
+ HReg isNanLR = newVRegV(env);
+ HReg isNanL = isNan(env, argL);
+ HReg isNanR = isNan(env, argR);
+ addInstr(env, PPC32Instr_AvBinary(Pav_OR, isNanLR, isNanL, isNanR));
+
+ addInstr(env, PPC32Instr_AvBin32Fx4(Pavfp_CMPGTF, dst, argL, argR));
+ addInstr(env, PPC32Instr_AvBinary(Pav_OR, dst, dst, isNanLR));
+ addInstr(env, PPC32Instr_AvUnary(Pav_NOT, dst, dst));
+ return dst;
+ }
//.. case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2;
//.. case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;