/*--- SSE/SSE2/SSE3 helpers ---*/
/*------------------------------------------------------------*/
+/* Indicates whether the op requires a rounding-mode argument. Note
+ that this covers only vector floating point arithmetic ops, and
+ omits the scalar ones that need rounding modes. Note also that
+ inconsistencies here will get picked up later by the IR sanity
+ checker, so this isn't correctness-critical. */
+static Bool requiresRMode ( IROp op )
+{
+ switch (op) {
+ /* 128 bit ops */
+ case Iop_Add32Fx4: case Iop_Sub32Fx4:
+ case Iop_Mul32Fx4: case Iop_Div32Fx4:
+ case Iop_Add64Fx2: case Iop_Sub64Fx2:
+ case Iop_Mul64Fx2: case Iop_Div64Fx2:
+ /* 256 bit ops */
+ case Iop_Add32Fx8: case Iop_Sub32Fx8:
+ case Iop_Mul32Fx8: case Iop_Div32Fx8:
+ case Iop_Add64Fx4: case Iop_Sub64Fx4:
+ case Iop_Mul64Fx4: case Iop_Div64Fx4:
+ return True;
+ default:
+ break;
+ }
+ return False;
+}
+
+
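For reference, the shape change this helper gates is from a two-operand to a three-operand IR node, with the rounding mode as an explicit first operand. A minimal sketch, illustrative only (a and b are hypothetical V128 temporaries; get_FAKE_roundingmode() is the same placeholder the patch uses elsewhere):

   /* Old form: the rounding mode is implicit in the op. */
   IRExpr* oldForm = binop(Iop_Add32Fx4, mkexpr(a), mkexpr(b));
   /* New form: the rounding mode is an explicit Ity_I32 first operand. */
   IRExpr* newForm = triop(Iop_Add32Fx4,
                           get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
                           mkexpr(a), mkexpr(b));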
/* Worker function; do not call directly.
Handles full width G = G `op` E and G = (not G) `op` E.
*/
Int alen;
IRTemp addr;
UChar rm = getUChar(delta);
+ Bool needsRMode = requiresRMode(op);
IRExpr* gpart
= invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
: getXMMReg(gregOfRexRM(pfx,rm));
if (epartIsReg(rm)) {
- putXMMReg( gregOfRexRM(pfx,rm),
- binop(op, gpart,
- getXMMReg(eregOfRexRM(pfx,rm))) );
+ putXMMReg(
+ gregOfRexRM(pfx,rm),
+ needsRMode
+ ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ gpart,
+ getXMMReg(eregOfRexRM(pfx,rm)))
+ : binop(op, gpart,
+ getXMMReg(eregOfRexRM(pfx,rm)))
+ );
DIP("%s %s,%s\n", opname,
nameXMMReg(eregOfRexRM(pfx,rm)),
nameXMMReg(gregOfRexRM(pfx,rm)) );
return delta+1;
} else {
addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
- putXMMReg( gregOfRexRM(pfx,rm),
- binop(op, gpart,
- loadLE(Ity_V128, mkexpr(addr))) );
+ putXMMReg(
+ gregOfRexRM(pfx,rm),
+ needsRMode
+ ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ gpart,
+ loadLE(Ity_V128, mkexpr(addr)))
+ : binop(op, gpart,
+ loadLE(Ity_V128, mkexpr(addr)))
+ );
DIP("%s %s,%s\n", opname,
dis_buf,
nameXMMReg(gregOfRexRM(pfx,rm)) );
IRTemp subV = newTemp(Ity_V128);
IRTemp a1 = newTemp(Ity_I64);
IRTemp s0 = newTemp(Ity_I64);
+ IRTemp rm = newTemp(Ity_I32);
- assign( addV, binop(Iop_Add64Fx2, mkexpr(dV), mkexpr(sV)) );
- assign( subV, binop(Iop_Sub64Fx2, mkexpr(dV), mkexpr(sV)) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
+ assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
+ assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
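(Aside: ADDSUBPD subtracts in lane 0 and adds in lane 1, so this helper computes a full vector add and a full vector sub and then recombines the high 64 bits of the add result with the low 64 bits of the sub result; only the two Add/Sub ops themselves gain the rounding-mode operand.)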
IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
IRTemp addV = newTemp(Ity_V256);
IRTemp subV = newTemp(Ity_V256);
+ IRTemp rm = newTemp(Ity_I32);
a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
- assign( addV, binop(Iop_Add64Fx4, mkexpr(dV), mkexpr(sV)) );
- assign( subV, binop(Iop_Sub64Fx4, mkexpr(dV), mkexpr(sV)) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
+ assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
+ assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
IRTemp addV = newTemp(Ity_V128);
IRTemp subV = newTemp(Ity_V128);
+ IRTemp rm = newTemp(Ity_I32);
a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
- assign( addV, binop(Iop_Add32Fx4, mkexpr(dV), mkexpr(sV)) );
- assign( subV, binop(Iop_Sub32Fx4, mkexpr(dV), mkexpr(sV)) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
+ assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
+ assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
IRTemp addV = newTemp(Ity_V256);
IRTemp subV = newTemp(Ity_V256);
+ IRTemp rm = newTemp(Ity_I32);
a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
- assign( addV, binop(Iop_Add32Fx8, mkexpr(dV), mkexpr(sV)) );
- assign( subV, binop(Iop_Sub32Fx8, mkexpr(dV), mkexpr(sV)) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
+ assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
+ assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
IRTemp leftV = newTemp(Ity_V128);
IRTemp rightV = newTemp(Ity_V128);
+ IRTemp rm = newTemp(Ity_I32);
s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
IRTemp res = newTemp(Ity_V128);
- assign( res, binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
- mkexpr(leftV), mkexpr(rightV) ) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
+ assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
+ mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
return res;
}
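(Aside: for HADDPS/HSUBPS, leftV collects the even-numbered lanes (s2,s0,d2,d0) and rightV the odd-numbered lanes (s3,s1,d3,d1), so a single packed add or sub yields all four horizontal results at once; the 64-bit helper below does the same with 64-bit lanes.)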
IRTemp s1, s0, d1, d0;
IRTemp leftV = newTemp(Ity_V128);
IRTemp rightV = newTemp(Ity_V128);
+ IRTemp rm = newTemp(Ity_I32);
s1 = s0 = d1 = d0 = IRTemp_INVALID;
breakupV128to64s( sV, &s1, &s0 );
assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
IRTemp res = newTemp(Ity_V128);
- assign( res, binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
- mkexpr(leftV), mkexpr(rightV) ) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
+ assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
+ mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
return res;
}
UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
IRTemp and_vec = newTemp(Ity_V128);
IRTemp sum_vec = newTemp(Ity_V128);
+ IRTemp rm = newTemp(Ity_I32);
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
assign( and_vec, binop( Iop_AndV128,
- binop( Iop_Mul64Fx2,
+ triop( Iop_Mul64Fx2,
+ mkexpr(rm),
mkexpr(dst_vec), mkexpr(src_vec) ),
mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
IRTemp tmp_prod_vec = newTemp(Ity_V128);
IRTemp prod_vec = newTemp(Ity_V128);
IRTemp sum_vec = newTemp(Ity_V128);
+ IRTemp rm = newTemp(Ity_I32);
IRTemp v3, v2, v1, v0;
v3 = v2 = v1 = v0 = IRTemp_INVALID;
   UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
                             0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
                             0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
                             0xFFFF };
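(Aside: each imm8_perms entry expands four selector bits into a per-byte mask for mkV128; every set bit becomes an 0xF nibble, i.e. four 0xFF bytes covering one 32-bit lane, so the AndV128 below keeps exactly the products selected by the high nibble of imm8.)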
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
assign( tmp_prod_vec,
binop( Iop_AndV128,
- binop( Iop_Mul32Fx4, mkexpr(dst_vec),
- mkexpr(src_vec) ),
+ triop( Iop_Mul32Fx4,
+ mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
- assign( sum_vec, binop( Iop_Add32Fx4,
+ assign( sum_vec, triop( Iop_Add32Fx4,
+ mkexpr(rm),
binop( Iop_InterleaveHI32x4,
mkexpr(prod_vec), mkexpr(prod_vec) ),
binop( Iop_InterleaveLO32x4,
IRTemp res = newTemp(Ity_V128);
assign( res, binop( Iop_AndV128,
- binop( Iop_Add32Fx4,
+ triop( Iop_Add32Fx4,
+ mkexpr(rm),
binop( Iop_InterleaveHI32x4,
mkexpr(sum_vec), mkexpr(sum_vec) ),
binop( Iop_InterleaveLO32x4,
if (op != Iop_INVALID) {
vassert(opFn == NULL);
res = newTemp(Ity_V128);
- assign(res, swapArgs ? binop(op, mkexpr(tSR), mkexpr(tSL))
- : binop(op, mkexpr(tSL), mkexpr(tSR)));
+ if (requiresRMode(op)) {
+ IRTemp rm = newTemp(Ity_I32);
+ assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
+ assign(res, swapArgs
+ ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
+ : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
+ } else {
+ assign(res, swapArgs
+ ? binop(op, mkexpr(tSR), mkexpr(tSL))
+ : binop(op, mkexpr(tSL), mkexpr(tSR)));
+ }
} else {
vassert(opFn != NULL);
res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
if (op != Iop_INVALID) {
vassert(opFn == NULL);
res = newTemp(Ity_V256);
- assign(res, swapArgs ? binop(op, mkexpr(tSR), mkexpr(tSL))
- : binop(op, mkexpr(tSL), mkexpr(tSR)));
+ if (requiresRMode(op)) {
+ IRTemp rm = newTemp(Ity_I32);
+ assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
+ assign(res, swapArgs
+ ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
+ : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
+ } else {
+ assign(res, swapArgs
+ ? binop(op, mkexpr(tSR), mkexpr(tSL))
+ : binop(op, mkexpr(tSL), mkexpr(tSR)));
+ }
} else {
vassert(opFn != NULL);
res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
/*--- SSE/SSE2/SSE3 helpers ---*/
/*------------------------------------------------------------*/
+/* Indicates whether the op requires a rounding-mode argument. Note
+ that this covers only vector floating point arithmetic ops, and
+ omits the scalar ones that need rounding modes. Note also that
+ inconsistencies here will get picked up later by the IR sanity
+ checker, so this isn't correctness-critical. */
+static Bool requiresRMode ( IROp op )
+{
+ switch (op) {
+ /* 128 bit ops */
+ case Iop_Add32Fx4: case Iop_Sub32Fx4:
+ case Iop_Mul32Fx4: case Iop_Div32Fx4:
+ case Iop_Add64Fx2: case Iop_Sub64Fx2:
+ case Iop_Mul64Fx2: case Iop_Div64Fx2:
+ return True;
+ default:
+ break;
+ }
+ return False;
+}
+
+
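(The x86 copy lists only the 128-bit cases: the 32-bit front end never decodes AVX, so the 256-bit ops cannot reach it.)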
/* Worker function; do not call directly.
Handles full width G = G `op` E and G = (not G) `op` E.
*/
= invertG ? unop(Iop_NotV128, getXMMReg(gregOfRM(rm)))
: getXMMReg(gregOfRM(rm));
if (epartIsReg(rm)) {
- putXMMReg( gregOfRM(rm),
- binop(op, gpart,
- getXMMReg(eregOfRM(rm))) );
+ putXMMReg(
+ gregOfRM(rm),
+ requiresRMode(op)
+ ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ gpart,
+ getXMMReg(eregOfRM(rm)))
+ : binop(op, gpart,
+ getXMMReg(eregOfRM(rm)))
+ );
DIP("%s %s,%s\n", opname,
nameXMMReg(eregOfRM(rm)),
nameXMMReg(gregOfRM(rm)) );
return delta+1;
} else {
addr = disAMode ( &alen, sorb, delta, dis_buf );
- putXMMReg( gregOfRM(rm),
- binop(op, gpart,
- loadLE(Ity_V128, mkexpr(addr))) );
+ putXMMReg(
+ gregOfRM(rm),
+ requiresRMode(op)
+ ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
+ gpart,
+ loadLE(Ity_V128, mkexpr(addr)))
+ : binop(op, gpart,
+ loadLE(Ity_V128, mkexpr(addr)))
+ );
DIP("%s %s,%s\n", opname,
dis_buf,
nameXMMReg(gregOfRM(rm)) );
IRTemp gV = newTemp(Ity_V128);
IRTemp addV = newTemp(Ity_V128);
IRTemp subV = newTemp(Ity_V128);
+ IRTemp rm = newTemp(Ity_I32);
a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
modrm = insn[3];
assign( gV, getXMMReg(gregOfRM(modrm)) );
- assign( addV, binop(Iop_Add32Fx4, mkexpr(gV), mkexpr(eV)) );
- assign( subV, binop(Iop_Sub32Fx4, mkexpr(gV), mkexpr(eV)) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
+ assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
+ assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
breakup128to32s( addV, &a3, &a2, &a1, &a0 );
breakup128to32s( subV, &s3, &s2, &s1, &s0 );
IRTemp subV = newTemp(Ity_V128);
IRTemp a1 = newTemp(Ity_I64);
IRTemp s0 = newTemp(Ity_I64);
+ IRTemp rm = newTemp(Ity_I32);
modrm = insn[2];
if (epartIsReg(modrm)) {
assign( gV, getXMMReg(gregOfRM(modrm)) );
- assign( addV, binop(Iop_Add64Fx2, mkexpr(gV), mkexpr(eV)) );
- assign( subV, binop(Iop_Sub64Fx2, mkexpr(gV), mkexpr(eV)) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
+ assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
+ assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(gV), mkexpr(eV)) );
assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
IRTemp gV = newTemp(Ity_V128);
IRTemp leftV = newTemp(Ity_V128);
IRTemp rightV = newTemp(Ity_V128);
+ IRTemp rm = newTemp(Ity_I32);
Bool isAdd = insn[2] == 0x7C;
const HChar* str = isAdd ? "add" : "sub";
e3 = e2 = e1 = e0 = g3 = g2 = g1 = g0 = IRTemp_INVALID;
assign( leftV, mk128from32s( e2, e0, g2, g0 ) );
assign( rightV, mk128from32s( e3, e1, g3, g1 ) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
putXMMReg( gregOfRM(modrm),
- binop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
- mkexpr(leftV), mkexpr(rightV) ) );
+ triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
+ mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
goto decode_success;
}
IRTemp gV = newTemp(Ity_V128);
IRTemp leftV = newTemp(Ity_V128);
IRTemp rightV = newTemp(Ity_V128);
+ IRTemp rm = newTemp(Ity_I32);
Bool isAdd = insn[1] == 0x7C;
const HChar* str = isAdd ? "add" : "sub";
assign( leftV, binop(Iop_64HLtoV128, mkexpr(e0),mkexpr(g0)) );
assign( rightV, binop(Iop_64HLtoV128, mkexpr(e1),mkexpr(g1)) );
+ assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
putXMMReg( gregOfRM(modrm),
- binop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
- mkexpr(leftV), mkexpr(rightV) ) );
+ triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
+ mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
goto decode_success;
}
case Iop_CmpLT32Fx4: op = Asse_CMPLTF; goto do_32Fx4;
case Iop_CmpLE32Fx4: op = Asse_CMPLEF; goto do_32Fx4;
case Iop_CmpUN32Fx4: op = Asse_CMPUNF; goto do_32Fx4;
- case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4;
- case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4;
case Iop_Max32Fx4: op = Asse_MAXF; goto do_32Fx4;
case Iop_Min32Fx4: op = Asse_MINF; goto do_32Fx4;
- case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4;
- case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4;
do_32Fx4:
{
HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
case Iop_CmpLT64Fx2: op = Asse_CMPLTF; goto do_64Fx2;
case Iop_CmpLE64Fx2: op = Asse_CMPLEF; goto do_64Fx2;
case Iop_CmpUN64Fx2: op = Asse_CMPUNF; goto do_64Fx2;
- case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2;
- case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2;
case Iop_Max64Fx2: op = Asse_MAXF; goto do_64Fx2;
case Iop_Min64Fx2: op = Asse_MINF; goto do_64Fx2;
- case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2;
- case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2;
do_64Fx2:
{
HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
} /* switch (e->Iex.Binop.op) */
} /* if (e->tag == Iex_Binop) */
+ if (e->tag == Iex_Triop) {
+ IRTriop *triop = e->Iex.Triop.details;
+ switch (triop->op) {
+
+ case Iop_Add64Fx2: op = Asse_ADDF; goto do_64Fx2_w_rm;
+ case Iop_Sub64Fx2: op = Asse_SUBF; goto do_64Fx2_w_rm;
+ case Iop_Mul64Fx2: op = Asse_MULF; goto do_64Fx2_w_rm;
+ case Iop_Div64Fx2: op = Asse_DIVF; goto do_64Fx2_w_rm;
+ do_64Fx2_w_rm:
+ {
+ HReg argL = iselVecExpr(env, triop->arg2);
+ HReg argR = iselVecExpr(env, triop->arg3);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, AMD64Instr_Sse64Fx2(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_Add32Fx4: op = Asse_ADDF; goto do_32Fx4_w_rm;
+ case Iop_Sub32Fx4: op = Asse_SUBF; goto do_32Fx4_w_rm;
+ case Iop_Mul32Fx4: op = Asse_MULF; goto do_32Fx4_w_rm;
+ case Iop_Div32Fx4: op = Asse_DIVF; goto do_32Fx4_w_rm;
+ do_32Fx4_w_rm:
+ {
+ HReg argL = iselVecExpr(env, triop->arg2);
+ HReg argR = iselVecExpr(env, triop->arg3);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, AMD64Instr_Sse32Fx4(op, argR, dst));
+ return dst;
+ }
+
+ default:
+ break;
+ } /* switch (triop->op) */
+ } /* if (e->tag == Iex_Triop) */
+
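A note on the lowering pattern, which also applies to the 256-bit and x86 variants below: the rounding-mode operand triop->arg1 is never selected into a register; it is simply dropped for now, which is what the XXXROUNDINGFIXME comments mark. The copy-then-operate pair reflects SSE's two-address form; for example, for Iop_Add64Fx2 the pair amounts to:

   addInstr(env, mk_vMOVsd_RR(argL, dst));                   /* dst := argL       */
   addInstr(env, AMD64Instr_Sse64Fx2(Asse_ADDF, argR, dst)); /* dst := dst + argR */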
if (e->tag == Iex_ITE) { // VFD
HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
if (e->tag == Iex_Binop) {
switch (e->Iex.Binop.op) {
- case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4;
- case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4;
- case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4;
- case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4;
case Iop_Max64Fx4: op = Asse_MAXF; goto do_64Fx4;
case Iop_Min64Fx4: op = Asse_MINF; goto do_64Fx4;
do_64Fx4:
return;
}
- case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8;
- case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8;
- case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8;
- case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8;
case Iop_Max32Fx8: op = Asse_MAXF; goto do_32Fx8;
case Iop_Min32Fx8: op = Asse_MINF; goto do_32Fx8;
do_32Fx8:
} /* switch (e->Iex.Binop.op) */
} /* if (e->tag == Iex_Binop) */
+ if (e->tag == Iex_Triop) {
+ IRTriop *triop = e->Iex.Triop.details;
+ switch (triop->op) {
+
+ case Iop_Add64Fx4: op = Asse_ADDF; goto do_64Fx4_w_rm;
+ case Iop_Sub64Fx4: op = Asse_SUBF; goto do_64Fx4_w_rm;
+ case Iop_Mul64Fx4: op = Asse_MULF; goto do_64Fx4_w_rm;
+ case Iop_Div64Fx4: op = Asse_DIVF; goto do_64Fx4_w_rm;
+ do_64Fx4_w_rm:
+ {
+ HReg argLhi, argLlo, argRhi, argRlo;
+ iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
+ iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
+ HReg dstHi = newVRegV(env);
+ HReg dstLo = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
+ addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, AMD64Instr_Sse64Fx2(op, argRhi, dstHi));
+ addInstr(env, AMD64Instr_Sse64Fx2(op, argRlo, dstLo));
+ *rHi = dstHi;
+ *rLo = dstLo;
+ return;
+ }
+
+ case Iop_Add32Fx8: op = Asse_ADDF; goto do_32Fx8_w_rm;
+ case Iop_Sub32Fx8: op = Asse_SUBF; goto do_32Fx8_w_rm;
+ case Iop_Mul32Fx8: op = Asse_MULF; goto do_32Fx8_w_rm;
+ case Iop_Div32Fx8: op = Asse_DIVF; goto do_32Fx8_w_rm;
+ do_32Fx8_w_rm:
+ {
+ HReg argLhi, argLlo, argRhi, argRlo;
+ iselDVecExpr(&argLhi, &argLlo, env, triop->arg2);
+ iselDVecExpr(&argRhi, &argRlo, env, triop->arg3);
+ HReg dstHi = newVRegV(env);
+ HReg dstLo = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argLhi, dstHi));
+ addInstr(env, mk_vMOVsd_RR(argLlo, dstLo));
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, AMD64Instr_Sse32Fx4(op, argRhi, dstHi));
+ addInstr(env, AMD64Instr_Sse32Fx4(op, argRlo, dstLo));
+ *rHi = dstHi;
+ *rLo = dstLo;
+ return;
+ }
+
+ default:
+ break;
+ } /* switch (triop->op) */
+ } /* if (e->tag == Iex_Triop) */
+
+
if (e->tag == Iex_Qop && e->Iex.Qop.details->op == Iop_64x4toV256) {
HReg rsp = hregAMD64_RSP();
HReg vHi = newVRegV(env);
case Iop_CmpLT32Fx4: op = Xsse_CMPLTF; goto do_32Fx4;
case Iop_CmpLE32Fx4: op = Xsse_CMPLEF; goto do_32Fx4;
case Iop_CmpUN32Fx4: op = Xsse_CMPUNF; goto do_32Fx4;
- case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4;
- case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4;
case Iop_Max32Fx4: op = Xsse_MAXF; goto do_32Fx4;
case Iop_Min32Fx4: op = Xsse_MINF; goto do_32Fx4;
- case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4;
- case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4;
do_32Fx4:
{
HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2;
case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2;
case Iop_CmpUN64Fx2: op = Xsse_CMPUNF; goto do_64Fx2;
- case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2;
- case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2;
case Iop_Max64Fx2: op = Xsse_MAXF; goto do_64Fx2;
case Iop_Min64Fx2: op = Xsse_MINF; goto do_64Fx2;
- case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2;
- case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2;
do_64Fx2:
{
HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
} /* switch (e->Iex.Binop.op) */
} /* if (e->tag == Iex_Binop) */
+
+ if (e->tag == Iex_Triop) {
+ IRTriop *triop = e->Iex.Triop.details;
+ switch (triop->op) {
+
+ case Iop_Add32Fx4: op = Xsse_ADDF; goto do_32Fx4_w_rm;
+ case Iop_Sub32Fx4: op = Xsse_SUBF; goto do_32Fx4_w_rm;
+ case Iop_Mul32Fx4: op = Xsse_MULF; goto do_32Fx4_w_rm;
+ case Iop_Div32Fx4: op = Xsse_DIVF; goto do_32Fx4_w_rm;
+ do_32Fx4_w_rm:
+ {
+ HReg argL = iselVecExpr(env, triop->arg2);
+ HReg argR = iselVecExpr(env, triop->arg3);
+ HReg dst = newVRegV(env);
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, X86Instr_Sse32Fx4(op, argR, dst));
+ return dst;
+ }
+
+ case Iop_Add64Fx2: op = Xsse_ADDF; goto do_64Fx2_w_rm;
+ case Iop_Sub64Fx2: op = Xsse_SUBF; goto do_64Fx2_w_rm;
+ case Iop_Mul64Fx2: op = Xsse_MULF; goto do_64Fx2_w_rm;
+ case Iop_Div64Fx2: op = Xsse_DIVF; goto do_64Fx2_w_rm;
+ do_64Fx2_w_rm:
+ {
+ HReg argL = iselVecExpr(env, triop->arg2);
+ HReg argR = iselVecExpr(env, triop->arg3);
+ HReg dst = newVRegV(env);
+ REQUIRE_SSE2;
+ addInstr(env, mk_vMOVsd_RR(argL, dst));
+ /* XXXROUNDINGFIXME */
+ /* set roundingmode here */
+ addInstr(env, X86Instr_Sse64Fx2(op, argR, dst));
+ return dst;
+ }
+
+ default:
+ break;
+ } /* switch (triop->op) */
+ } /* if (e->tag == Iex_Triop) */
+
+
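(REQUIRE_SSE2 appears only in the 64Fx2 arm because packed-double SSE instructions do not exist on SSE1-only hosts; the 32Fx4 ops are plain SSE1.)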
if (e->tag == Iex_ITE) { // VFD
HReg r1 = iselVecExpr(env, e->Iex.ITE.iftrue);
HReg r0 = iselVecExpr(env, e->Iex.ITE.iffalse);
case Iop_CmpEQ64F0x2: case Iop_CmpLT64F0x2:
case Iop_CmpLE32F0x4: case Iop_CmpUN32F0x4:
case Iop_CmpLE64F0x2: case Iop_CmpUN64F0x2:
- case Iop_Add32Fx4: case Iop_Add32F0x4:
- case Iop_Add64Fx2: case Iop_Add64F0x2:
- case Iop_Div32Fx4: case Iop_Div32F0x4:
- case Iop_Div64Fx2: case Iop_Div64F0x2:
+ case Iop_Add32F0x4:
+ case Iop_Add64F0x2:
+ case Iop_Div32F0x4:
+ case Iop_Div64F0x2:
case Iop_Max32Fx4: case Iop_Max32F0x4:
case Iop_PwMax32Fx4: case Iop_PwMin32Fx4:
case Iop_Max64Fx2: case Iop_Max64F0x2:
case Iop_Min32Fx4: case Iop_Min32F0x4:
case Iop_Min64Fx2: case Iop_Min64F0x2:
- case Iop_Mul32Fx4: case Iop_Mul32F0x4:
- case Iop_Mul64Fx2: case Iop_Mul64F0x2:
- case Iop_Sub32Fx4: case Iop_Sub32F0x4:
- case Iop_Sub64Fx2: case Iop_Sub64F0x2:
+ case Iop_Mul32F0x4:
+ case Iop_Mul64F0x2:
+ case Iop_Sub32F0x4:
+ case Iop_Sub64F0x2:
case Iop_AndV128: case Iop_OrV128: case Iop_XorV128:
case Iop_Add8x16: case Iop_Add16x8:
case Iop_Add32x4: case Iop_Add64x2:
case Iop_QDMulLong16Sx4: case Iop_QDMulLong32Sx2:
BINARY(Ity_I64, Ity_I64, Ity_V128);
- /* s390 specific */
+ /* s390 specific */
case Iop_MAddF32:
case Iop_MSubF32:
QUATERNARY(ity_RMode,Ity_F32,Ity_F32,Ity_F32, Ity_F32);
case Iop_DivF128:
TERNARY(ity_RMode,Ity_F128,Ity_F128, Ity_F128);
+ case Iop_Add64Fx2: case Iop_Sub64Fx2:
+ case Iop_Mul64Fx2: case Iop_Div64Fx2:
+ case Iop_Add32Fx4: case Iop_Sub32Fx4:
+ case Iop_Mul32Fx4: case Iop_Div32Fx4:
+ TERNARY(ity_RMode,Ity_V128,Ity_V128, Ity_V128);
+
+ case Iop_Add64Fx4: case Iop_Sub64Fx4:
+ case Iop_Mul64Fx4: case Iop_Div64Fx4:
+ case Iop_Add32Fx8: case Iop_Sub32Fx8:
+ case Iop_Mul32Fx8: case Iop_Div32Fx8:
+ TERNARY(ity_RMode,Ity_V256,Ity_V256, Ity_V256);
+
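(These TERNARY entries record the signatures the IR sanity checker enforces: first operand ity_RMode, i.e. Ity_I32, then the two vector operands, with the result type last. This is what makes the front ends' triop(...) forms above type-check, and what would flag any leftover binop uses of these ops.)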
case Iop_NegF128:
case Iop_AbsF128:
UNARY(Ity_F128, Ity_F128);
case Iop_64x4toV256:
QUATERNARY(Ity_I64, Ity_I64, Ity_I64, Ity_I64, Ity_V256);
- case Iop_Add64Fx4: case Iop_Sub64Fx4:
- case Iop_Mul64Fx4: case Iop_Div64Fx4:
- case Iop_Add32Fx8: case Iop_Sub32Fx8:
- case Iop_Mul32Fx8: case Iop_Div32Fx8:
case Iop_AndV256: case Iop_OrV256:
case Iop_XorV256:
case Iop_Max32Fx8: case Iop_Min32Fx8:
&& e->Iex.Const.con->Ico.U64 == 0);
}
+/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
+static Bool isZeroV128 ( IRExpr* e )
+{
+ return toBool( e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_V128
+ && e->Iex.Const.con->Ico.V128 == 0x0000);
+}
+
+/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
+static Bool isZeroV256 ( IRExpr* e )
+{
+ return toBool( e->tag == Iex_Const
+ && e->Iex.Const.con->tag == Ico_V256
+ && e->Iex.Const.con->Ico.V256 == 0x00000000);
+}
+
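(For reference: an Ico_V128 constant is stored as a 16-bit mask with one bit per byte lane, a set bit meaning that byte is 0xFF, and Ico_V256 is the analogous 32-bit mask, so comparing against 0x0000 and 0x00000000 really does test for an all-zeroes vector.)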
/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
{
}
break;
}
+ /* Same reasoning for the 256-bit version. */
+ case Iop_V128HLtoV256: {
+ IRExpr* argHi = e->Iex.Binop.arg1;
+ IRExpr* argLo = e->Iex.Binop.arg2;
+ if (isZeroV128(argHi) && isZeroV128(argLo)) {
+ e2 = IRExpr_Const(IRConst_V256(0));
+ } else {
+ goto unhandled;
+ }
+ break;
+ }
/* -- V128 stuff -- */
case Iop_InterleaveLO8x16: {
e2 = e->Iex.Binop.arg1;
break;
}
+ /* OrV128(t,0) ==> t */
+ if (e->Iex.Binop.op == Iop_OrV128) {
+ if (isZeroV128(e->Iex.Binop.arg2)) {
+ e2 = e->Iex.Binop.arg1;
+ break;
+ }
+ if (isZeroV128(e->Iex.Binop.arg1)) {
+ e2 = e->Iex.Binop.arg2;
+ break;
+ }
+ }
+ /* OrV256(t,0) ==> t */
+ if (e->Iex.Binop.op == Iop_OrV256) {
+ if (isZeroV256(e->Iex.Binop.arg2)) {
+ e2 = e->Iex.Binop.arg1;
+ break;
+ }
+ //Disabled because there's no known test case right now.
+ //if (isZeroV256(e->Iex.Binop.arg1)) {
+ // e2 = e->Iex.Binop.arg2;
+ // break;
+ //}
+ }
break;
case Iop_Xor8:
/* BCD arithmetic instructions, (V128, V128) -> V128
* The BCD format is the same as that used in the BCD<->DPB conversion
- * routines, except using 124 digits (vs 60) plus the trailing 4-bit signed code.
- * */
+ * routines, except using 124 digits (vs 60) plus the trailing 4-bit
+ * signed code. */
Iop_BCDAdd, Iop_BCDSub,
/* Conversion I64 -> D64 */
/* --- 32x4 vector FP --- */
- /* binary */
+ /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */
Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
+
+ /* binary */
Iop_Max32Fx4, Iop_Min32Fx4,
Iop_Add32Fx2, Iop_Sub32Fx2,
/* Note: For the following compares, the ppc and arm front-ends assume a
/* --- 64x2 vector FP --- */
- /* binary */
+ /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */
Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
+
+ /* binary */
Iop_Max64Fx2, Iop_Min64Fx2,
Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
Iop_SHA512, Iop_SHA256,
/* ------------------ 256-bit SIMD FP. ------------------ */
- Iop_Add64Fx4,
- Iop_Sub64Fx4,
- Iop_Mul64Fx4,
- Iop_Div64Fx4,
- Iop_Add32Fx8,
- Iop_Sub32Fx8,
- Iop_Mul32Fx8,
- Iop_Div32Fx8,
+
+ /* ternary :: IRRoundingMode(I32) x V256 x V256 -> V256 */
+ Iop_Add64Fx4, Iop_Sub64Fx4, Iop_Mul64Fx4, Iop_Div64Fx4,
+ Iop_Add32Fx8, Iop_Sub32Fx8, Iop_Mul32Fx8, Iop_Div32Fx8,
Iop_Sqrt32Fx8,
Iop_Sqrt64Fx4,