case 0x2B: { // vperm (Permute, AV p218)
/* limited to two args for IR, so have to play games... */
- IRTemp a_perm = newTemp(Ity_V128);
- IRTemp b_perm = newTemp(Ity_V128);
- IRTemp mask = newTemp(Ity_V128);
- DIP("vperma v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
- assign( a_perm, binop(Iop_Perm, mkexpr(vA), mkexpr(vC)) );
- assign( b_perm, binop(Iop_Perm, mkexpr(vB), mkexpr(vC)) );
+ IRTemp a_perm = newTemp(Ity_V128); /* vA permuted by the masked control vector */
+ IRTemp b_perm = newTemp(Ity_V128); /* vB permuted by the same masked control vector */
+ IRTemp mask = newTemp(Ity_V128);
+ IRTemp vC_andF = newTemp(Ity_V128); /* vC ANDed with the 0xF pattern built below */
+ DIP("vperm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr);
+ /* Limit the Perm8x16 steering values to 0 .. 15 as that is what
+ IR specifies, and also to hide irrelevant bits from
+ memcheck */
+ assign( vC_andF, binop(Iop_AndV128, mkexpr(vC),
+ unop(Iop_Dup8x16, mkU8(0xF))) ); /* presumably Dup8x16 replicates 0xF into every byte lane -- confirm */
+ assign( a_perm, binop(Iop_Perm8x16, mkexpr(vA), mkexpr(vC_andF)) ); /* control is vC_andF, not raw vC */
+ assign( b_perm, binop(Iop_Perm8x16, mkexpr(vB), mkexpr(vC_andF)) ); /* same control applied to vB */
// mask[i8] = (vC[i8]_4 == 1) ? 0xFF : 0x0
assign( mask, binop(Iop_SarN8x16,
binop(Iop_ShlN8x16, mkexpr(vC), mkU8(3)),
addHRegUse(u, HRmRead, i->Pin.AvUnary.src);
return;
case Pin_AvBinary:
- addHRegUse(u, HRmWrite, i->Pin.AvBinary.dst);
- addHRegUse(u, HRmRead, i->Pin.AvBinary.srcL);
- addHRegUse(u, HRmRead, i->Pin.AvBinary.srcR);
+ if (i->Pin.AvBinary.op == Pav_XOR
+ && i->Pin.AvBinary.dst == i->Pin.AvBinary.srcL
+ && i->Pin.AvBinary.dst == i->Pin.AvBinary.srcR) {
+ /* reg-alloc needs to understand 'xor r,r,r' as a write of r:
+ when the op is XOR and dst, srcL and srcR are all the same
+ register, only a write of dst is recorded and no reads */
+ /* (as opposed to a rite of passage :-) */
+ addHRegUse(u, HRmWrite, i->Pin.AvBinary.dst);
+ } else {
+ addHRegUse(u, HRmWrite, i->Pin.AvBinary.dst); /* general case: dst written, both sources read */
+ addHRegUse(u, HRmRead, i->Pin.AvBinary.srcL);
+ addHRegUse(u, HRmRead, i->Pin.AvBinary.srcR);
+ }
return;
case Pin_AvBin8x16:
addHRegUse(u, HRmWrite, i->Pin.AvBin8x16.dst);
//.. return dst;
//.. }
//..
-//.. case Iop_CmpNEZ8x16:
+ case Iop_CmpNEZ8x16: { /* select code for "arg != 0", built as NOT(arg == zero) */
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg zero = newVRegV(env);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPC32Instr_AvBinary(Pav_XOR, zero, zero, zero)); /* zero := zero ^ zero, the 'xor r,r,r' idiom */
+ addInstr(env, PPC32Instr_AvBin8x16(Pav_CMPEQU, dst, arg, zero)); /* dst := 8x16 compare-equal of arg with zero */
+ addInstr(env, PPC32Instr_AvUnary(Pav_NOT, dst, dst)); /* invert in place: equal-to-zero becomes not-equal-to-zero */
+ return dst;
+ }
//.. case Iop_CmpNEZ16x8: {
//.. /* We can use SSE2 instructions for this. */
//.. HReg arg;
return dst;
}
- case Iop_Perm: {
+ case Iop_Perm8x16: {
HReg dst = newVRegV(env);
HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1);
HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2);
case Iop_InterleaveLO32x4: vex_printf("InterleaveLO32x4"); return;
case Iop_InterleaveLO64x2: vex_printf("InterleaveLO64x2"); return;
- case Iop_Perm: vex_printf("Perm"); return;
+ case Iop_Perm8x16: vex_printf("Perm8x16"); return;
default: vpanic("ppIROp(1)");
}
case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2:
case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8:
case Iop_InterleaveLO32x4: case Iop_InterleaveLO64x2:
- case Iop_Perm:
+ case Iop_Perm8x16:
BINARY(Ity_V128, Ity_V128,Ity_V128);
case Iop_NotV128:
Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4,
/* PERMUTING -- copy src bytes to dst,
- as indexed by control vector bytes: dst[i] = src[ ctl[i] ] */
- Iop_Perm
+ as indexed by control vector bytes:
+ for i in 0 .. 15 . result[i] = argL[ argR[i] ]
+ argR[i] values may only be in the range 0 .. 15, else behaviour
+ is undefined. */
+ Iop_Perm8x16
}
IROp;