code. Also, rename a few primops and add another folding rule.
git-svn-id: svn://svn.valgrind.org/vex/trunk@1449
assign( zHi, binop(Iop_Add32x4,
binop(Iop_MullEven16Ux8, mkexpr(aHi), mkexpr(bHi) ),
mkexpr(cHi)) );
- putVReg( vD_addr, binop(Iop_Narrow32Ux4, mkexpr(zHi), mkexpr(zLo)) );
+ putVReg( vD_addr, binop(Iop_Narrow32x4, mkexpr(zHi), mkexpr(zLo)) );
break;
}
/* Rotate */
case 0x004: // vrlb (Rotate Left Integer B, AV p234)
DIP("vrlb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_Rotl8x16, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_Rol8x16, mkexpr(vA), mkexpr(vB)) );
break;
case 0x044: // vrlh (Rotate Left Integer HW, AV p235)
DIP("vrlh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_Rotl16x8, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_Rol16x8, mkexpr(vA), mkexpr(vB)) );
break;
case 0x084: // vrlw (Rotate Left Integer W, AV p236)
DIP("vrlw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_Rotl32x4, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_Rol32x4, mkexpr(vA), mkexpr(vB)) );
break;
/* Packing */
case 0x00E: // vpkuhum (Pack Unsigned HW Unsigned Modulo, AV p224)
DIP("vpkuhum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_Narrow16Ux8, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_Narrow16x8, mkexpr(vA), mkexpr(vB)) );
return True;
case 0x04E: // vpkuwum (Pack Unsigned W Unsigned Modulo, AV p226)
DIP("vpkuwum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
- putVReg( vD_addr, binop(Iop_Narrow32Ux4, mkexpr(vA), mkexpr(vB)) );
+ putVReg( vD_addr, binop(Iop_Narrow32x4, mkexpr(vA), mkexpr(vB)) );
return True;
case 0x08E: // vpkuhus (Pack Unsigned HW Unsigned Saturate, AV p225)
assign( b_tmp, binop(Iop_OrV128, mkexpr(b1),
binop(Iop_OrV128, mkexpr(b2), mkexpr(b3))) );
- putVReg( vD_addr, binop(Iop_Narrow32Ux4,
+ putVReg( vD_addr, binop(Iop_Narrow32x4,
mkexpr(a_tmp), mkexpr(b_tmp)) );
return True;
}
return;
}
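
For reference, a minimal scalar sketch of what the modulo pack now spelled Iop_Narrow16x8 computes, written with plain C99 types and a schematic lane order (the helper name is illustrative, not part of VEX): each 16-bit lane is truncated to its low 8 bits, and the left operand supplies the high half of the result, matching the "hi half from left arg" convention noted in libvex_ir.h.

#include <stdint.h>

/* Schematic model of Iop_Narrow16x8: truncate each 16-bit lane to 8 bits.
   Element 0 is treated as the most-significant lane here (AltiVec-style
   numbering), so the first (left) source supplies the high half. */
static void narrow16x8_model ( uint8_t dst[16],
                               const uint16_t srcL[8],
                               const uint16_t srcR[8] )
{
   int i;
   for (i = 0; i < 8; i++) {
      dst[i]     = (uint8_t)(srcL[i] & 0xFF);  /* hi half, from left arg  */
      dst[i + 8] = (uint8_t)(srcR[i] & 0xFF);  /* lo half, from right arg */
   }
}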
+ case Iop_Neg64: {
+ HReg yLo, yHi;
+ HReg zero = newVRegI(env);
+ HReg tLo = newVRegI(env);
+ HReg tHi = newVRegI(env);
+ iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
+ addInstr(env, PPC32Instr_LI32(zero, 0));
+ addInstr(env, PPC32Instr_AddSubC32( False/*sub*/, True /*set carry*/,
+ tLo, zero, yLo));
+ addInstr(env, PPC32Instr_AddSubC32( False/*sub*/, False/*read carry*/,
+ tHi, zero, yHi));
+ *rHi = tHi;
+ *rLo = tLo;
+ return;
+ }
+
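
A minimal scalar sketch of what the Iop_Neg64 lowering above computes, using plain C99 types (the helper name is illustrative, not part of VEX): the low half is negated with a subtract that produces a carry/borrow, and the high half consumes it, which the two AddSubC32 instructions presumably map onto the PPC32 subfc/subfe pair.

#include <stdint.h>

/* Schematic model of 64-bit negation split over two 32-bit halves. */
static void neg64_model ( uint32_t *rHi, uint32_t *rLo,
                          uint32_t yHi, uint32_t yLo )
{
   uint32_t lo    = 0u - yLo;               /* 0 - yLo, produces a carry   */
   uint32_t carry = (yLo == 0u) ? 1u : 0u;  /* carry out of (~yLo + 1)     */
   uint32_t hi    = ~yHi + carry;           /* 0 - yHi - borrow            */
   *rLo = lo;
   *rHi = hi;
}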
//.. /* Not64(e) */
//.. case Iop_Not64: {
//.. HReg tLo = newVRegI(env);
addInstr(env, PPC32Instr_AvUnary(Pav_NOT, dst, dst));
return dst;
}
+
+ case Iop_CmpNEZ16x8: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg zero = newVRegV(env);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPC32Instr_AvBinary(Pav_XOR, zero, zero, zero));
+ addInstr(env, PPC32Instr_AvBin16x8(Pav_CMPEQU, dst, arg, zero));
+ addInstr(env, PPC32Instr_AvUnary(Pav_NOT, dst, dst));
+ return dst;
+ }
+
+ case Iop_CmpNEZ32x4: {
+ HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
+ HReg zero = newVRegV(env);
+ HReg dst = newVRegV(env);
+ addInstr(env, PPC32Instr_AvBinary(Pav_XOR, zero, zero, zero));
+ addInstr(env, PPC32Instr_AvBin32x4(Pav_CMPEQU, dst, arg, zero));
+ addInstr(env, PPC32Instr_AvUnary(Pav_NOT, dst, dst));
+ return dst;
+ }
+
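
A minimal scalar sketch of the CmpNEZ lowering above (plain C99 types; the helper name is illustrative): compare each lane against an all-zero vector for equality, then invert, so non-zero lanes become all ones and zero lanes stay all zeros. The 32x4 case is identical with 32-bit lanes.

#include <stdint.h>

/* Schematic model of CmpNEZ16x8 = NOT(CmpEQ16x8(arg, 0)). */
static void cmpNEZ16x8_model ( uint16_t dst[8], const uint16_t arg[8] )
{
   int i;
   for (i = 0; i < 8; i++) {
      uint16_t eqZero = (arg[i] == 0) ? 0xFFFFu : 0x0000u;  /* lanewise CMPEQ vs 0 */
      dst[i] = (uint16_t)~eqZero;                           /* lanewise NOT        */
   }
}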
//.. case Iop_CmpNEZ16x8: {
//.. /* We can use SSE2 instructions for this. */
//.. HReg arg;
case Iop_Shl8x16: op = Pav_SHL; goto do_AvBin8x16;
case Iop_Shr8x16: op = Pav_SHR; goto do_AvBin8x16;
case Iop_Sar8x16: op = Pav_SAR; goto do_AvBin8x16;
- case Iop_Rotl8x16: op = Pav_ROTL; goto do_AvBin8x16;
+ case Iop_Rol8x16: op = Pav_ROTL; goto do_AvBin8x16;
case Iop_InterleaveHI8x16: op = Pav_MRGHI; goto do_AvBin8x16;
case Iop_InterleaveLO8x16: op = Pav_MRGLO; goto do_AvBin8x16;
case Iop_Add8x16: op = Pav_ADDU; goto do_AvBin8x16;
case Iop_Shl16x8: op = Pav_SHL; goto do_AvBin16x8;
case Iop_Shr16x8: op = Pav_SHR; goto do_AvBin16x8;
case Iop_Sar16x8: op = Pav_SAR; goto do_AvBin16x8;
- case Iop_Rotl16x8: op = Pav_ROTL; goto do_AvBin16x8;
- case Iop_Narrow16Ux8: op = Pav_PACKUU; goto do_AvBin16x8;
+ case Iop_Rol16x8: op = Pav_ROTL; goto do_AvBin16x8;
+ case Iop_Narrow16x8: op = Pav_PACKUU; goto do_AvBin16x8;
case Iop_QNarrow16Ux8: op = Pav_QPACKUU; goto do_AvBin16x8;
case Iop_QNarrow16Sx8: op = Pav_QPACKSS; goto do_AvBin16x8;
case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8;
case Iop_Shl32x4: op = Pav_SHL; goto do_AvBin32x4;
case Iop_Shr32x4: op = Pav_SHR; goto do_AvBin32x4;
case Iop_Sar32x4: op = Pav_SAR; goto do_AvBin32x4;
- case Iop_Rotl32x4: op = Pav_ROTL; goto do_AvBin32x4;
- case Iop_Narrow32Ux4: op = Pav_PACKUU; goto do_AvBin32x4;
+ case Iop_Rol32x4: op = Pav_ROTL; goto do_AvBin32x4;
+ case Iop_Narrow32x4: op = Pav_PACKUU; goto do_AvBin32x4;
case Iop_QNarrow32Ux4: op = Pav_QPACKUU; goto do_AvBin32x4;
case Iop_QNarrow32Sx4: op = Pav_QPACKSS; goto do_AvBin32x4;
case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4;
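
A minimal scalar sketch of the lanewise rotate now spelled Iop_Rol32x4 (plain C99 types; the helper name is illustrative): each 32-bit lane of the first operand is rotated left by the low five bits of the matching lane of the second operand, as AltiVec vrlw does. The 8x16 and 16x8 variants behave the same with narrower lanes.

#include <stdint.h>

/* Schematic model of Iop_Rol32x4: per-lane rotate left, with the shift
   amount taken modulo the lane width from the corresponding lane of b. */
static void rol32x4_model ( uint32_t dst[4],
                            const uint32_t a[4], const uint32_t b[4] )
{
   int i;
   for (i = 0; i < 4; i++) {
      uint32_t sh = b[i] & 31u;
      dst[i] = (sh == 0u) ? a[i]
                          : ((a[i] << sh) | (a[i] >> (32u - sh)));
   }
}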
case Iop_Sar8x16: vex_printf("Sar8x16"); return;
case Iop_Sar16x8: vex_printf("Sar16x8"); return;
case Iop_Sar32x4: vex_printf("Sar32x4"); return;
- case Iop_Rotl8x16: vex_printf("Rotl8x16"); return;
- case Iop_Rotl16x8: vex_printf("Rotl16x8"); return;
- case Iop_Rotl32x4: vex_printf("Rotl32x4"); return;
+ case Iop_Rol8x16: vex_printf("Rol8x16"); return;
+ case Iop_Rol16x8: vex_printf("Rol16x8"); return;
+ case Iop_Rol32x4: vex_printf("Rol32x4"); return;
- case Iop_Narrow16Ux8: vex_printf("Narrow16Ux8"); return;
- case Iop_Narrow32Ux4: vex_printf("Narrow32Ux4"); return;
+ case Iop_Narrow16x8: vex_printf("Narrow16x8"); return;
+ case Iop_Narrow32x4: vex_printf("Narrow32x4"); return;
case Iop_QNarrow16Ux8: vex_printf("QNarrow16Ux8"); return;
case Iop_QNarrow32Ux4: vex_printf("QNarrow32Ux4"); return;
case Iop_QNarrow16Sx8: vex_printf("QNarrow16Sx8"); return;
case Iop_Shl8x16: case Iop_Shl16x8: case Iop_Shl32x4:
case Iop_Shr8x16: case Iop_Shr16x8: case Iop_Shr32x4:
case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4:
- case Iop_Rotl8x16: case Iop_Rotl16x8: case Iop_Rotl32x4:
+ case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4:
case Iop_QNarrow16Ux8: case Iop_QNarrow32Ux4:
case Iop_QNarrow16Sx8: case Iop_QNarrow32Sx4:
- case Iop_Narrow16Ux8: case Iop_Narrow32Ux4:
+ case Iop_Narrow16x8: case Iop_Narrow32x4:
case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8:
case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2:
case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8:
? 1 : 0));
break;
+ case Iop_1Sto8:
+ e2 = IRExpr_Const(IRConst_U8(toUChar(
+ e->Iex.Unop.arg->Iex.Const.con->Ico.U1
+ ? 0xFF : 0)));
+ break;
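
The new folding rule above amounts to a one-line constant fold; a sketch of the intended result (illustrative only):

#include <stdint.h>

/* Sign-extending a 1-bit constant to 8 bits: 1 -> 0xFF, 0 -> 0x00. */
static uint8_t fold_1Sto8_model ( int bit )
{
   return bit ? 0xFF : 0x00;
}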
case Iop_1Sto16:
e2 = IRExpr_Const(IRConst_U16(toUShort(
e->Iex.Unop.arg->Iex.Const.con->Ico.U1
Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4,
Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4,
Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4,
- Iop_Rotl8x16, Iop_Rotl16x8, Iop_Rotl32x4,
+ Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
/* NARROWING -- narrow 2xV128 into 1xV128, hi half from left arg */
+ /* Note: the 16{U,S} and 32{U,S} are the pre-narrow lane widths. */
Iop_QNarrow16Ux8, Iop_QNarrow32Ux4,
Iop_QNarrow16Sx8, Iop_QNarrow32Sx4,
- Iop_Narrow16Ux8, Iop_Narrow32Ux4,
+ Iop_Narrow16x8, Iop_Narrow32x4,
/* INTERLEAVING -- interleave lanes from low or high halves of
operands. Most-significant result lane is from the left