From: Carl Love Date: Tue, 1 Oct 2013 15:45:54 +0000 (+0000) Subject: Phase 3 support for IBM Power ISA 2.07 X-Git-Tag: svn/VALGRIND_3_9_0^2~16 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ed6a2b63223d090192b499e5460c0f9d9a39db24;p=thirdparty%2Fvalgrind.git Phase 3 support for IBM Power ISA 2.07 This patch adds support for the following vector instructions for doing arithmetic, min, max, shift, pack, unpack and rotate: vsubudm, vmaxud, vmaxsd, vminud, vminsd, vmulouw, vmuluwm, vmulosw, vmuleuw, vmulesw, vcmpequd, vcmpgtud, vcmpgtsd, vrld, vsld, vsrad, vsrd, vpkudus, vpksdus, vpksdss, vupkhsw, vupklsw, vmrgew, vmrgow The following Iops were added to support the above instructions: Iop_MullEven32Ux4, Iop_MullEven32Sx4, Iop_Max64Sx2, Iop_Max64Ux2, Iop_Min64Sx2, Iop_Min64Ux2, Iop_CmpGT64Ux2, Iop_Rol64x2, Iop_QNarrowBin64Sto32Sx4, Iop_QNarrowBin64Uto32Ux4, Iop_NarrowBin64to32x4, Signed-off-by: Maynard Johnson Bugzilla 324894 git-svn-id: svn://svn.valgrind.org/vex/trunk@2779 --- diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index 5619d43f1d..8f17b64cdf 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -878,11 +878,22 @@ static IRExpr* mkV128from4x64U ( IRExpr* t3, IRExpr* t2, binop(Iop_ShrV128, expr_vA, mkU8(16)), \ binop(Iop_ShrV128, expr_vB, mkU8(16))) +#define MK_Iop_MullOdd32Ux4( expr_vA, expr_vB ) \ + binop(Iop_MullEven32Ux4, \ + binop(Iop_ShrV128, expr_vA, mkU8(32)), \ + binop(Iop_ShrV128, expr_vB, mkU8(32))) + #define MK_Iop_MullOdd16Sx8( expr_vA, expr_vB ) \ binop(Iop_MullEven16Sx8, \ binop(Iop_ShrV128, expr_vA, mkU8(16)), \ binop(Iop_ShrV128, expr_vB, mkU8(16))) +#define MK_Iop_MullOdd32Sx4( expr_vA, expr_vB ) \ + binop(Iop_MullEven32Sx4, \ + binop(Iop_ShrV128, expr_vA, mkU8(32)), \ + binop(Iop_ShrV128, expr_vB, mkU8(32))) + + static IRExpr* /* :: Ity_I64 */ mk64lo32Sto64 ( IRExpr* src ) { vassert(typeOfIRExpr(irsb->tyenv, src) == Ity_I64); @@ -6604,8 +6615,11 @@ static Bool dis_proc_ctl ( 
VexAbiInfo* vbi, UInt theInstr ) /* Reorder TBR field as per PPC32 p475 */ TBR = ((TBR & 31) << 5) | ((TBR >> 5) & 31); - if (opc1 != 0x1F || b0 != 0) { - vex_printf("dis_proc_ctl(ppc)(opc1|b0)\n"); + /* b0 = 0, inst is treated as floating point inst for reservation purposes + * b0 = 1, inst is treated as vector inst for reservation purposes + */ + if (opc1 != 0x1F) { + vex_printf("dis_proc_ctl(ppc)(opc1|b%d)\n", b0); return False; } @@ -15087,6 +15101,11 @@ static Bool dis_av_arith ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Sub32x4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x4C0: // vsubudm (Subtract Unsigned Double Word Modulo) + DIP("vsubudm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Sub64x2, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x600: // vsububs (Subtract Unsigned Byte Saturate, AV p266) DIP("vsububs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, binop(Iop_QSub8Ux16, mkexpr(vA), mkexpr(vB)) ); @@ -15140,6 +15159,11 @@ static Bool dis_av_arith ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Max32Ux4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x0C2: // vmaxud (Maximum Unsigned Double word) + DIP("vmaxud v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Max64Ux2, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x102: // vmaxsb (Maximum Signed Byte, AV p179) DIP("vmaxsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, binop(Iop_Max8Sx16, mkexpr(vA), mkexpr(vB)) ); @@ -15155,6 +15179,10 @@ static Bool dis_av_arith ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Max32Sx4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x1C2: // vmaxsd (Maximum Signed Double word) + DIP("vmaxsd v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Max64Sx2, mkexpr(vA), mkexpr(vB)) ); + break; /* Minimum */ case 0x202: // vminub (Minimum Unsigned Byte, AV p191) @@ -15172,6 +15200,11 @@ static Bool dis_av_arith ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Min32Ux4, mkexpr(vA), mkexpr(vB)) ); 
break; + case 0x2C2: // vminud (Minimum Unsigned Double Word) + DIP("vminud v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Min64Ux2, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x302: // vminsb (Minimum Signed Byte, AV p188) DIP("vminsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, binop(Iop_Min8Sx16, mkexpr(vA), mkexpr(vB)) ); @@ -15187,6 +15220,11 @@ static Bool dis_av_arith ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Min32Sx4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x3C2: // vminsd (Minimum Signed Double Word) + DIP("vminsd v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Min64Sx2, mkexpr(vA), mkexpr(vB)) ); + break; + /* Average */ case 0x402: // vavgub (Average Unsigned Byte, AV p152) @@ -15233,6 +15271,16 @@ static Bool dis_av_arith ( UInt theInstr ) binop(Iop_MullEven16Ux8, mkexpr(vA), mkexpr(vB))); break; + case 0x088: // vmulouw (Multiply Odd Unsigned Word) + DIP("vmulouw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop( Iop_MullEven32Ux4, mkexpr(vA), mkexpr(vB) ) ); + break; + + case 0x089: // vmuluwm (Multiply Unsigned Word Modulo) + DIP("vmuluwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop( Iop_Mul32x4, mkexpr(vA), mkexpr(vB) ) ); + break; + case 0x108: // vmulosb (Multiply Odd Signed Byte, AV p211) DIP("vmulosb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, @@ -15245,6 +15293,11 @@ static Bool dis_av_arith ( UInt theInstr ) binop(Iop_MullEven16Sx8, mkexpr(vA), mkexpr(vB))); break; + case 0x188: // vmulosw (Multiply Odd Signed Word) + DIP("vmulosw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop( Iop_MullEven32Sx4, mkexpr(vA), mkexpr(vB) ) ); + break; + case 0x208: // vmuleub (Multiply Even Unsigned Byte, AV p209) DIP("vmuleub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, MK_Iop_MullOdd8Ux16( mkexpr(vA), mkexpr(vB) )); @@ -15255,6 +15308,11 @@ static Bool dis_av_arith ( UInt theInstr ) 
putVReg( vD_addr, MK_Iop_MullOdd16Ux8( mkexpr(vA), mkexpr(vB) )); break; + case 0x288: // vmuleuw (Multiply Even Unsigned Word) + DIP("vmuleuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, MK_Iop_MullOdd32Ux4( mkexpr(vA), mkexpr(vB) ) ); + break; + case 0x308: // vmulesb (Multiply Even Signed Byte, AV p207) DIP("vmulesb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); putVReg( vD_addr, MK_Iop_MullOdd8Sx16( mkexpr(vA), mkexpr(vB) )); @@ -15265,6 +15323,10 @@ static Bool dis_av_arith ( UInt theInstr ) putVReg( vD_addr, MK_Iop_MullOdd16Sx8( mkexpr(vA), mkexpr(vB) )); break; + case 0x388: // vmulesw (Multiply Even Signed Word) + DIP("vmulesw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, MK_Iop_MullOdd32Sx4( mkexpr(vA), mkexpr(vB) ) ); + break; /* Sum Across Partial */ case 0x608: { // vsum4ubs (Sum Partial (1/4) UB Saturate, AV p275) @@ -15516,6 +15578,12 @@ static Bool dis_av_cmp ( UInt theInstr ) assign( vD, binop(Iop_CmpEQ32x4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x0C7: // vcmpequd (Compare Equal-to Unsigned Doubleword) + DIP("vcmpequd%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpEQ64x2, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x206: // vcmpgtub (Compare Greater-than Unsigned B, AV p168) DIP("vcmpgtub%s v%d,v%d,v%d\n", (flag_rC ? ".":""), vD_addr, vA_addr, vB_addr); @@ -15534,6 +15602,12 @@ static Bool dis_av_cmp ( UInt theInstr ) assign( vD, binop(Iop_CmpGT32Ux4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x2C7: // vcmpgtud (Compare Greater-than Unsigned double) + DIP("vcmpgtud%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT64Ux2, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x306: // vcmpgtsb (Compare Greater-than Signed B, AV p165) DIP("vcmpgtsb%s v%d,v%d,v%d\n", (flag_rC ? 
".":""), vD_addr, vA_addr, vB_addr); @@ -15552,6 +15626,12 @@ static Bool dis_av_cmp ( UInt theInstr ) assign( vD, binop(Iop_CmpGT32Sx4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x3C7: // vcmpgtsd (Compare Greater-than Signed double) + DIP("vcmpgtsd%s v%d,v%d,v%d\n", (flag_rC ? ".":""), + vD_addr, vA_addr, vB_addr); + assign( vD, binop(Iop_CmpGT64Sx2, mkexpr(vA), mkexpr(vB)) ); + break; + default: vex_printf("dis_av_cmp(ppc)(opc2)\n"); return False; @@ -15870,6 +15950,11 @@ static Bool dis_av_shift ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Rol32x4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x0C4: // vrld (Rotate Left Integer Double Word) + DIP("vrld v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Rol64x2, mkexpr(vA), mkexpr(vB)) ); + break; + /* Shift Left */ case 0x104: // vslb (Shift Left Integer B, AV p240) @@ -15887,6 +15972,11 @@ static Bool dis_av_shift ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Shl32x4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x5C4: // vsld (Shift Left Integer Double Word) + DIP("vsld v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Shl64x2, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x1C4: { // vsl (Shift Left, AV p239) IRTemp sh = newTemp(Ity_I8); DIP("vsl v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); @@ -15950,6 +16040,11 @@ static Bool dis_av_shift ( UInt theInstr ) putVReg( vD_addr, binop(Iop_Sar32x4, mkexpr(vA), mkexpr(vB)) ); break; + case 0x3C4: // vsrad (Shift Right Alg Double Word) + DIP("vsrad v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Sar64x2, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x44C: { // vsro (Shift Right by Octet, AV p258) IRTemp sh = newTemp(Ity_I8); DIP("vsro v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); @@ -15961,6 +16056,12 @@ static Bool dis_av_shift ( UInt theInstr ) break; } + case 0x6C4: // vsrd (Shift Right Double Word) + DIP("vsrd v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, binop(Iop_Shr64x2, mkexpr(vA), 
mkexpr(vB)) ); + break; + + default: vex_printf("dis_av_shift(ppc)(opc2)\n"); return False; @@ -16142,6 +16243,28 @@ static Bool dis_av_permute ( UInt theInstr ) unop(Iop_Dup32x4, mkU32(extend_s_8to32(SIMM_8))) ); break; + case 0x68C: // vmrgow (Merge Odd Word) + DIP("vmrgow v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + /* VD[0] <- VA[1] + VD[1] <- VB[1] + VD[2] <- VA[3] + VD[3] <- VB[3] + */ + putVReg( vD_addr, + binop(Iop_CatOddLanes32x4, mkexpr(vA), mkexpr(vB) ) ); + break; + + case 0x78C: // vmrgew (Merge Even Word) + DIP("vmrgew v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + /* VD[0] <- VA[0] + VD[1] <- VB[0] + VD[2] <- VA[2] + VD[3] <- VB[2] + */ + putVReg( vD_addr, + binop(Iop_CatEvenLanes32x4, mkexpr(vA), mkexpr(vB) ) ); + break; + default: vex_printf("dis_av_permute(ppc)(opc2)\n"); return False; @@ -16172,7 +16295,6 @@ static Bool dis_av_pack ( UInt theInstr ) vex_printf("dis_av_pack(ppc)(instr)\n"); return False; } - switch (opc2) { /* Packing */ case 0x00E: // vpkuhum (Pack Unsigned HW Unsigned Modulo, AV p224) @@ -16302,6 +16424,42 @@ static Bool dis_av_pack ( UInt theInstr ) binop(Iop_NarrowBin64to32x4, mkexpr(vA), mkexpr(vB)) ); return True; + case 0x4CE: // vpkudus (Pack Unsigned Double Word Unsigned Saturate) + DIP("vpkudus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_QNarrowBin64Uto32Ux4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + return True; + + case 0x54E: { // vpksdus (Pack Signed Double Word Unsigned Saturate) + // This insn does a double word signed->double word unsigned saturating conversion + // Conversion done here, then uses unsigned->unsigned vpk insn: + // => UnsignedSaturatingNarrow( x & ~ (x >>s 63) ) + // This is similar to the technique used for vpkswus, except done + // with double word integers versus word integers. 
+ IRTemp vA_tmp = newTemp(Ity_V128); + IRTemp vB_tmp = newTemp(Ity_V128); + DIP("vpksdus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + assign( vA_tmp, binop(Iop_AndV128, mkexpr(vA), + unop(Iop_NotV128, + binop(Iop_SarN64x2, + mkexpr(vA), mkU8(63)))) ); + assign( vB_tmp, binop(Iop_AndV128, mkexpr(vB), + unop(Iop_NotV128, + binop(Iop_SarN64x2, + mkexpr(vB), mkU8(63)))) ); + putVReg( vD_addr, binop(Iop_QNarrowBin64Uto32Ux4, + mkexpr(vA_tmp), mkexpr(vB_tmp)) ); + // TODO: set VSCR[SAT] + return True; + } + + case 0x5CE: // vpksdss (Pack Signed double word Signed Saturate) + DIP("vpksdss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); + putVReg( vD_addr, + binop(Iop_QNarrowBin64Sto32Sx4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + return True; default: break; // Fall through... } @@ -16413,6 +16571,20 @@ static Bool dis_av_pack ( UInt theInstr ) mkexpr(z23)) ); break; } + case 0x64E: { // vupkhsw (Unpack High Signed Word) + DIP("vupkhsw v%d,v%d\n", vD_addr, vB_addr); + assign( signs, binop(Iop_CmpGT32Sx4, mkexpr(zeros), mkexpr(vB)) ); + putVReg( vD_addr, + binop(Iop_InterleaveHI32x4, mkexpr(signs), mkexpr(vB)) ); + break; + } + case 0x6CE: { // vupklsw (Unpack Low Signed Word) + DIP("vupklsw v%d,v%d\n", vD_addr, vB_addr); + assign( signs, binop(Iop_CmpGT32Sx4, mkexpr(zeros), mkexpr(vB)) ); + putVReg( vD_addr, + binop(Iop_InterleaveLO32x4, mkexpr(signs), mkexpr(vB)) ); + break; + } default: vex_printf("dis_av_pack(ppc)(opc2)\n"); return False; @@ -18087,7 +18259,11 @@ DisResult disInstr_PPC_WRK ( if (dis_av_arith( theInstr )) goto decode_success; goto decode_failure; - case 0x0C0: // vaddudm + case 0x088: case 0x089: // vmulouw, vmuluwm + case 0x0C0: case 0x0C2: // vaddudm, vmaxud + case 0x1C2: case 0x2C2: case 0x3C2: // vmaxsd, vminud, vminsd + case 0x188: case 0x288: case 0x388: // vmulosw, vmuleuw, vmulesw + case 0x4C0: // vsubudm if (!allow_isa_2_07) goto decode_noP8; if (dis_av_arith( theInstr )) goto decode_success; goto decode_failure; @@ -18103,6 +18279,12 
@@ DisResult disInstr_PPC_WRK ( if (dis_av_shift( theInstr )) goto decode_success; goto decode_failure; + case 0x0C4: // vrld + case 0x3C4: case 0x5C4: case 0x6C4: // vsrad, vsld, vsrd + if (!allow_isa_2_07) goto decode_noP8; + if (dis_av_shift( theInstr )) goto decode_success; + goto decode_failure; + /* AV Logic */ case 0x404: case 0x444: case 0x484: // vand, vandc, vor case 0x4C4: case 0x504: // vxor, vnor @@ -18143,6 +18325,11 @@ DisResult disInstr_PPC_WRK ( if (dis_av_permute( theInstr )) goto decode_success; goto decode_failure; + case 0x68C: case 0x78C: // vmrgow, vmrgew + if (!allow_isa_2_07) goto decode_noP8; + if (dis_av_permute( theInstr )) goto decode_success; + goto decode_failure; + /* AV Pack, Unpack */ case 0x00E: case 0x04E: case 0x08E: // vpkuhum, vpkuwum, vpkuhus case 0x0CE: // vpkuwus @@ -18151,11 +18338,12 @@ DisResult disInstr_PPC_WRK ( case 0x20E: case 0x24E: case 0x28E: // vupkhsb, vupkhsh, vupklsb case 0x2CE: // vupklsh case 0x30E: case 0x34E: case 0x3CE: // vpkpx, vupkhpx, vupklpx - if (!allow_V) goto decode_noV; - if (dis_av_pack( theInstr )) goto decode_success; - goto decode_failure; + if (!allow_V) goto decode_noV; + if (dis_av_pack( theInstr )) goto decode_success; + goto decode_failure; - case 0x44E: // vpkudum + case 0x44E: case 0x4CE: case 0x54E: // vpkudum, vpkudus, vpksdus + case 0x5CE: case 0x64E: case 0x6cE: // vpksdss, vupkhsw, vupklsw if (!allow_isa_2_07) goto decode_noP8; if (dis_av_pack( theInstr )) goto decode_success; goto decode_failure; @@ -18175,6 +18363,13 @@ DisResult disInstr_PPC_WRK ( if (dis_av_cmp( theInstr )) goto decode_success; goto decode_failure; + case 0x0C7: // vcmpequd + case 0x2C7: // vcmpgtud + case 0x3C7: // vcmpgtsd + if (!allow_isa_2_07) goto decode_noP8; + if (dis_av_cmp( theInstr )) goto decode_success; + goto decode_failure; + /* AV Floating Point Compare */ case 0x0C6: case 0x1C6: case 0x2C6: // vcmpeqfp, vcmpgefp, vcmpgtfp case 0x3C6: // vcmpbfp diff --git a/VEX/priv/host_ppc_defs.c 
b/VEX/priv/host_ppc_defs.c index 71e1335b0e..e6742ca22c 100644 --- a/VEX/priv/host_ppc_defs.c +++ b/VEX/priv/host_ppc_defs.c @@ -675,17 +675,18 @@ const HChar* showPPCAvOp ( PPCAvOp op ) { /* Integer binary */ case Pav_ADDU: return "vaddu_m"; // b,h,w,dw - case Pav_QADDU: return "vaddu_s"; // b,h,w - case Pav_QADDS: return "vadds_s"; // b,h,w + case Pav_QADDU: return "vaddu_s"; // b,h,w,dw + case Pav_QADDS: return "vadds_s"; // b,h,w,dw - case Pav_SUBU: return "vsubu_m"; // b,h,w - case Pav_QSUBU: return "vsubu_s"; // b,h,w - case Pav_QSUBS: return "vsubs_s"; // b,h,w + case Pav_SUBU: return "vsubu_m"; // b,h,w,dw + case Pav_QSUBU: return "vsubu_s"; // b,h,w,dw + case Pav_QSUBS: return "vsubs_s"; // b,h,w,dw - case Pav_OMULU: return "vmulou"; // b,h - case Pav_OMULS: return "vmulos"; // b,h - case Pav_EMULU: return "vmuleu"; // b,h - case Pav_EMULS: return "vmules"; // b,h + case Pav_MULU: return "vmulu"; // w + case Pav_OMULU: return "vmulou"; // b,h,w + case Pav_OMULS: return "vmulos"; // b,h,w + case Pav_EMULU: return "vmuleu"; // b,h,w + case Pav_EMULS: return "vmules"; // b,h,w case Pav_AVGU: return "vavgu"; // b,h,w case Pav_AVGS: return "vavgs"; // b,h,w @@ -702,10 +703,10 @@ const HChar* showPPCAvOp ( PPCAvOp op ) { case Pav_CMPGTS: return "vcmpgts"; // b,h,w /* Shift */ - case Pav_SHL: return "vsl"; // ' ',b,h,w - case Pav_SHR: return "vsr"; // ' ',b,h,w - case Pav_SAR: return "vsra"; // b,h,w - case Pav_ROTL: return "vrl"; // b,h,w + case Pav_SHL: return "vsl"; // ' ',b,h,w,dw + case Pav_SHR: return "vsr"; // ' ',b,h,w,dw + case Pav_SAR: return "vsra"; // b,h,w,dw + case Pav_ROTL: return "vrl"; // b,h,w,dw /* Pack */ case Pav_PACKUU: return "vpku_um"; // h,w,dw @@ -718,6 +719,10 @@ const HChar* showPPCAvOp ( PPCAvOp op ) { case Pav_MRGHI: return "vmrgh"; // b,h,w case Pav_MRGLO: return "vmrgl"; // b,h,w + /* Concatenation */ + case Pav_CATODD: return "vmrgow"; // w + case Pav_CATEVEN: return "vmrgew"; // w + default: vpanic("showPPCAvOp"); } } @@ -1386,6 
+1391,7 @@ PPCInstr* PPCInstr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl ) { i->Pin.AvPerm.ctl = ctl; return i; } + PPCInstr* PPCInstr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR ) { PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr)); i->tag = Pin_AvSel; @@ -4781,6 +4787,12 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc, case Pav_QSUBU: opc2 = 1664; break; // vsubuws case Pav_QSUBS: opc2 = 1920; break; // vsubsws + case Pav_MULU: opc2 = 137; break; // vmuluwm + case Pav_OMULU: opc2 = 136; break; // vmulouw + case Pav_OMULS: opc2 = 392; break; // vmulosw + case Pav_EMULU: opc2 = 648; break; // vmuleuw + case Pav_EMULS: opc2 = 904; break; // vmulesw + case Pav_AVGU: opc2 = 1154; break; // vavguw case Pav_AVGS: opc2 = 1410; break; // vavgsw @@ -4807,6 +4819,9 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc, case Pav_MRGHI: opc2 = 140; break; // vmrghw case Pav_MRGLO: opc2 = 396; break; // vmrglw + case Pav_CATODD: opc2 = 1676; break; // vmrgow + case Pav_CATEVEN: opc2 = 1932; break; // vmrgew + default: goto bad; } @@ -4820,13 +4835,26 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc, UInt v_srcR = vregNo(i->Pin.AvBin64x2.srcR); UInt opc2; switch (i->Pin.AvBin64x2.op) { - case Pav_ADDU: opc2 = 192; break; // vaddudm vector double add - case Pav_PACKUU: opc2 = 1102; break; // vpkudum - // FIXME: We currently don't have a vector compare equal double word, so it's a hack - // to use vcmpequw, but it works. 
- case Pav_CMPEQU: opc2 = 134; break; // vcmpequw - default: - goto bad; + case Pav_ADDU: opc2 = 192; break; // vaddudm vector double add + case Pav_SUBU: opc2 = 1216; break; // vsubudm vector double subtract + case Pav_MAXU: opc2 = 194; break; // vmaxud vector double max + case Pav_MAXS: opc2 = 450; break; // vmaxsd vector double max + case Pav_MINU: opc2 = 706; break; // vminud vector double min + case Pav_MINS: opc2 = 962; break; // vminsd vector double min + case Pav_CMPEQU: opc2 = 199; break; // vcmpequd vector double compare + case Pav_CMPGTU: opc2 = 711; break; // vcmpgtud vector double compare + case Pav_CMPGTS: opc2 = 967; break; // vcmpgtsd vector double compare + case Pav_SHL: opc2 = 1476; break; // vsld + case Pav_SHR: opc2 = 1732; break; // vsrd + case Pav_SAR: opc2 = 964; break; // vsrad + case Pav_ROTL: opc2 = 196; break; // vrld + case Pav_PACKUU: opc2 = 1102; break; // vpkudum + case Pav_QPACKUU: opc2 = 1230; break; // vpkudus, vpksdus (emulated) + case Pav_QPACKSS: opc2 = 1486; break; // vpksdss + case Pav_MRGHI: opc2 = 1614; break; // vupkhsw + case Pav_MRGLO: opc2 = 1742; break; // vupklsw + default: + goto bad; } p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, opc2 ); goto done; diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index ce170a1d2a..6e6ec241c5 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -405,6 +405,7 @@ typedef Pav_AND, Pav_OR, Pav_XOR, /* Bitwise */ Pav_ADDU, Pav_QADDU, Pav_QADDS, Pav_SUBU, Pav_QSUBU, Pav_QSUBS, + Pav_MULU, Pav_OMULU, Pav_OMULS, Pav_EMULU, Pav_EMULS, Pav_AVGU, Pav_AVGS, Pav_MAXU, Pav_MAXS, @@ -422,6 +423,9 @@ typedef /* Merge */ Pav_MRGHI, Pav_MRGLO, + + /* Concatenation */ + Pav_CATODD, Pav_CATEVEN, } PPCAvOp; diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c index 3684e42ab3..e75ad43ba3 100644 --- a/VEX/priv/host_ppc_isel.c +++ b/VEX/priv/host_ppc_isel.c @@ -5044,9 +5044,14 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) case Iop_Max32Sx4: op = Pav_MAXS; 
goto do_AvBin32x4; case Iop_Min32Ux4: op = Pav_MINU; goto do_AvBin32x4; case Iop_Min32Sx4: op = Pav_MINS; goto do_AvBin32x4; + case Iop_Mul32x4: op = Pav_MULU; goto do_AvBin32x4; + case Iop_MullEven32Ux4: op = Pav_OMULU; goto do_AvBin32x4; + case Iop_MullEven32Sx4: op = Pav_OMULS; goto do_AvBin32x4; case Iop_CmpEQ32x4: op = Pav_CMPEQU; goto do_AvBin32x4; case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4; case Iop_CmpGT32Sx4: op = Pav_CMPGTS; goto do_AvBin32x4; + case Iop_CatOddLanes32x4: op = Pav_CATODD; goto do_AvBin32x4; + case Iop_CatEvenLanes32x4: op = Pav_CATEVEN; goto do_AvBin32x4; do_AvBin32x4: { HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); @@ -5055,8 +5060,24 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } + case Iop_Shl64x2: op = Pav_SHL; goto do_AvBin64x2; + case Iop_Shr64x2: op = Pav_SHR; goto do_AvBin64x2; + case Iop_Sar64x2: op = Pav_SAR; goto do_AvBin64x2; + case Iop_Rol64x2: op = Pav_ROTL; goto do_AvBin64x2; case Iop_NarrowBin64to32x4: op = Pav_PACKUU; goto do_AvBin64x2; + case Iop_QNarrowBin64Sto32Sx4: op = Pav_QPACKSS; goto do_AvBin64x2; + case Iop_QNarrowBin64Uto32Ux4: op = Pav_QPACKUU; goto do_AvBin64x2; + case Iop_InterleaveHI64x2: op = Pav_MRGHI; goto do_AvBin64x2; + case Iop_InterleaveLO64x2: op = Pav_MRGLO; goto do_AvBin64x2; case Iop_Add64x2: op = Pav_ADDU; goto do_AvBin64x2; + case Iop_Sub64x2: op = Pav_SUBU; goto do_AvBin64x2; + case Iop_Max64Ux2: op = Pav_MAXU; goto do_AvBin64x2; + case Iop_Max64Sx2: op = Pav_MAXS; goto do_AvBin64x2; + case Iop_Min64Ux2: op = Pav_MINU; goto do_AvBin64x2; + case Iop_Min64Sx2: op = Pav_MINS; goto do_AvBin64x2; + case Iop_CmpEQ64x2: op = Pav_CMPEQU; goto do_AvBin64x2; + case Iop_CmpGT64Ux2: op = Pav_CMPGTU; goto do_AvBin64x2; + case Iop_CmpGT64Sx2: op = Pav_CMPGTS; goto do_AvBin64x2; do_AvBin64x2: { HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); @@ -5097,6 +5118,17 @@ static 
HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } + case Iop_ShlN64x2: op = Pav_SHL; goto do_AvShift64x2; + case Iop_ShrN64x2: op = Pav_SHR; goto do_AvShift64x2; + case Iop_SarN64x2: op = Pav_SAR; goto do_AvShift64x2; + do_AvShift64x2: { + HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1); + HReg dst = newVRegV(env); + HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2); + addInstr(env, PPCInstr_AvBin64x2(op, dst, r_src, v_shft)); + return dst; + } + case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128; case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128; do_AvShiftV128: { diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c index 8c4dc10655..9b62bcaa6b 100644 --- a/VEX/priv/ir_defs.c +++ b/VEX/priv/ir_defs.c @@ -526,6 +526,8 @@ void ppIROp ( IROp op ) case Iop_QNarrowBin16Sto8Ux8: vex_printf("QNarrowBin16Sto8Ux8"); return; case Iop_QNarrowBin16Sto8Sx8: vex_printf("QNarrowBin16Sto8Sx8"); return; case Iop_QNarrowBin32Sto16Sx4: vex_printf("QNarrowBin32Sto16Sx4"); return; + case Iop_QNarrowBin64Sto32Sx4: vex_printf("QNarrowBin64Sto32Sx4"); return; + case Iop_QNarrowBin64Uto32Ux4: vex_printf("QNarrowBin64Uto32Ux4"); return; case Iop_NarrowBin16to8x8: vex_printf("NarrowBin16to8x8"); return; case Iop_NarrowBin32to16x4: vex_printf("NarrowBin32to16x4"); return; case Iop_NarrowBin64to32x4: vex_printf("NarrowBin64to32x4"); return; @@ -775,8 +777,10 @@ void ppIROp ( IROp op ) case Iop_MullEven8Ux16: vex_printf("MullEven8Ux16"); return; case Iop_MullEven16Ux8: vex_printf("MullEven16Ux8"); return; + case Iop_MullEven32Ux4: vex_printf("MullEven32Ux4"); return; case Iop_MullEven8Sx16: vex_printf("MullEven8Sx16"); return; case Iop_MullEven16Sx8: vex_printf("MullEven16Sx8"); return; + case Iop_MullEven32Sx4: vex_printf("MullEven32Sx4"); return; case Iop_Avg8Ux16: vex_printf("Avg8Ux16"); return; case Iop_Avg16Ux8: vex_printf("Avg16Ux8"); return; @@ -788,16 +792,20 @@ void ppIROp ( IROp op ) case Iop_Max8Sx16: vex_printf("Max8Sx16"); return; case Iop_Max16Sx8: 
vex_printf("Max16Sx8"); return; case Iop_Max32Sx4: vex_printf("Max32Sx4"); return; + case Iop_Max64Sx2: vex_printf("Max64Sx2"); return; case Iop_Max8Ux16: vex_printf("Max8Ux16"); return; case Iop_Max16Ux8: vex_printf("Max16Ux8"); return; case Iop_Max32Ux4: vex_printf("Max32Ux4"); return; + case Iop_Max64Ux2: vex_printf("Max64Ux2"); return; case Iop_Min8Sx16: vex_printf("Min8Sx16"); return; case Iop_Min16Sx8: vex_printf("Min16Sx8"); return; case Iop_Min32Sx4: vex_printf("Min32Sx4"); return; + case Iop_Min64Sx2: vex_printf("Min64Sx2"); return; case Iop_Min8Ux16: vex_printf("Min8Ux16"); return; case Iop_Min16Ux8: vex_printf("Min16Ux8"); return; case Iop_Min32Ux4: vex_printf("Min32Ux4"); return; + case Iop_Min64Ux2: vex_printf("Min64Ux2"); return; case Iop_CmpEQ8x16: vex_printf("CmpEQ8x16"); return; case Iop_CmpEQ16x8: vex_printf("CmpEQ16x8"); return; @@ -810,6 +818,7 @@ void ppIROp ( IROp op ) case Iop_CmpGT8Ux16: vex_printf("CmpGT8Ux16"); return; case Iop_CmpGT16Ux8: vex_printf("CmpGT16Ux8"); return; case Iop_CmpGT32Ux4: vex_printf("CmpGT32Ux4"); return; + case Iop_CmpGT64Ux2: vex_printf("CmpGT64Ux2"); return; case Iop_Cnt8x16: vex_printf("Cnt8x16"); return; case Iop_Clz8Sx16: vex_printf("Clz8Sx16"); return; @@ -874,6 +883,7 @@ void ppIROp ( IROp op ) case Iop_Rol8x16: vex_printf("Rol8x16"); return; case Iop_Rol16x8: vex_printf("Rol16x8"); return; case Iop_Rol32x4: vex_printf("Rol32x4"); return; + case Iop_Rol64x2: vex_printf("Rol64x2"); return; case Iop_NarrowBin16to8x16: vex_printf("NarrowBin16to8x16"); return; case Iop_NarrowBin32to16x8: vex_printf("NarrowBin32to16x8"); return; @@ -2789,19 +2799,24 @@ void typeOfPrimop ( IROp op, case Iop_MulHi16Sx8: case Iop_MulHi32Sx4: case Iop_QDMulHi16Sx8: case Iop_QDMulHi32Sx4: case Iop_QRDMulHi16Sx8: case Iop_QRDMulHi32Sx4: - case Iop_MullEven8Ux16: case Iop_MullEven16Ux8: - case Iop_MullEven8Sx16: case Iop_MullEven16Sx8: + case Iop_MullEven8Ux16: case Iop_MullEven16Ux8: case Iop_MullEven32Ux4: + case Iop_MullEven8Sx16: case 
Iop_MullEven16Sx8: case Iop_MullEven32Sx4: case Iop_Avg8Ux16: case Iop_Avg16Ux8: case Iop_Avg32Ux4: case Iop_Avg8Sx16: case Iop_Avg16Sx8: case Iop_Avg32Sx4: case Iop_Max8Sx16: case Iop_Max16Sx8: case Iop_Max32Sx4: + case Iop_Max64Sx2: case Iop_Max8Ux16: case Iop_Max16Ux8: case Iop_Max32Ux4: + case Iop_Max64Ux2: case Iop_Min8Sx16: case Iop_Min16Sx8: case Iop_Min32Sx4: + case Iop_Min64Sx2: case Iop_Min8Ux16: case Iop_Min16Ux8: case Iop_Min32Ux4: + case Iop_Min64Ux2: case Iop_CmpEQ8x16: case Iop_CmpEQ16x8: case Iop_CmpEQ32x4: case Iop_CmpEQ64x2: case Iop_CmpGT8Sx16: case Iop_CmpGT16Sx8: case Iop_CmpGT32Sx4: case Iop_CmpGT64Sx2: case Iop_CmpGT8Ux16: case Iop_CmpGT16Ux8: case Iop_CmpGT32Ux4: + case Iop_CmpGT64Ux2: case Iop_Shl8x16: case Iop_Shl16x8: case Iop_Shl32x4: case Iop_Shl64x2: case Iop_QShl8x16: case Iop_QShl16x8: case Iop_QShl32x4: case Iop_QShl64x2: @@ -2810,10 +2825,11 @@ void typeOfPrimop ( IROp op, case Iop_Shr8x16: case Iop_Shr16x8: case Iop_Shr32x4: case Iop_Shr64x2: case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4: case Iop_Sar64x2: case Iop_Sal8x16: case Iop_Sal16x8: case Iop_Sal32x4: case Iop_Sal64x2: - case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4: + case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4:case Iop_Rol64x2: case Iop_QNarrowBin16Sto8Ux16: case Iop_QNarrowBin32Sto16Ux8: case Iop_QNarrowBin16Sto8Sx16: case Iop_QNarrowBin32Sto16Sx8: case Iop_QNarrowBin16Uto8Ux16: case Iop_QNarrowBin32Uto16Ux8: + case Iop_QNarrowBin64Sto32Sx4: case Iop_QNarrowBin64Uto32Ux4: case Iop_NarrowBin16to8x16: case Iop_NarrowBin32to16x8: case Iop_NarrowBin64to32x4: case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8: diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index eef72babde..50d986bbb1 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -1382,8 +1382,8 @@ typedef Iop_MulHi16Ux8, Iop_MulHi32Ux4, Iop_MulHi16Sx8, Iop_MulHi32Sx4, /* (widening signed/unsigned of even lanes, with lowest lane=zero) */ - Iop_MullEven8Ux16, Iop_MullEven16Ux8, 
- Iop_MullEven8Sx16, Iop_MullEven16Sx8, + Iop_MullEven8Ux16, Iop_MullEven16Ux8, Iop_MullEven32Ux4, + Iop_MullEven8Sx16, Iop_MullEven16Sx8, Iop_MullEven32Sx4, /* FIXME: document these */ Iop_Mull8Ux8, Iop_Mull8Sx8, Iop_Mull16Ux4, Iop_Mull16Sx4, @@ -1424,15 +1424,15 @@ typedef Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, /* MIN/MAX */ - Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, - Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, - Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, - Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, + Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2, + Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2, + Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2, + Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2, /* COMPARISON */ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2, Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2, - Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, + Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2, /* COUNT ones / leading zeroes / leading sign bits (not including topmost bit) */ @@ -1450,7 +1450,7 @@ typedef Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2, Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2, Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2, - Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, + Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, Iop_Rol64x2, /* VECTOR x VECTOR SATURATING SHIFT */ Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2, @@ -1467,6 +1467,7 @@ typedef Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8, Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8, Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8, + Iop_QNarrowBin64Sto32Sx4, Iop_QNarrowBin64Uto32Ux4, Iop_NarrowBin64to32x4, /* NARROWING (unary) -- narrow V128 into I64 */