From: Cerion Armour-Brown Date: Thu, 15 Sep 2005 12:42:16 +0000 (+0000) Subject: Implemented simple AltiVec arithmetic insns: X-Git-Tag: svn/VALGRIND_3_1_1^2~96 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b9e23ffd5372283e07d072d5aeb57da32a7507ef;p=thirdparty%2Fvalgrind.git Implemented simple AltiVec arithmetic insns: - add, sub, max, min, avg, hi/lo mul and all varieties thereof: (un)signed, (un)saturated, 8|16|32 lane size... fixed backend hi/lo_mul: only valid for 16|32 bit lanes, not 8. git-svn-id: svn://svn.valgrind.org/vex/trunk@1395 --- diff --git a/VEX/priv/guest-ppc32/toIR.c b/VEX/priv/guest-ppc32/toIR.c index b56488d4fb..7e53876db8 100644 --- a/VEX/priv/guest-ppc32/toIR.c +++ b/VEX/priv/guest-ppc32/toIR.c @@ -5068,247 +5068,264 @@ static Bool dis_av_arith ( UInt theInstr ) /* Add */ case 0x180: { // vaddcuw (Add Carryout Unsigned Word, AV p136) DIP("vaddcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - /* ov = x >u (x+y) */ - IRTemp sum = newTemp(Ity_V128); - assign( sum, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) ); + /* unsigned_ov(x+y) = (y >u not(x)) */ putVReg( vD_addr, binop(Iop_ShrN32x4, - binop(Iop_CmpGT32Ux4, mkexpr(vA), mkexpr(sum)), + binop(Iop_CmpGT32Ux4, mkexpr(vB), + unop(Iop_NotV128, mkexpr(vA))), mkU8(31)) ); break; } case 0x000: // vaddubm (Add Unsigned Byte Modulo, AV p141) DIP("vaddubm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_Add8x16, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x040: // vadduhm (Add Unsigned Half Word Modulo, AV p143) DIP("vadduhm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_Add16x8, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x080: // vadduwm (Add Unsigned Word Modulo, AV p145) DIP("vadduwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_Add32x4, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x200: // vaddubs (Add Unsigned Byte Saturate, AV p142) DIP("vaddubs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QAdd8Ux16, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT], perhaps via new primop: Iop_SatOfQAdd8Ux16 + break; + case 0x240: // vadduhs (Add Unsigned Half Word Saturate, AV p144) DIP("vadduhs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QAdd16Ux8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x280: // vadduws (Add Unsigned Word Saturate, AV p146) DIP("vadduws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QAdd32Ux4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x300: // vaddsbs (Add Signed Byte Saturate, AV p138) DIP("vaddsbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QAdd8Sx16, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x340: // vaddshs (Add Signed Half Word Saturate, AV p139) DIP("vaddshs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QAdd16Sx8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x380: // vaddsws (Add Signed Word Saturate, AV p140) DIP("vaddsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QAdd32Sx4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + + /* Subtract */ - case 0x580: // vsubcuw (Subtract Carryout Unsigned Word, AV p260) + case 0x580: { // vsubcuw (Subtract Carryout Unsigned Word, AV p260) DIP("vsubcuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + /* unsigned_ov(x-y) = (y >u x) */ + putVReg( vD_addr, binop(Iop_ShrN32x4, + unop(Iop_NotV128, + binop(Iop_CmpGT32Ux4, mkexpr(vB), + mkexpr(vA))), + mkU8(31)) ); + break; + } case 0x400: // vsububm (Subtract Unsigned Byte Modulo, AV p265) DIP("vsububm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_Sub8x16, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x440: // vsubuhm (Subtract Unsigned Half Word Modulo, AV p267) DIP("vsubuhm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_Sub16x8, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x480: // vsubuwm (Subtract Unsigned Word Modulo, AV p269) DIP("vsubuwm v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_Sub32x4, mkexpr(vA), mkexpr(vB)) ); + break; + case 0x600: // vsububs (Subtract Unsigned Byte Saturate, AV p266) DIP("vsububs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QSub8Ux16, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x640: // vsubuhs (Subtract Unsigned Half Word Saturate, AV p268) DIP("vsubuhs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QSub16Ux8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x680: // vsubuws (Subtract Unsigned Word Saturate, AV p270) DIP("vsubuws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QSub32Ux4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x700: // vsubsbs (Subtract Signed Byte Saturate, AV p262) DIP("vsubsbs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QSub8Sx16, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x740: // vsubshs (Subtract Signed Half Word Saturate, AV p263) DIP("vsubshs v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_QSub16Sx8, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; + case 0x780: // vsubsws (Subtract Signed Word Saturate, AV p264) DIP("vsubsws v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_QSub32Sx4, mkexpr(vA), mkexpr(vB)) ); + // TODO: set VSCR[SAT] + break; /* Maximum */ case 0x002: // vmaxub (Maximum Unsigned Byte, AV p182) DIP("vmaxub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Max8Ux16, mkexpr(vA), mkexpr(vB)) ); + break; case 0x042: // vmaxuh (Maximum Unsigned Half Word, AV p183) DIP("vmaxuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Max16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x082: // vmaxuw (Maximum Unsigned Word, AV p184) DIP("vmaxuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Max32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; case 0x102: // vmaxsb (Maximum Signed Byte, AV p179) DIP("vmaxsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Max8Sx16, mkexpr(vA), mkexpr(vB)) ); + break; case 0x142: // vmaxsh (Maximum Signed Half Word, AV p180) DIP("vmaxsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Max16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x182: // vmaxsw (Maximum Signed Word, AV p181) DIP("vmaxsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Max32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; /* Minimum */ case 0x202: // vminub (Minimum Unsigned Byte, AV p191) DIP("vminub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Min8Ux16, mkexpr(vA), mkexpr(vB)) ); + break; case 0x242: // vminuh (Minimum Unsigned Half Word, AV p192) DIP("vminuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Min16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x282: // vminuw (Minimum Unsigned Word, AV p193) DIP("vminuw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Min32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; case 0x302: // vminsb (Minimum Signed Byte, AV p188) DIP("vminsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Min8Sx16, mkexpr(vA), mkexpr(vB)) ); + break; case 0x342: // vminsh (Minimum Signed Half Word, AV p189) DIP("vminsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Min16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x382: // vminsw (Minimum Signed Word, AV p190) DIP("vminsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; - + putVReg( vD_addr, binop(Iop_Min32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; + /* Average */ case 0x402: // vavgub (Average Unsigned Byte, AV p152) DIP("vavgub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Avg8Ux16, mkexpr(vA), mkexpr(vB)) ); + break; case 0x442: // vavguh (Average Unsigned Half Word, AV p153) DIP("vavguh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Avg16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x482: // vavguw (Average Unsigned Word, AV p154) DIP("vavguw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Avg32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; case 0x502: // vavgsb (Average Signed Byte, AV p149) DIP("vavgsb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Avg8Sx16, mkexpr(vA), mkexpr(vB)) ); + break; case 0x542: // vavgsh (Average Signed Half Word, AV p150) DIP("vavgsh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Avg16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x582: // vavgsw (Average Signed Word, AV p151) DIP("vavgsw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_Avg32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; /* Multiply */ case 0x008: // vmuloub (Multiply Odd Unsigned Byte, AV p213) DIP("vmuloub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_MulLo16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x048: // vmulouh (Multiply Odd Unsigned Half Word, AV p214) DIP("vmulouh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_MulLo32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; case 0x108: // vmulosb (Multiply Odd Signed Byte, AV p211) DIP("vmulosb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_MulLo16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x148: // vmulosh (Multiply Odd Signed Half Word, AV p212) DIP("vmulosh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_MulLo32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; case 0x208: // vmuleub (Multiply Even Unsigned Byte, AV p209) DIP("vmuleub v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_MulHi16Ux8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x248: // vmuleuh (Multiply Even Unsigned Half Word, AV p210) DIP("vmuleuh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_MulHi32Ux4, mkexpr(vA), mkexpr(vB)) ); + break; case 0x308: // vmulesb (Multiply Even Signed Byte, AV p207) DIP("vmulesb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_MulHi16Sx8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x348: // vmulesh (Multiply Even Signed Half Word, AV p208) DIP("vmulesh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, binop(Iop_MulHi32Sx4, mkexpr(vA), mkexpr(vB)) ); + break; /* Sum Across Partial */ diff --git a/VEX/priv/host-ppc32/hdefs.c b/VEX/priv/host-ppc32/hdefs.c index 7b0a685169..abdad31d41 100644 --- a/VEX/priv/host-ppc32/hdefs.c +++ b/VEX/priv/host-ppc32/hdefs.c @@ -2902,11 +2902,6 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) case Pav_SUBUS: opc2 = 1536; break; // vsububs case Pav_SUBSS: opc2 = 1792; break; // vsubsbs - case Pav_OMULU: opc2 = 8; break; // vmuloub - case Pav_OMULS: opc2 = 264; break; // vmulosb - case Pav_EMULU: opc2 = 520; break; // vmuleub - case Pav_EMULS: opc2 = 776; break; // vmulesb - case Pav_AVGU: opc2 = 1026; break; // vavgub case Pav_AVGS: opc2 = 1282; break; // vavgsb case Pav_MAXU: opc2 = 2; break; // vmaxub @@ -2948,10 +2943,10 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) case Pav_SUBUS: opc2 = 1600; break; // vsubuhs case Pav_SUBSS: opc2 = 1856; break; // vsubshs - case Pav_OMULU: opc2 = 72; break; // vmulouh - case Pav_OMULS: opc2 = 328; break; // vmulosh - case Pav_EMULU: opc2 = 584; break; // vmuleuh - case Pav_EMULS: opc2 = 840; break; // vmulesh + case Pav_OMULU: opc2 = 8; break; // vmuloub + case Pav_OMULS: opc2 = 264; break; // vmulosb + case Pav_EMULU: opc2 = 520; break; // vmuleub + case Pav_EMULS: opc2 = 776; break; // vmulesb case Pav_AVGU: opc2 = 1090; break; // vavguh case Pav_AVGS: opc2 = 1346; break; // vavgsh @@ -3000,6 +2995,11 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) case Pav_SUBUS: opc2 = 1664; break; // vsubuws case Pav_SUBSS: opc2 = 1920; break; // vsubsws + case Pav_OMULU: opc2 = 72; break; // vmulouh + case Pav_OMULS: opc2 = 328; break; // vmulosh + case Pav_EMULU: opc2 = 584; break; // vmuleuh + case Pav_EMULS: opc2 = 840; break; // vmulesh + case Pav_AVGU: opc2 = 1154; break; // vavguw case Pav_AVGS: opc2 = 1410; break; // vavgsw @@ -3069,17 +3069,17 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) // Finally, do the multiply: p = mkFormVA( p, 4, v_dst, v_srcL, vB, v_srcR, 46 ); - break; + break; } case Pav_CMPEQF: p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 0, 198 ); // vcmpeqfp - break; + break; case Pav_CMPGTF: p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 710 ); // vcmpgtfp - break; + break; case Pav_CMPGEF: p = mkFormVXR( p, 4, v_dst, v_srcL, v_srcR, 1, 454 ); // vcmpgefp - break; + break; default: goto bad; diff --git a/VEX/priv/host-ppc32/isel.c b/VEX/priv/host-ppc32/isel.c index fc4c11b80b..894efdccc9 100644 --- a/VEX/priv/host-ppc32/isel.c +++ b/VEX/priv/host-ppc32/isel.c @@ -3217,7 +3217,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. addInstr(env, X86Instr_Sse32Fx4(op, argR, dst)); //.. return dst; //.. } -//.. + //.. case Iop_CmpEQ64Fx2: op = Xsse_CMPEQF; goto do_64Fx2; //.. case Iop_CmpLT64Fx2: op = Xsse_CMPLTF; goto do_64Fx2; //.. case Iop_CmpLE64Fx2: op = Xsse_CMPLEF; goto do_64Fx2; @@ -3237,7 +3237,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. addInstr(env, X86Instr_Sse64Fx2(op, argR, dst)); //.. return dst; //.. } -//.. + //.. case Iop_CmpEQ32F0x4: op = Xsse_CMPEQF; goto do_32F0x4; //.. case Iop_CmpLT32F0x4: op = Xsse_CMPLTF; goto do_32F0x4; //.. case Iop_CmpLE32F0x4: op = Xsse_CMPLEF; goto do_32F0x4; @@ -3255,7 +3255,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. addInstr(env, X86Instr_Sse32FLo(op, argR, dst)); //.. return dst; //.. } -//.. + //.. case Iop_CmpEQ64F0x2: op = Xsse_CMPEQF; goto do_64F0x2; //.. case Iop_CmpLT64F0x2: op = Xsse_CMPLTF; goto do_64F0x2; //.. case Iop_CmpLE64F0x2: op = Xsse_CMPLEF; goto do_64F0x2; @@ -3274,14 +3274,14 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. addInstr(env, X86Instr_Sse64FLo(op, argR, dst)); //.. return dst; //.. } -//.. + //.. case Iop_QNarrow32Sx4: //.. op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg; //.. case Iop_QNarrow16Sx8: //.. op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg; //.. case Iop_QNarrow16Ux8: //.. op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg; -//.. + //.. case Iop_InterleaveHI8x16: //.. op = Xsse_UNPCKHB; arg1isEReg = True; goto do_SseReRg; //.. case Iop_InterleaveHI16x8: @@ -3290,7 +3290,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. op = Xsse_UNPCKHD; arg1isEReg = True; goto do_SseReRg; //.. case Iop_InterleaveHI64x2: //.. op = Xsse_UNPCKHQ; arg1isEReg = True; goto do_SseReRg; -//.. + //.. case Iop_InterleaveLO8x16: //.. op = Xsse_UNPCKLB; arg1isEReg = True; goto do_SseReRg; //.. case Iop_InterleaveLO16x8: @@ -3303,9 +3303,15 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) case Iop_AndV128: op = Pav_AND; goto do_AvBin; case Iop_OrV128: op = Pav_OR; goto do_AvBin; case Iop_XorV128: op = Pav_XOR; goto do_AvBin; + do_AvBin: { + HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); + HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegV(env); + addInstr(env, PPC32Instr_AvBinary(op, dst, arg1, arg2)); + return dst; + } + //.. case Iop_Add8x16: op = Xsse_ADD8; goto do_SseReRg; -//.. case Iop_Add16x8: op = Xsse_ADD16; goto do_SseReRg; -//.. case Iop_Add32x4: op = Xsse_ADD32; goto do_SseReRg; //.. case Iop_Add64x2: op = Xsse_ADD64; goto do_SseReRg; //.. case Iop_QAdd8Sx16: op = Xsse_QADD8S; goto do_SseReRg; //.. case Iop_QAdd16Sx8: op = Xsse_QADD16S; goto do_SseReRg; @@ -3319,31 +3325,74 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. case Iop_CmpGT8Sx16: op = Xsse_CMPGT8S; goto do_SseReRg; //.. case Iop_CmpGT16Sx8: op = Xsse_CMPGT16S; goto do_SseReRg; //.. case Iop_CmpGT32Sx4: op = Xsse_CMPGT32S; goto do_SseReRg; -//.. case Iop_Max16Sx8: op = Xsse_MAX16S; goto do_SseReRg; -//.. case Iop_Max8Ux16: op = Xsse_MAX8U; goto do_SseReRg; -//.. case Iop_Min16Sx8: op = Xsse_MIN16S; goto do_SseReRg; -//.. case Iop_Min8Ux16: op = Xsse_MIN8U; goto do_SseReRg; -//.. case Iop_MulHi16Ux8: op = Xsse_MULHI16U; goto do_SseReRg; -//.. case Iop_MulHi16Sx8: op = Xsse_MULHI16S; goto do_SseReRg; //.. case Iop_Mul16x8: op = Xsse_MUL16; goto do_SseReRg; -//.. case Iop_Sub8x16: op = Xsse_SUB8; goto do_SseReRg; -//.. case Iop_Sub16x8: op = Xsse_SUB16; goto do_SseReRg; -//.. case Iop_Sub32x4: op = Xsse_SUB32; goto do_SseReRg; //.. case Iop_Sub64x2: op = Xsse_SUB64; goto do_SseReRg; //.. case Iop_QSub8Sx16: op = Xsse_QSUB8S; goto do_SseReRg; //.. case Iop_QSub16Sx8: op = Xsse_QSUB16S; goto do_SseReRg; //.. case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg; //.. case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg; - do_AvBin: { + + case Iop_Add8x16: op = Pav_ADDUM; goto do_AvBin8x16; + case Iop_QAdd8Ux16: op = Pav_ADDUS; goto do_AvBin8x16; + case Iop_QAdd8Sx16: op = Pav_ADDSS; goto do_AvBin8x16; + case Iop_Sub8x16: op = Pav_SUBUM; goto do_AvBin8x16; + case Iop_QSub8Ux16: op = Pav_SUBUS; goto do_AvBin8x16; + case Iop_QSub8Sx16: op = Pav_SUBSS; goto do_AvBin8x16; + case Iop_Avg8Ux16: op = Pav_AVGU; goto do_AvBin8x16; + case Iop_Avg8Sx16: op = Pav_AVGS; goto do_AvBin8x16; + case Iop_Max8Ux16: op = Pav_MAXU; goto do_AvBin8x16; + case Iop_Max8Sx16: op = Pav_MAXS; goto do_AvBin8x16; + case Iop_Min8Ux16: op = Pav_MINU; goto do_AvBin8x16; + case Iop_Min8Sx16: op = Pav_MINS; goto do_AvBin8x16; + do_AvBin8x16: { HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); HReg dst = newVRegV(env); - addInstr(env, PPC32Instr_AvBinary(op, dst, arg1, arg2)); + addInstr(env, PPC32Instr_AvBin8x16(op, dst, arg1, arg2)); + return dst; + } + + case Iop_Add16x8: op = Pav_ADDUM; goto do_AvBin16x8; + case Iop_QAdd16Ux8: op = Pav_ADDUS; goto do_AvBin16x8; + case Iop_QAdd16Sx8: op = Pav_ADDSS; goto do_AvBin16x8; + case Iop_Sub16x8: op = Pav_SUBUM; goto do_AvBin16x8; + case Iop_QSub16Ux8: op = Pav_SUBUS; goto do_AvBin16x8; + case Iop_QSub16Sx8: op = Pav_SUBSS; goto do_AvBin16x8; + case Iop_Avg16Ux8: op = Pav_AVGU; goto do_AvBin16x8; + case Iop_Avg16Sx8: op = Pav_AVGS; goto do_AvBin16x8; + case Iop_Max16Ux8: op = Pav_MAXU; goto do_AvBin16x8; + case Iop_Max16Sx8: op = Pav_MAXS; goto do_AvBin16x8; + case Iop_Min16Ux8: op = Pav_MINU; goto do_AvBin16x8; + case Iop_Min16Sx8: op = Pav_MINS; goto do_AvBin16x8; + case Iop_MulLo16Ux8: op = Pav_OMULU; goto do_AvBin16x8; + case Iop_MulLo16Sx8: op = Pav_OMULS; goto do_AvBin16x8; + case Iop_MulHi16Ux8: op = Pav_EMULU; goto do_AvBin16x8; + case Iop_MulHi16Sx8: op = Pav_EMULS; goto do_AvBin16x8; + do_AvBin16x8: { + HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); + HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegV(env); + addInstr(env, PPC32Instr_AvBin16x8(op, dst, arg1, arg2)); return dst; } - case Iop_Add32x4: op = Pav_ADDUM; goto do_AvBin32x4; + case Iop_Add32x4: op = Pav_ADDUM; goto do_AvBin32x4; + case Iop_QAdd32Ux4: op = Pav_ADDUS; goto do_AvBin32x4; + case Iop_QAdd32Sx4: op = Pav_ADDSS; goto do_AvBin32x4; + case Iop_Sub32x4: op = Pav_SUBUM; goto do_AvBin32x4; + case Iop_QSub32Ux4: op = Pav_SUBUS; goto do_AvBin32x4; + case Iop_QSub32Sx4: op = Pav_SUBSS; goto do_AvBin32x4; + case Iop_Avg32Ux4: op = Pav_AVGU; goto do_AvBin32x4; + case Iop_Avg32Sx4: op = Pav_AVGS; goto do_AvBin32x4; + case Iop_Max32Ux4: op = Pav_MAXU; goto do_AvBin32x4; + case Iop_Max32Sx4: op = Pav_MAXS; goto do_AvBin32x4; + case Iop_Min32Ux4: op = Pav_MINU; goto do_AvBin32x4; + case Iop_Min32Sx4: op = Pav_MINS; goto do_AvBin32x4; case Iop_CmpGT32Ux4: op = Pav_CMPGTU; goto do_AvBin32x4; + case Iop_MulLo32Ux4: op = Pav_OMULU; goto do_AvBin32x4; + case Iop_MulLo32Sx4: op = Pav_OMULS; goto do_AvBin32x4; + case Iop_MulHi32Ux4: op = Pav_EMULU; goto do_AvBin32x4; + case Iop_MulHi32Sx4: op = Pav_EMULS; goto do_AvBin32x4; do_AvBin32x4: { HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1); HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2); @@ -3394,24 +3443,6 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } -//.. do_SseShift: { -//.. HReg greg = iselVecExpr(env, e->Iex.Binop.arg1); -//.. X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2); -//.. X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP()); -//.. HReg ereg = newVRegV(env); -//.. HReg dst = newVRegV(env); -//.. REQUIRE_SSE2; -//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0))); -//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0))); -//.. addInstr(env, X86Instr_Push(X86RMI_Imm(0))); -//.. addInstr(env, X86Instr_Push(rmi)); -//.. addInstr(env, X86Instr_SseLdSt(True/*load*/, ereg, esp0)); -//.. addInstr(env, mk_vMOVsd_RR(greg, dst)); -//.. addInstr(env, X86Instr_SseReRg(op, ereg, dst)); -//.. add_to_esp(env, 16); -//.. return dst; -//.. } - default: break; } /* switch (e->Iex.Binop.op) */