From: Cerion Armour-Brown Date: Thu, 15 Sep 2005 21:58:50 +0000 (+0000) Subject: Added AltiVec permutation insns: X-Git-Tag: svn/VALGRIND_3_1_1^2~92 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=4e901cbc6311fd3634225d8cf835f1582d765854;p=thirdparty%2Fvalgrind.git Added AltiVec permutation insns: - vperm, vsldoi, vmrg*, vsplt* git-svn-id: svn://svn.valgrind.org/vex/trunk@1399 --- diff --git a/VEX/priv/guest-ppc32/toIR.c b/VEX/priv/guest-ppc32/toIR.c index 14656e12db..396f57da0d 100644 --- a/VEX/priv/guest-ppc32/toIR.c +++ b/VEX/priv/guest-ppc32/toIR.c @@ -335,12 +335,10 @@ static UChar extend_s_5to8 ( UChar x ) return toUChar((((Int)x) << 27) >> 27); } -//zz #if 0 -//zz static UInt extend_s_8to32( UInt x ) -//zz { -//zz return (UInt)((((Int)x) << 24) >> 24); -//zz } -//zz #endif +static UInt extend_s_8to32( UChar x ) +{ + return (UInt)((((Int)x) << 24) >> 24); +} static UInt extend_s_16to32 ( UInt x ) { @@ -406,6 +404,11 @@ static IRExpr* mkU8 ( UChar i ) return IRExpr_Const(IRConst_U8(i)); } +static IRExpr* mkU16 ( UInt i ) +{ + return IRExpr_Const(IRConst_U16(i)); +} + static IRExpr* mkU32 ( UInt i ) { return IRExpr_Const(IRConst_U32(i)); @@ -5774,19 +5777,40 @@ static Bool dis_av_permute ( UInt theInstr ) binop(Iop_AndV128, mkexpr(vB), mkexpr(vC))) ); return True; - case 0x2B: // vperm (Permute, AV p218) - DIP("vperm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); - DIP(" => not implemented\n"); - return False; - + case 0x2B: { // vperm (Permute, AV p218) + DIP("vperm v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); + /* limited to two args for IR, so have to play games... */ + IRTemp a_perm = newTemp(Ity_V128); + IRTemp b_perm = newTemp(Ity_V128); + IRTemp mask = newTemp(Ity_V128); + assign( a_perm, binop(Iop_Perm, mkexpr(vA), mkexpr(vC)) ); + assign( b_perm, binop(Iop_Perm, mkexpr(vB), mkexpr(vC)) ); + // mask[i8] = (vC[i8]_4 == 1) ?
0xFF : 0x0 + assign( mask, binop(Iop_SarN8x16, + binop(Iop_ShlN8x16, mkexpr(vC), mkU8(3)), + mkU8(7)) ); + // dst = (a & ~mask) | (b & mask) + putVReg( vD_addr, binop(Iop_OrV128, + binop(Iop_AndV128, mkexpr(a_perm), + unop(Iop_NotV128, mkexpr(mask))), + binop(Iop_AndV128, mkexpr(b_perm), + mkexpr(mask))) ); + return True; + } case 0x2C: // vsldoi (Shift Left Double by Octet Imm, AV p241) if (b10 != 0) { vex_printf("dis_av_permute(PPC32)(vsldoi)\n"); return False; } DIP("vsldoi v%d,v%d,v%d,%d\n", vD_addr, vA_addr, vB_addr, SHB_uimm4); - DIP(" => not implemented\n"); - return False; + if (SHB_uimm4 == 0) + putVReg( vD_addr, mkexpr(vA) ); + else + putVReg( vD_addr, + binop(Iop_OrV128, + binop(Iop_ShlV128, mkexpr(vA), mkU8(SHB_uimm4*8)), + binop(Iop_ShrV128, mkexpr(vB), mkU8((16-SHB_uimm4)*8))) ); + return True; default: break; // Fall through... @@ -5798,49 +5822,63 @@ static Bool dis_av_permute ( UInt theInstr ) /* Merge */ case 0x00C: // vmrghb (Merge High B, AV p195) DIP("vmrghb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, + binop(Iop_InterleaveHI8x16, mkexpr(vA), mkexpr(vB)) ); + break; case 0x04C: // vmrghh (Merge High HW, AV p196) DIP("vmrghh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, + binop(Iop_InterleaveHI16x8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x08C: // vmrghw (Merge High W, AV p197) DIP("vmrghw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, + binop(Iop_InterleaveHI32x4, mkexpr(vA), mkexpr(vB)) ); + break; case 0x10C: // vmrglb (Merge Low B, AV p198) DIP("vmrglb v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, + binop(Iop_InterleaveLO8x16, mkexpr(vA), mkexpr(vB)) ); + break; case 0x14C: // vmrglh (Merge Low HW, AV p199) DIP("vmrglh v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not 
implemented\n"); - return False; + putVReg( vD_addr, + binop(Iop_InterleaveLO16x8, mkexpr(vA), mkexpr(vB)) ); + break; case 0x18C: // vmrglw (Merge Low W, AV p200) DIP("vmrglw v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, + binop(Iop_InterleaveLO32x4, mkexpr(vA), mkexpr(vB)) ); + break; + /* Splat */ - case 0x20C: // vspltb (Splat Byte, AV p245) + case 0x20C: { // vspltb (Splat Byte, AV p245) DIP("vspltb v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); - DIP(" => not implemented\n"); - return False; - - case 0x24C: // vsplth (Splat Half Word, AV p246) + /* vD = Dup8x16( vB[UIMM_5] ) */ + UChar sh_uimm = (15-(UIMM_5 & 15))*8; /* index taken mod 16 so the shift stays in 0..120 and cannot wrap in UChar */ + putVReg( vD_addr, unop(Iop_Dup8x16, + unop(Iop_32to8, unop(Iop_V128to32, + binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm))))) ); + break; + } + case 0x24C: { // vsplth (Splat Half Word, AV p246) DIP("vsplth v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); - DIP(" => not implemented\n"); - return False; - + UChar sh_uimm = (7-(UIMM_5 & 7))*16; /* index taken mod 8 so the shift stays in 0..112 */ + putVReg( vD_addr, unop(Iop_Dup16x8, + unop(Iop_32to16, unop(Iop_V128to32, + binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm))))) ); + break; + } case 0x28C: { // vspltw (Splat Word, AV p250) DIP("vspltw v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5); /* vD = Dup32x4( vB[UIMM_5] ) */ - unsigned int sh_uimm = (3-UIMM_5)*32; + UChar sh_uimm = (3-(UIMM_5 & 3))*32; /* index taken mod 4 so the shift stays in 0..96 */ putVReg( vD_addr, unop(Iop_Dup32x4, unop(Iop_V128to32, binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm)))) ); @@ -5848,18 +5886,18 @@ static Bool dis_av_permute ( UInt theInstr ) } case 0x30C: // vspltisb (Splat Immediate Signed B, AV p247) DIP("vspltisb v%d,%d\n", vD_addr, (Char)SIMM_8); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, unop(Iop_Dup8x16, mkU8(SIMM_8)) ); + break; case 0x34C: // vspltish (Splat Immediate Signed HW, AV p248) DIP("vspltish v%d,%d\n", vD_addr, (Char)SIMM_8); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, unop(Iop_Dup16x8, mkU16(extend_s_8to32(SIMM_8))) ); +
break; case 0x38C: // vspltisw (Splat Immediate Signed W, AV p249) DIP("vspltisw v%d,%d\n", vD_addr, (Char)SIMM_8); - DIP(" => not implemented\n"); - return False; + putVReg( vD_addr, unop(Iop_Dup32x4, mkU32(extend_s_8to32(SIMM_8))) ); + break; default: vex_printf("dis_av_permute(PPC32)(opc2)\n"); @@ -6614,12 +6652,8 @@ DisResult disInstr_PPC32_WRK ( /* AV Permutations */ case 0x2A: // vsel case 0x2B: // vperm - if (dis_av_permute( theInstr )) goto decode_success; - goto decode_failure; - - /* AV Shift */ case 0x2C: // vsldoi - if (dis_av_shift( theInstr )) goto decode_success; + if (dis_av_permute( theInstr )) goto decode_success; goto decode_failure; /* AV Floating Point Mult-Add/Sub */ diff --git a/VEX/priv/host-ppc32/hdefs.c b/VEX/priv/host-ppc32/hdefs.c index abdad31d41..efbed240fd 100644 --- a/VEX/priv/host-ppc32/hdefs.c +++ b/VEX/priv/host-ppc32/hdefs.c @@ -973,13 +973,13 @@ PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR i->Pin.AvBin32Fx4.srcR = srcR; return i; } -PPC32Instr* PPC32Instr_AvPerm ( HReg ctl, HReg dst, HReg srcL, HReg srcR ) { +PPC32Instr* PPC32Instr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl ) { PPC32Instr* i = LibVEX_Alloc(sizeof(PPC32Instr)); i->tag = Pin_AvPerm; - i->Pin.AvPerm.ctl = ctl; i->Pin.AvPerm.dst = dst; i->Pin.AvPerm.srcL = srcL; i->Pin.AvPerm.srcR = srcR; + i->Pin.AvPerm.ctl = ctl; return i; } PPC32Instr* PPC32Instr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR ) { @@ -1636,9 +1636,9 @@ void getRegUsage_PPC32Instr ( HRegUsage* u, PPC32Instr* i ) return; case Pin_AvPerm: addHRegUse(u, HRmWrite, i->Pin.AvPerm.dst); - addHRegUse(u, HRmRead, i->Pin.AvPerm.ctl); addHRegUse(u, HRmRead, i->Pin.AvPerm.srcL); addHRegUse(u, HRmRead, i->Pin.AvPerm.srcR); + addHRegUse(u, HRmRead, i->Pin.AvPerm.ctl); return; case Pin_AvSel: addHRegUse(u, HRmWrite, i->Pin.AvSel.dst); @@ -3088,10 +3088,10 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i ) } case Pin_AvPerm: { // vperm - UInt v_ctl = 
vregNo(i->Pin.AvPerm.ctl); UInt v_dst = vregNo(i->Pin.AvPerm.dst); UInt v_srcL = vregNo(i->Pin.AvPerm.srcL); UInt v_srcR = vregNo(i->Pin.AvPerm.srcR); + UInt v_ctl = vregNo(i->Pin.AvPerm.ctl); p = mkFormVA( p, 4, v_dst, v_srcL, v_srcR, v_ctl, 43 ); goto done; } diff --git a/VEX/priv/host-ppc32/hdefs.h b/VEX/priv/host-ppc32/hdefs.h index 01e56f1bf8..ff06b8ecf2 100644 --- a/VEX/priv/host-ppc32/hdefs.h +++ b/VEX/priv/host-ppc32/hdefs.h @@ -670,16 +670,16 @@ typedef } AvBin32Fx4; /* Perm,Sel,SlDbl,Splat are all weird AV permutations */ struct { - HReg ctl; HReg dst; HReg srcL; HReg srcR; + HReg ctl; } AvPerm; struct { - HReg ctl; HReg dst; HReg srcL; HReg srcR; + HReg ctl; } AvSel; struct { UChar shift; @@ -742,7 +742,7 @@ extern PPC32Instr* PPC32Instr_AvBin8x16 ( PPC32AvOp op, HReg dst, HReg srcL, HR extern PPC32Instr* PPC32Instr_AvBin16x8 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ); extern PPC32Instr* PPC32Instr_AvBin32x4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ); extern PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg srcL, HReg srcR ); -extern PPC32Instr* PPC32Instr_AvPerm ( HReg ctl, HReg dst, HReg srcL, HReg srcR ); +extern PPC32Instr* PPC32Instr_AvPerm ( HReg dst, HReg srcL, HReg srcR, HReg ctl ); extern PPC32Instr* PPC32Instr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR ); extern PPC32Instr* PPC32Instr_AvShlDbl ( UChar shift, HReg dst, HReg srcL, HReg srcR ); extern PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32VI5s* src ); diff --git a/VEX/priv/host-ppc32/isel.c b/VEX/priv/host-ppc32/isel.c index 5d4767c8ff..d2eb390348 100644 --- a/VEX/priv/host-ppc32/isel.c +++ b/VEX/priv/host-ppc32/isel.c @@ -3127,10 +3127,10 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. return dst; //.. 
} - case Iop_Dup32x4: { - HReg dst = mk_AvDuplicateRI(env, e->Iex.Binop.arg1); - return dst; - } + case Iop_Dup8x16: + case Iop_Dup16x8: + case Iop_Dup32x4: + return mk_AvDuplicateRI(env, e->Iex.Unop.arg); /* Dup* are built with unop(), so read the Unop member of the union */ default: break; @@ -3332,6 +3332,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. case Iop_QSub8Ux16: op = Xsse_QSUB8U; goto do_SseReRg; //.. case Iop_QSub16Ux8: op = Xsse_QSUB16U; goto do_SseReRg; + case Iop_InterleaveHI8x16: op = Pav_MRGHI; goto do_AvBin8x16; + case Iop_InterleaveLO8x16: op = Pav_MRGLO; goto do_AvBin8x16; case Iop_Add8x16: op = Pav_ADDUM; goto do_AvBin8x16; case Iop_QAdd8Ux16: op = Pav_ADDUS; goto do_AvBin8x16; case Iop_QAdd8Sx16: op = Pav_ADDSS; goto do_AvBin8x16; @@ -3355,6 +3357,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } + case Iop_InterleaveHI16x8: op = Pav_MRGHI; goto do_AvBin16x8; + case Iop_InterleaveLO16x8: op = Pav_MRGLO; goto do_AvBin16x8; case Iop_Add16x8: op = Pav_ADDUM; goto do_AvBin16x8; case Iop_QAdd16Ux8: op = Pav_ADDUS; goto do_AvBin16x8; case Iop_QAdd16Sx8: op = Pav_ADDSS; goto do_AvBin16x8; @@ -3382,6 +3386,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } + case Iop_InterleaveHI32x4: op = Pav_MRGHI; goto do_AvBin32x4; + case Iop_InterleaveLO32x4: op = Pav_MRGLO; goto do_AvBin32x4; case Iop_Add32x4: op = Pav_ADDUM; goto do_AvBin32x4; case Iop_QAdd32Ux4: op = Pav_ADDUS; goto do_AvBin32x4; case Iop_QAdd32Sx4: op = Pav_ADDSS; goto do_AvBin32x4; @@ -3433,9 +3439,19 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) //.. case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift; //..
case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift; + case Iop_ShlN8x16: op = Pav_SHL; goto do_AvShift8x16; + case Iop_SarN8x16: op = Pav_SAR; goto do_AvShift8x16; + do_AvShift8x16: { + HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1); + HReg dst = newVRegV(env); + HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2); + addInstr(env, PPC32Instr_AvBin8x16(op, dst, r_src, v_shft)); + return dst; + } + case Iop_ShrN32x4: op = Pav_SHR; goto do_AvShift32x4; do_AvShift32x4: { - HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1); + HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1); HReg dst = newVRegV(env); HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2); addInstr(env, PPC32Instr_AvBin32x4(op, dst, r_src, v_shft)); @@ -3443,14 +3459,24 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e ) } case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128; + case Iop_ShlV128: op = Pav_SHL; goto do_AvShiftV128; do_AvShiftV128: { HReg dst = newVRegV(env); HReg r_src = iselVecExpr(env, e->Iex.Binop.arg1); HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2); + /* Note: shift value gets masked by 127 */ addInstr(env, PPC32Instr_AvBinary(op, dst, r_src, v_shft)); return dst; } + case Iop_Perm: { + HReg dst = newVRegV(env); + HReg v_src = iselVecExpr(env, e->Iex.Binop.arg1); + HReg v_ctl = iselVecExpr(env, e->Iex.Binop.arg2); + addInstr(env, PPC32Instr_AvPerm(dst, v_src, v_src, v_ctl)); + return dst; + } + default: break; } /* switch (e->Iex.Binop.op) */