From 90516c4a6692c53c57927ed9e88bec46fb64daff Mon Sep 17 00:00:00 2001
From: Cerion Armour-Brown
Date: Wed, 14 Sep 2005 20:35:47 +0000
Subject: [PATCH] more altivec insns: vsr, vspltw - only working with
 --tool=none

back-end:
hdefs: new type for PPC32Instr_AvSplat: PPC32VI5s => {vector-reg | signed-5bit-imm}
       fixed ShlV128, ShrV128 to shift the full 128 bits
isel:  implemented Iop_Dup32x4, Iop_ShrV128
       new function mk_AvDuplicateRI()
       - takes in ri_src (imm|reg, latter of type 8|16|32)
         returns vector reg of duplicated lanes of ri_src
         avoids store/load for immediates up to simm6.

git-svn-id: svn://svn.valgrind.org/vex/trunk@1392
---
 VEX/priv/guest-ppc32/toIR.c |  30 ++++++---
 VEX/priv/host-ppc32/hdefs.c | 117 +++++++++++++++++++++++++++---------
 VEX/priv/host-ppc32/hdefs.h |  30 ++++++++-
 VEX/priv/host-ppc32/isel.c  | 102 ++++++++++++++++++++++++++++++-
 4 files changed, 239 insertions(+), 40 deletions(-)

diff --git a/VEX/priv/guest-ppc32/toIR.c b/VEX/priv/guest-ppc32/toIR.c
index 5caea6d11f..b8a7460761 100644
--- a/VEX/priv/guest-ppc32/toIR.c
+++ b/VEX/priv/guest-ppc32/toIR.c
@@ -5518,6 +5518,11 @@ static Bool dis_av_shift ( UInt theInstr )
    UChar vB_addr = toUChar((theInstr >> 11) & 0x1F);   /* theInstr[11:15] */
    UInt  opc2    = (theInstr >> 0) & 0x7FF;            /* theInstr[0:10]  */
 
+   IRTemp vA = newTemp(Ity_V128);
+   IRTemp vB = newTemp(Ity_V128);
+   assign( vA, getVReg(vA_addr));
+   assign( vB, getVReg(vB_addr));
+
    if (opc1 != 0x4){
       vex_printf("dis_av_shift(PPC32)(instr)\n");
       return False;
@@ -5583,11 +5588,16 @@ static Bool dis_av_shift ( UInt theInstr )
       DIP(" => not implemented\n");
       return False;
 
-   case 0x2C4: // vsr (Shift Right, AV p252)
+   case 0x2C4: { // vsr (Shift Right, AV p251)
       DIP("vsr v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      DIP(" => not implemented\n");
-      return False;
-
+      IRTemp sh = newTemp(Ity_I8);
+      assign( sh, binop(Iop_And8, mkU8(0x7),
+                        unop(Iop_32to8,
+                             unop(Iop_V128to32, mkexpr(vB)))) );
+      putVReg( vD_addr,
+               binop(Iop_ShrV128, mkexpr(vA), mkexpr(sh)) );
+      break;
+   }
    case 0x304: // vsrab (Shift Right Algebraic B, AV p253)
       DIP("vsrab v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
       DIP(" => not implemented\n");
@@ -5717,11 +5727,15 @@ static Bool dis_av_permute ( UInt theInstr )
       DIP(" => not implemented\n");
       return False;
 
-   case 0x28C: // vspltw (Splat Word, AV p250)
+   case 0x28C: { // vspltw (Splat Word, AV p250)
       DIP("vspltw v%d,v%d,%d\n", vD_addr, vB_addr, UIMM_5);
-      DIP(" => not implemented\n");
-      return False;
-
+      /* vD = Dup32x4( vB[UIMM_5] ) */
+      unsigned int sh_uimm = (3 - UIMM_5)*32;
+      putVReg( vD_addr, unop(Iop_Dup32x4,
+                             unop(Iop_V128to32,
+                                  binop(Iop_ShrV128, mkexpr(vB), mkU8(sh_uimm)))) );
+      break;
+   }
    case 0x30C: // vspltisb (Splat Immediate Signed B, AV p247)
       DIP("vspltisb v%d,%d\n", vD_addr, (Char)SIMM_8);
       DIP(" => not implemented\n");
diff --git a/VEX/priv/host-ppc32/hdefs.c b/VEX/priv/host-ppc32/hdefs.c
index 0cdcb89c23..59d1cf6d7d 100644
--- a/VEX/priv/host-ppc32/hdefs.c
+++ b/VEX/priv/host-ppc32/hdefs.c
@@ -529,6 +529,64 @@ static void mapRegs_PPC32RI ( HRegRemap* m, PPC32RI* dst ) {
 }
 
 
+/* --------- Operand, which can be a vector reg or a simm5. --------- */
+
+PPC32VI5s* PPC32VI5s_Imm ( Char simm5 ) {
+   PPC32VI5s* op = LibVEX_Alloc(sizeof(PPC32VI5s));
+   op->tag       = Pvi_Imm;
+   op->Pvi.Imm5s = simm5;
+   vassert(simm5 >= -16 && simm5 <= 15);
+   return op;
+}
+PPC32VI5s* PPC32VI5s_Reg ( HReg reg ) {
+   PPC32VI5s* op = LibVEX_Alloc(sizeof(PPC32VI5s));
+   op->tag     = Pvi_Reg;
+   op->Pvi.Reg = reg;
+   vassert(hregClass(reg) == HRcVec128);
+   return op;
+}
+
+void ppPPC32VI5s ( PPC32VI5s* src ) {
+   switch (src->tag) {
+      case Pvi_Imm:
+         vex_printf("%d", (Int)src->Pvi.Imm5s);
+         break;
+      case Pvi_Reg:
+         ppHRegPPC32(src->Pvi.Reg);
+         break;
+      default:
+         vpanic("ppPPC32VI5s");
+   }
+}
+
+/* A PPC32VI5s can only be used in a "read" context (what would it
+   mean to write or modify a literal?) and so we enumerate its
+   registers accordingly. */
+static void addRegUsage_PPC32VI5s ( HRegUsage* u, PPC32VI5s* dst ) {
+   switch (dst->tag) {
+      case Pvi_Imm:
+         return;
+      case Pvi_Reg:
+         addHRegUse(u, HRmRead, dst->Pvi.Reg);
+         return;
+      default:
+         vpanic("addRegUsage_PPC32VI5s");
+   }
+}
+
+static void mapRegs_PPC32VI5s ( HRegRemap* m, PPC32VI5s* dst ) {
+   switch (dst->tag) {
+      case Pvi_Imm:
+         return;
+      case Pvi_Reg:
+         dst->Pvi.Reg = lookupHRegRemap(m, dst->Pvi.Reg);
+         return;
+      default:
+         vpanic("mapRegs_PPC32VI5s");
+   }
+}
+
+
 /* --------- Instructions. --------- */
 
 HChar* showPPC32UnaryOp ( PPC32UnaryOp op ) {
@@ -942,7 +1000,7 @@ PPC32Instr* PPC32Instr_AvShlDbl ( UChar shift, HReg dst, HReg srcL, HReg srcR )
    i->Pin.AvShlDbl.srcR = srcR;
    return i;
 }
-PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32RI* src ) {
+PPC32Instr* PPC32Instr_AvSplat ( UChar sz, HReg dst, PPC32VI5s* src ) {
    PPC32Instr* i     = LibVEX_Alloc(sizeof(PPC32Instr));
    i->tag            = Pin_AvSplat;
    i->Pin.AvSplat.sz = sz;
@@ -1355,20 +1413,15 @@ void ppPPC32Instr ( PPC32Instr* i )
       return;
 
    case Pin_AvSplat: {
-      UChar ch_sz = toUChar(
-                       (i->Pin.AvSplat.sz == 8) ? 'b' :
-                       (i->Pin.AvSplat.sz == 16) ? 'h' : 'w'
-                    );
+      UChar sz    = i->Pin.AvSplat.sz;
+      UChar ch_sz = toUChar( (sz == 8) ? 'b' : (sz == 16) ? 'h' : 'w' );
       vex_printf("vsplt%s%c ",
-                 i->Pin.AvSplat.src->tag == Pri_Imm ? "is" : "", ch_sz);
+                 i->Pin.AvSplat.src->tag == Pvi_Imm ? "is" : "", ch_sz);
       ppHRegPPC32(i->Pin.AvSplat.dst);
       vex_printf(",");
-      if (i->Pin.AvSplat.src->tag == Pri_Imm) {
-         vex_printf("%d", (Char)(i->Pin.AvSplat.src->Pri.Imm));
-      } else {
-         ppHRegPPC32(i->Pin.AvSplat.src->Pri.Reg);
-         vex_printf(", 0");
-      }
+      ppPPC32VI5s(i->Pin.AvSplat.src);
+      if (i->Pin.AvSplat.src->tag == Pvi_Reg)
+         vex_printf(", %u", (128/sz)-1);   /* lowest lane */
       return;
    }
 
@@ -1599,8 +1652,8 @@ void getRegUsage_PPC32Instr ( HRegUsage* u, PPC32Instr* i )
       addHRegUse(u, HRmRead,  i->Pin.AvShlDbl.srcR);
       return;
    case Pin_AvSplat:
-      addHRegUse(u, HRmWrite, i->Pin.AvSplat.dst);
-      addRegUsage_PPC32RI(u, i->Pin.AvSplat.src);
+      addHRegUse(u, HRmWrite,  i->Pin.AvSplat.dst);
+      addRegUsage_PPC32VI5s(u, i->Pin.AvSplat.src);
       return;
    case Pin_AvCMov:
       addHRegUse(u, HRmModify, i->Pin.AvCMov.dst);
@@ -1764,7 +1817,7 @@ void mapRegs_PPC32Instr (HRegRemap* m, PPC32Instr* i)
       return;
    case Pin_AvSplat:
       mapReg(m, &i->Pin.AvSplat.dst);
-      mapRegs_PPC32RI(m, i->Pin.AvSplat.src);
+      mapRegs_PPC32VI5s(m, i->Pin.AvSplat.src);
       return;
    case Pin_AvCMov:
       mapReg(m, &i->Pin.AvCMov.dst);
@@ -2812,16 +2865,21 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i )
       UInt v_srcL = vregNo(i->Pin.AvBinary.srcL);
       UInt v_srcR = vregNo(i->Pin.AvBinary.srcR);
       UInt opc2;
+      if (i->Pin.AvBinary.op == Pav_SHL) {
+         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1036 ); // vslo
+         p = mkFormVX( p, 4, v_dst, v_dst,  v_srcR, 452 );  // vsl
+         goto done;
+      }
+      if (i->Pin.AvBinary.op == Pav_SHR) {
+         p = mkFormVX( p, 4, v_dst, v_srcL, v_srcR, 1100 ); // vsro
+         p = mkFormVX( p, 4, v_dst, v_dst,  v_srcR, 708 );  // vsr
+         goto done;
+      }
       switch (i->Pin.AvBinary.op) {
       /* Bitwise */
       case Pav_AND:  opc2 = 1028; break; // vand
       case Pav_OR:   opc2 = 1156; break; // vor
       case Pav_XOR:  opc2 = 1220; break; // vxor
-
-      /* Shift */
-      case Pav_SHL:  opc2 = 452;  break; // vsl
-      case Pav_SHR:  opc2 = 708;  break; // vsr
-
       default:
          goto bad;
       }
@@ -3060,17 +3118,22 @@ Int emit_PPC32Instr ( UChar* buf, Int nbuf, PPC32Instr* i )
    case Pin_AvSplat: { // vsplt(is)(b,h,w)
       UInt  v_dst = vregNo(i->Pin.AvShlDbl.dst);
      UChar sz    = i->Pin.AvSplat.sz;
-      UInt v_src, simm_src, opc2;
+      UInt  v_src, opc2;
       vassert(sz == 8 || sz == 16 || sz == 32);
 
-      if (i->Pin.AvSplat.src->tag == Pri_Imm) {
+      if (i->Pin.AvSplat.src->tag == Pvi_Imm) {
         opc2 = (sz == 8) ? 780 : (sz == 16) ? 844 : 908;   // 8,16,32
-         simm_src = i->Pin.AvSplat.src->Pri.Imm;
-         p = mkFormVX( p, 4, v_dst, simm_src, 0, opc2 );
-      } else { // Pri_Reg
+         /* expects 5-bit-signed-imm */
+         Char simm5 = i->Pin.AvSplat.src->Pvi.Imm5s;
+         vassert(simm5 >= -16 && simm5 <= 15);
+         p = mkFormVX( p, 4, v_dst, (UInt)simm5, 0, opc2 );
+      }
+      else { // Pvi_Reg
         opc2 = (sz == 8) ? 524 : (sz == 16) ? 588 : 652;   // 8,16,32
-         v_src = iregNo(i->Pin.AvSplat.src->Pri.Reg);
-         p = mkFormVX( p, 4, v_dst, 0, v_src, opc2 );
+         vassert(hregClass(i->Pin.AvSplat.src->Pvi.Reg) == HRcVec128);
+         v_src = vregNo(i->Pin.AvSplat.src->Pvi.Reg);
+         UInt lowest_lane = (128/sz)-1;
+         p = mkFormVX( p, 4, v_dst, lowest_lane, v_src, opc2 );
      }
      goto done;
    }
diff --git a/VEX/priv/host-ppc32/hdefs.h b/VEX/priv/host-ppc32/hdefs.h
index e82b045d49..01e56f1bf8 100644
--- a/VEX/priv/host-ppc32/hdefs.h
+++ b/VEX/priv/host-ppc32/hdefs.h
@@ -293,6 +293,32 @@ extern PPC32RI* PPC32RI_Reg ( HReg );
 extern void ppPPC32RI ( PPC32RI* );
 
 
+/* --------- Operand, which can be a vector reg or a simm5. --------- */
+/* ("VI" == "Vector Register or Immediate") */
+typedef
+   enum {
+      Pvi_Imm=5,
+      Pvi_Reg=6
+   }
+   PPC32VI5sTag;
+
+typedef
+   struct {
+      PPC32VI5sTag tag;
+      union {
+         Char Imm5s;
+         HReg Reg;
+      }
+      Pvi;
+   }
+   PPC32VI5s;
+
+extern PPC32VI5s* PPC32VI5s_Imm ( Char );
+extern PPC32VI5s* PPC32VI5s_Reg ( HReg );
+
+extern void ppPPC32VI5s ( PPC32VI5s* );
+
+
 /* --------- Instructions. --------- */
 
 /* --------- */
@@ -664,7 +690,7 @@ typedef
          struct {
             UChar      sz;   /* 8,16,32 */
             HReg       dst;
-            PPC32RI*   src;
+            PPC32VI5s* src;
          } AvSplat;
         /* Mov src to dst on the given condition, which may not
            be the bogus Xcc_ALWAYS. */
@@ -719,7 +745,7 @@ extern PPC32Instr* PPC32Instr_AvBin32Fx4 ( PPC32AvOp op, HReg dst, HReg srcL, HR
 extern PPC32Instr* PPC32Instr_AvPerm   ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
 extern PPC32Instr* PPC32Instr_AvSel    ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
 extern PPC32Instr* PPC32Instr_AvShlDbl ( UChar shift, HReg dst, HReg srcL, HReg srcR );
-extern PPC32Instr* PPC32Instr_AvSplat  ( UChar sz, HReg dst, PPC32RI* src );
+extern PPC32Instr* PPC32Instr_AvSplat  ( UChar sz, HReg dst, PPC32VI5s* src );
 extern PPC32Instr* PPC32Instr_AvCMov   ( PPC32CondCode, HReg dst, HReg src );
 extern PPC32Instr* PPC32Instr_AvLdVSCR ( HReg src );
 
diff --git a/VEX/priv/host-ppc32/isel.c b/VEX/priv/host-ppc32/isel.c
index 8471fdb2f5..d65d17a94c 100644
--- a/VEX/priv/host-ppc32/isel.c
+++ b/VEX/priv/host-ppc32/isel.c
@@ -752,6 +752,85 @@ void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
 //..    add_to_esp(env, 8);
 //.. }
 
+/*
+  Generates code for AvSplat
+  - takes in IRExpr* of type 8|16|32
+    returns vector reg of duplicated lanes of input
+  - uses AvSplat(imm) for imms up to simm6.
+    otherwise must use store reg & load vector
+*/
+static HReg mk_AvDuplicateRI( ISelEnv* env, IRExpr* e )
+{
+   HReg     dst = newVRegV(env);
+   PPC32RI* ri  = iselIntExpr_RI(env, e);
+   IRType   ty  = typeOfIRExpr(env->type_env,e);
+   UInt     sz  = (ty == Ity_I8) ? 8 : (ty == Ity_I16) ? 16 : 32;
+   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
+
+   HReg r_src;
+   /* special case: immediate */
+   if (ri->tag == Pri_Imm) {
+      Int simm32 = (Int)ri->Pri.Imm;
+
+      /* figure out if it's do-able with imm splats. */
+      if (simm32 >= -32 && simm32 <= 31) {
+         Char simm6 = (Char)simm32;
+         if (simm6 > 15) {            /* 16:31 inclusive */
+            HReg v1 = newVRegV(env);
+            HReg v2 = newVRegV(env);
+            addInstr(env, PPC32Instr_AvSplat(sz, v1, PPC32VI5s_Imm(-16)));
+            addInstr(env, PPC32Instr_AvSplat(sz, v2, PPC32VI5s_Imm(simm6-16)));
+            addInstr(env, PPC32Instr_AvBinary(Pav_SUBUM, dst, v2, v1));
+            return dst;
+         }
+         if (simm6 < -16) {           /* -32:-17 inclusive */
+            HReg v1 = newVRegV(env);
+            HReg v2 = newVRegV(env);
+            addInstr(env, PPC32Instr_AvSplat(sz, v1, PPC32VI5s_Imm(-16)));
+            addInstr(env, PPC32Instr_AvSplat(sz, v2, PPC32VI5s_Imm(simm6+16)));
+            addInstr(env, PPC32Instr_AvBinary(Pav_ADDUM, dst, v2, v1));
+            return dst;
+         }
+         /* simplest form:            -16:15 inclusive */
+         addInstr(env, PPC32Instr_AvSplat(sz, dst, PPC32VI5s_Imm(simm6)));
+         return dst;
+      }
+
+      /* no luck; use the Slow way. */
+      r_src = newVRegI(env);
+      addInstr(env, PPC32Instr_LI32(r_src, (UInt)simm32));
+   }
+   else {
+      r_src = ri->Pri.Reg;
+   }
+
+   /* default case: store r_src in lowest lane of 16-aligned mem,
+      load vector, splat lowest lane to dst */
+   {
+      /* CAB: Perhaps faster to store r_src multiple times (sz dependent),
+         and simply load the vector? */
+      HReg v_src = newVRegV(env);
+      PPC32AMode *am_off12;
+
+      sub_from_sp( env, 32 );      // Move SP down
+      /* Get a 16-aligned address within our stack space */
+      HReg r_aligned16 = get_sp_aligned16( env );
+      am_off12 = PPC32AMode_IR( 12, r_aligned16);
+
+      /* Store r_src in low word of 16-aligned mem */
+      addInstr(env, PPC32Instr_Store( 4, am_off12, r_src ));
+
+      /* Load src to vector[low lane] */
+      addInstr(env, PPC32Instr_AvLdSt( True/*load*/, 4, v_src, am_off12 ));
+      add_to_sp( env, 32 );        // Reset SP
+
+      /* Finally, splat v_src[low_lane] to dst */
+      addInstr(env, PPC32Instr_AvSplat(sz, dst, PPC32VI5s_Reg(v_src)));
+      return dst;
+   }
+}
+
+
 
 /*---------------------------------------------------------*/
 /*--- ISEL: Integer expressions (32/16/8 bit)             ---*/
@@ -1498,13 +1577,15 @@ static PPC32RI* iselIntExpr_RI ( ISelEnv* env, IRExpr* e )
 static PPC32RI* iselIntExpr_RI_wrk ( ISelEnv* env, IRExpr* e )
 {
    IRType ty = typeOfIRExpr(env->type_env,e);
-   vassert(ty == Ity_I32);
+   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32);
 
    /* special case: immediate */
    if (e->tag == Iex_Const) {
       UInt u;
       switch (e->Iex.Const.con->tag) {
-         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
+         case Ico_U32: u =          e->Iex.Const.con->Ico.U32; break;
+         case Ico_U16: u = 0xFFFF & e->Iex.Const.con->Ico.U16; break;
+         case Ico_U8:  u = 0xFF   & e->Iex.Const.con->Ico.U8;  break;
         default: vpanic("iselIntExpr_RI.Iex_Const(ppc32h)");
      }
      return PPC32RI_Imm(u);
@@ -3040,6 +3121,11 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
 //..       return dst;
 //..    }
 
+      case Iop_Dup32x4: {
+         HReg dst = mk_AvDuplicateRI(env, e->Iex.Unop.arg);
+         return dst;
+      }
+
       default:
          break;
       }   /* switch (e->Iex.Unop.op) */
@@ -3245,7 +3331,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
    do_AvBin: {
       HReg arg1 = iselVecExpr(env, e->Iex.Binop.arg1);
       HReg arg2 = iselVecExpr(env, e->Iex.Binop.arg2);
-      HReg dst = newVRegV(env);
+      HReg dst  = newVRegV(env);
       addInstr(env, PPC32Instr_AvBinary(op, dst, arg1, arg2));
       return dst;
    }
@@ -3273,6 +3359,16 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
 //..    case Iop_ShrN16x8: op = Xsse_SHR16; goto do_SseShift;
 //..    case Iop_ShrN32x4: op = Xsse_SHR32; goto do_SseShift;
 //..    case Iop_ShrN64x2: op = Xsse_SHR64; goto do_SseShift;
+
+      case Iop_ShrV128: op = Pav_SHR; goto do_AvShiftV128;
+      do_AvShiftV128: {
+         HReg dst    = newVRegV(env);
+         HReg r_src  = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg v_shft = mk_AvDuplicateRI(env, e->Iex.Binop.arg2);
+         addInstr(env, PPC32Instr_AvBinary(op, dst, r_src, v_shft));
+         return dst;
+      }
+
 //..    do_SseShift: {
 //..       HReg greg = iselVecExpr(env, e->Iex.Binop.arg1);
 //..       X86RMI* rmi = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
-- 
2.47.3
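
[Editor's note -- not part of the patch above. The immediate path of
mk_AvDuplicateRI builds any 6-bit signed splat value out of at most two
5-bit signed splats plus one modular add or subtract, which is why it can
skip the store/load round-trip for immediates up to simm6. Below is a
minimal stand-alone C sketch of that per-lane identity. The file name, the
helper names, and the assumption that the Pav_ADDUM/Pav_SUBUM step operates
at the same lane width as the splat are mine, not the patch's.]

/* simm6_splat_model.c -- illustrative sketch only, not part of VEX.
   Checks that for any simm6 in -32..31 and lane width sz in {8,16,32}:
     16..31   : (simm6-16) - (-16) == simm6   (mod 2^sz)
     -32..-17 : (simm6+16) + (-16) == simm6   (mod 2^sz)
     -16..15  : a single 5-bit splat suffices
   mirroring the three branches of mk_AvDuplicateRI's immediate case. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Value of one lane after a vspltisb/vspltish/vspltisw-style splat:
   sign-extend the 5-bit immediate, then truncate to the lane width. */
static uint32_t splat_lane ( int simm5, int sz )
{
   uint32_t mask = (sz == 32) ? 0xFFFFFFFFu : ((1u << sz) - 1);
   assert(simm5 >= -16 && simm5 <= 15);
   return (uint32_t)simm5 & mask;
}

/* Value of one lane as mk_AvDuplicateRI would build it for a simm6,
   assuming the combining add/sub is modular at the lane width. */
static uint32_t dup_lane ( int simm6, int sz )
{
   uint32_t mask = (sz == 32) ? 0xFFFFFFFFu : ((1u << sz) - 1);
   assert(simm6 >= -32 && simm6 <= 31);
   if (simm6 > 15)      /* 16:31 inclusive */
      return (splat_lane(simm6 - 16, sz) - splat_lane(-16, sz)) & mask;
   if (simm6 < -16)     /* -32:-17 inclusive */
      return (splat_lane(simm6 + 16, sz) + splat_lane(-16, sz)) & mask;
   return splat_lane(simm6, sz);   /* -16:15 inclusive */
}

int main ( void )
{
   static const int sizes[3] = { 8, 16, 32 };
   int i, v;
   for (i = 0; i < 3; i++) {
      uint32_t mask = (sizes[i] == 32) ? 0xFFFFFFFFu : ((1u << sizes[i]) - 1);
      for (v = -32; v <= 31; v++)
         assert(dup_lane(v, sizes[i]) == ((uint32_t)v & mask));
   }
   printf("simm6-from-two-simm5 splat decomposition holds for 8/16/32-bit lanes\n");
   return 0;
}

Compile (e.g. cc simm6_splat_model.c) and run; it asserts the identity for
every representable simm6 at each lane width, so no store/load is needed in
that range.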