From: Julian Seward
Date: Sun, 23 Nov 2014 12:14:41 +0000 (+0000)
Subject: Merge, from trunk, 2962, 2966, 2967, 2973
X-Git-Tag: svn/VALGRIND_3_10_1^2~19
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=53ed52bc50dbb52d8bfba38b1e3cd7c73fe1b10d;p=thirdparty%2Fvalgrind.git

Merge, from trunk, 2962, 2966, 2967, 2973

339433  ppc64 lxvw4x instruction uses four 32-byte loads

git-svn-id: svn://svn.valgrind.org/vex/branches/VEX_3_10_BRANCH@3003
---

diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 8c1e70a3f9..6ee9a61b9d 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -15319,26 +15319,27 @@ dis_vx_load ( UInt theInstr )
    }
    case 0x30C:
    {
-      IRExpr * t3, *t2, *t1, *t0;
-      UInt ea_off = 0;
-      IRExpr* irx_addr;
+      IRExpr *t0;
 
       DIP("lxvw4x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-      t3 = load( Ity_I32, mkexpr( EA ) );
-      ea_off += 4;
-      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t2 = load( Ity_I32, irx_addr );
-      ea_off += 4;
-      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t1 = load( Ity_I32, irx_addr );
-      ea_off += 4;
-      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-      t0 = load( Ity_I32, irx_addr );
-      putVSReg( XT, binop( Iop_64HLtoV128, binop( Iop_32HLto64, t3, t2 ),
-                           binop( Iop_32HLto64, t1, t0 ) ) );
+
+      /* The load will result in the data being in BE order. */
+      if (host_endness == VexEndnessLE) {
+         IRExpr *t0_BE;
+         IRTemp perm_LE = newTemp(Ity_V128);
+
+         t0_BE = load( Ity_V128, mkexpr( EA ) );
+
+         /* Permute the data to LE format */
+         assign( perm_LE, binop( Iop_64HLtoV128, mkU64(0x0c0d0e0f08090a0b),
+                                 mkU64(0x0405060700010203)));
+
+         t0 = binop( Iop_Perm8x16, t0_BE, mkexpr(perm_LE) );
+      } else {
+         t0 = load( Ity_V128, mkexpr( EA ) );
+      }
+
+      putVSReg( XT, t0 );
       break;
    }
    default:
diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index 70b65fc609..cff6aa5ba1 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -1426,6 +1426,14 @@ PPCInstr* PPCInstr_AvSel ( HReg ctl, HReg dst, HReg srcL, HReg srcR ) {
    i->Pin.AvSel.srcR = srcR;
    return i;
 }
+PPCInstr* PPCInstr_AvSh ( Bool shLeft, HReg dst, PPCAMode* addr ) {
+   PPCInstr* i        = LibVEX_Alloc(sizeof(PPCInstr));
+   i->tag             = Pin_AvSh;
+   i->Pin.AvSh.shLeft = shLeft;
+   i->Pin.AvSh.dst    = dst;
+   i->Pin.AvSh.addr   = addr;
+   return i;
+}
 PPCInstr* PPCInstr_AvShlDbl ( UChar shift, HReg dst,
                               HReg srcL, HReg srcR ) {
    PPCInstr* i      = LibVEX_Alloc(sizeof(PPCInstr));
@@ -2008,6 +2016,30 @@ void ppPPCInstr ( PPCInstr* i, Bool mode64 )
       ppHRegPPC(i->Pin.AvSel.ctl);
       return;
 
+   case Pin_AvSh:
+      /* This only generates the following instructions with RA
+       * register number set to 0.
+       */
+      if (i->Pin.AvSh.addr->tag == Pam_IR) {
+         ppLoadImm(hregPPC_GPR30(mode64),
+                   i->Pin.AvSh.addr->Pam.IR.index, mode64);
+         vex_printf(" ; ");
+      }
+
+      if (i->Pin.AvSh.shLeft)
+         vex_printf("lvsl ");
+      else
+         vex_printf("lvsr ");
+
+      ppHRegPPC(i->Pin.AvSh.dst);
+      if (i->Pin.AvSh.addr->tag == Pam_IR)
+         vex_printf("%%r30");
+      else
+         ppHRegPPC(i->Pin.AvSh.addr->Pam.RR.index);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.AvSh.addr->Pam.RR.base);
+      return;
+
    case Pin_AvShlDbl:
       vex_printf("vsldoi ");
       ppHRegPPC(i->Pin.AvShlDbl.dst);
@@ -2517,6 +2549,12 @@ void getRegUsage_PPCInstr ( HRegUsage* u, PPCInstr* i, Bool mode64 )
       addHRegUse(u, HRmRead,  i->Pin.AvSel.srcL);
       addHRegUse(u, HRmRead,  i->Pin.AvSel.srcR);
       return;
+   case Pin_AvSh:
+      addHRegUse(u, HRmWrite, i->Pin.AvSh.dst);
+      if (i->Pin.AvSh.addr->tag == Pam_IR)
+         addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64));
+      addRegUsage_PPCAMode(u, i->Pin.AvSh.addr);
+      return;
    case Pin_AvShlDbl:
       addHRegUse(u, HRmWrite, i->Pin.AvShlDbl.dst);
       addHRegUse(u, HRmRead,  i->Pin.AvShlDbl.srcL);
@@ -2846,6 +2884,10 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
      mapReg(m, &i->Pin.AvSel.srcR);
      mapReg(m, &i->Pin.AvSel.ctl);
      return;
+   case Pin_AvSh:
+      mapReg(m, &i->Pin.AvSh.dst);
+      mapRegs_PPCAMode(m, i->Pin.AvSh.addr);
+      return;
    case Pin_AvShlDbl:
       mapReg(m, &i->Pin.AvShlDbl.dst);
       mapReg(m, &i->Pin.AvShlDbl.srcL);
@@ -3709,6 +3751,19 @@ static UChar* mkFormVX ( UChar* p, UInt opc1, UInt r1, UInt r2,
    return emit32(p, theInstr, endness_host);
 }
 
+static UChar* mkFormVXI ( UChar* p, UInt opc1, UInt r1, UInt r2,
+                          UInt r3, UInt opc2, VexEndness endness_host )
+{
+   UInt theInstr;
+   vassert(opc1 < 0x40);
+   vassert(r1   < 0x20);
+   vassert(r2   < 0x20);
+   vassert(r3   < 0x20);
+   vassert(opc2 < 0x27);
+   theInstr = ((opc1<<26) | (r1<<21) | (r2<<16) | (r3<<11) | opc2<<1);
+   return emit32(p, theInstr, endness_host);
+}
+
 static UChar* mkFormVXR ( UChar* p, UInt opc1, UInt r1, UInt r2,
                           UInt r3, UInt Rc, UInt opc2,
                           VexEndness endness_host )
@@ -5214,6 +5269,30 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
       goto done;
    }
 
+   case Pin_AvSh: {  // lvsl or lvsr
+      UInt v_dst = vregNo(i->Pin.AvSh.dst);
+      Bool idxd  = toBool(i->Pin.AvSh.addr->tag == Pam_RR);
+      UInt r_idx, r_base;
+
+      r_base = iregNo(i->Pin.AvSh.addr->Pam.RR.base, mode64);
+
+      if (!idxd) {
+         r_idx = 30; // XXX: Using r30 as temp
+         p = mkLoadImm(p, r_idx,
+                       i->Pin.AvSh.addr->Pam.IR.index, mode64, endness_host);
+      } else {
+         r_idx = iregNo(i->Pin.AvSh.addr->Pam.RR.index, mode64);
+      }
+
+      if (i->Pin.AvSh.shLeft)
+         // lvsl VRT,RA,RB
+         p = mkFormVXI( p, 31, v_dst, r_idx, r_base, 6, endness_host );
+      else
+         // lvsr VRT,RA,RB
+         p = mkFormVXI( p, 31, v_dst, r_idx, r_base, 38, endness_host );
+      goto done;
+   }
+
    case Pin_AvShlDbl: {  // vsldoi
       UInt shift  = i->Pin.AvShlDbl.shift;
       UInt v_dst  = vregNo(i->Pin.AvShlDbl.dst);
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
index 7f3043f9f9..d7bf1a3214 100644
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -522,6 +522,7 @@ typedef
       Pin_AvPerm,     /* AV permute (shuffle) */
       Pin_AvSel,      /* AV select */
+      Pin_AvSh,       /* AV shift left or right */
       Pin_AvShlDbl,   /* AV shift-left double by imm */
       Pin_AvSplat,    /* One elem repeated throughout dst */
       Pin_AvLdVSCR,   /* mtvscr */
@@ -854,6 +855,11 @@ typedef
             HReg srcR;
             HReg ctl;
          } AvSel;
+         struct {
+            Bool      shLeft;
+            HReg      dst;
+            PPCAMode* addr;
+         } AvSh;
          struct {
             UChar shift;
             HReg  dst;
@@ -1077,6 +1083,7 @@ extern PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst, HReg srcL, HReg s
 extern PPCInstr* PPCInstr_AvUn32Fx4  ( PPCAvFpOp op, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_AvPerm     ( HReg dst, HReg srcL, HReg srcR, HReg ctl );
 extern PPCInstr* PPCInstr_AvSel      ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvSh       ( Bool shLeft, HReg dst, PPCAMode* am_addr );
 extern PPCInstr* PPCInstr_AvShlDbl   ( UChar shift, HReg dst, HReg srcL, HReg srcR );
 extern PPCInstr* PPCInstr_AvSplat    ( UChar sz, HReg dst, PPCVI5s* src );
 extern PPCInstr* PPCInstr_AvCMov     ( PPCCondCode, HReg dst, HReg src );
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index fcbb53e845..00f7145844 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -4871,12 +4871,57 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e, IREndness IEndianess )
    }
 
    if (e->tag == Iex_Load && e->Iex.Load.end == IEndianess) {
-      PPCAMode* am_addr;
+      /* Need to be able to do V128 unaligned loads. The BE unaligned load
+       * can be accomplished using the following code sequence from the ISA.
+       * It uses two lvx instructions to do two aligned loads and then
+       * permutes the data so the result is the same as if it had been an
+       * unaligned load.
+       *
+       *   lvx   Vhi,0,Rb        # load MSQ, using the unaligned address in Rb
+       *   lvsl  Vp,0,Rb         # set permute control vector
+       *   addi  Rb,Rb,15        # address of LSQ
+       *   lvx   Vlo,0,Rb        # load LSQ
+       *   vperm Vt,Vhi,Vlo,Vp   # align the data as requested
+       */
+      HReg Vhi   = newVRegV(env);
+      HReg Vlo   = newVRegV(env);
+      HReg Vp    = newVRegV(env);
       HReg v_dst = newVRegV(env);
+      HReg rB;
+      HReg rB_plus_15 = newVRegI(env);
+
       vassert(e->Iex.Load.ty == Ity_V128);
-      am_addr = iselWordExpr_AMode(env, e->Iex.Load.addr, Ity_V128/*xfer*/,
-                                   IEndianess);
-      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, v_dst, am_addr));
+      rB = iselWordExpr_R( env, e->Iex.Load.addr, IEndianess );
+
+      // lvx Vhi, 0, Rb
+      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vhi,
+                                     PPCAMode_IR(0, rB)) );
+
+      if (IEndianess == Iend_LE)
+         // lvsr Vp, 0, Rb
+         addInstr(env, PPCInstr_AvSh( False/*right shift*/, Vp,
+                                      PPCAMode_IR(0, rB)) );
+      else
+         // lvsl Vp, 0, Rb
+         addInstr(env, PPCInstr_AvSh( True/*left shift*/, Vp,
+                                      PPCAMode_IR(0, rB)) );
+
+      // addi Rb_plus_15, Rb, 15
+      addInstr(env, PPCInstr_Alu( Palu_ADD, rB_plus_15,
+                                  rB, PPCRH_Imm(True, toUShort(15))) );
+
+      // lvx Vlo, 0, Rb_plus_15
+      addInstr(env, PPCInstr_AvLdSt( True/*load*/, 16, Vlo,
+                                     PPCAMode_IR(0, rB_plus_15)) );
+
+      if (IEndianess == Iend_LE)
+         // vperm Vt, Vlo, Vhi, Vp
+         addInstr(env, PPCInstr_AvPerm( v_dst, Vlo, Vhi, Vp ));
+      else
+         // vperm Vt, Vhi, Vlo, Vp
+         addInstr(env, PPCInstr_AvPerm( v_dst, Vhi, Vlo, Vp ));
+
       return v_dst;
    }
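
Note on the lxvw4x change: read as sixteen byte indices, the Iop_Perm8x16
control vector built above (0x0c0d0e0f08090a0b : 0x0405060700010203)
reverses the order of the four 32-bit words while leaving the bytes inside
each word untouched, turning the BE-ordered result of the plain V128 load
into the layout lxvw4x must produce on an LE host. A minimal C sketch of
that selection step, not part of the patch, assuming the simplified rule
that result byte i is taken from source byte ctrl[i] (VEX lane-numbering
details glossed over):

   #include <stdio.h>
   #include <stdint.h>

   /* Simplified model of Iop_Perm8x16: result byte i = src[ctrl[i] & 0xF]. */
   static void perm8x16(uint8_t dst[16], const uint8_t src[16],
                        const uint8_t ctrl[16])
   {
      for (int i = 0; i < 16; i++)
         dst[i] = src[ctrl[i] & 0xF];
   }

   int main(void)
   {
      /* The byte indices used by the patch, written out individually. */
      const uint8_t ctrl[16] = { 0x0c,0x0d,0x0e,0x0f, 0x08,0x09,0x0a,0x0b,
                                 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03 };
      uint8_t src[16], dst[16];
      for (int i = 0; i < 16; i++) src[i] = (uint8_t)i;
      perm8x16(dst, src, ctrl);
      /* Prints "0c0d0e0f 08090a0b 04050607 00010203": word order reversed,
         byte order within each word preserved. */
      for (int i = 0; i < 16; i++)
         printf("%02x%s", dst[i], (i % 4 == 3) ? " " : "");
      printf("\n");
      return 0;
   }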
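
Note on mkFormVXI: it shifts the primary opcode into the top six bits of
the instruction word, packs three 5-bit register fields below that, and
places the extended opcode in the ten bits above the lowest bit. This is
the layout the Pin_AvSh emitter relies on when it passes primary opcode 31
with extended opcodes 6 (lvsl) and 38 (lvsr). A standalone sketch of the
same packing, not part of the patch; the example operands and the expected
hex words are my own arithmetic:

   #include <stdio.h>
   #include <stdint.h>

   /* Same field packing as mkFormVXI: opc1 | RT | RA | RB | opc2<<1. */
   static uint32_t form_vxi(uint32_t opc1, uint32_t r1, uint32_t r2,
                            uint32_t r3, uint32_t opc2)
   {
      return (opc1 << 26) | (r1 << 21) | (r2 << 16) | (r3 << 11) | (opc2 << 1);
   }

   int main(void)
   {
      /* lvsl v5,r4,r6 and lvsr v5,r4,r6, using the 31/6 and 31/38
         opcode pairs that emit_PPCInstr passes for Pin_AvSh. */
      printf("lvsl v5,r4,r6 -> 0x%08x\n", form_vxi(31, 5, 4, 6, 6));
      printf("lvsr v5,r4,r6 -> 0x%08x\n", form_vxi(31, 5, 4, 6, 38));
      /* Expected output: 0x7ca4300c and 0x7ca4304c. */
      return 0;
   }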
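
Note on the isel change: the lvx/lvsl/vperm idiom works because lvx ignores
the low four address bits (it loads the aligned quadword containing the
address) and lvsl materializes the control bytes sh..sh+15, where
sh = addr & 15, so the vperm picks the sixteen wanted bytes out of the two
adjacent aligned quadwords. A byte-level C model of the BE path, not part
of the patch, which mirrors the hardware's aligned accesses rather than
reading the sixteen bytes directly:

   #include <stdint.h>

   /* Model of: lvx Vhi,0,Rb ; lvsl Vp,0,Rb ; addi Rb,Rb,15 ;
    *           lvx Vlo,0,Rb ; vperm Vt,Vhi,Vlo,Vp                  */
   static void unaligned_v128_load(uint8_t dst[16], const uint8_t *ea)
   {
      uintptr_t a = (uintptr_t)ea;
      const uint8_t *hi = (const uint8_t *)(a & ~(uintptr_t)15);        /* lvx Vhi */
      const uint8_t *lo = (const uint8_t *)((a + 15) & ~(uintptr_t)15); /* lvx Vlo */
      unsigned sh = (unsigned)(a & 15);  /* the shift lvsl encodes in Vp */

      for (unsigned i = 0; i < 16; i++) /* vperm from concat(Vhi,Vlo) */
         dst[i] = (sh + i < 16) ? hi[sh + i] : lo[sh + i - 16];
   }

When the address is already 16-byte aligned, hi and lo name the same
quadword and the permute degenerates to a straight copy. The LE path in the
patch uses lvsr and swaps the two vperm sources instead.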