From: Julian Seward Date: Sun, 21 Apr 2013 00:45:18 +0000 (+0000) Subject: Generate better code for Shl64(x, imm8) since the Neon front end X-Git-Tag: svn/VALGRIND_3_9_0^2~83 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=757ec290e69e6da2820a552e458abcc77f0f30f9;p=thirdparty%2Fvalgrind.git Generate better code for Shl64(x, imm8) since the Neon front end produces a lot of those for V{LD,ST}{1,2,3,4}. git-svn-id: svn://svn.valgrind.org/vex/trunk@2712 --- diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index 9d8e53d923..71b68a2736 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -1487,6 +1487,17 @@ ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op, return i; } +ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt ) +{ + ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr)); + i->tag = ARMin_NShl64; + i->ARMin.NShl64.dst = dst; + i->ARMin.NShl64.src = src; + i->ARMin.NShl64.amt = amt; + vassert(amt >= 1 && amt <= 63); + return i; +} + /* Helper copy-pasted from isel.c */ static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u ) { @@ -1929,6 +1940,13 @@ void ppARMInstr ( ARMInstr* i ) { vex_printf(", "); ppHRegARM(i->ARMin.NShift.argR); return; + case ARMin_NShl64: + vex_printf("vshl.i64 "); + ppHRegARM(i->ARMin.NShl64.dst); + vex_printf(", "); + ppHRegARM(i->ARMin.NShl64.src); + vex_printf(", #%u", i->ARMin.NShl64.amt); + return; case ARMin_NDual: vex_printf("%s%s%s ", showARMNeonDualOp(i->ARMin.NDual.op), @@ -2257,6 +2275,10 @@ void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->ARMin.NShift.argL); addHRegUse(u, HRmRead, i->ARMin.NShift.argR); return; + case ARMin_NShl64: + addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst); + addHRegUse(u, HRmRead, i->ARMin.NShl64.src); + return; case ARMin_NDual: addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1); addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2); @@ -2456,6 +2478,10 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 ) i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL); i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR); return; + case ARMin_NShl64: + i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst); + i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src); + return; case ARMin_NDual: i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1); i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2); @@ -4495,6 +4521,26 @@ Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc, *p++ = insn; goto done; } + case ARMin_NShl64: { + HReg regDreg = i->ARMin.NShl64.dst; + HReg regMreg = i->ARMin.NShl64.src; + UInt amt = i->ARMin.NShl64.amt; + vassert(amt >= 1 && amt <= 63); + vassert(hregClass(regDreg) == HRcFlt64); + vassert(hregClass(regMreg) == HRcFlt64); + UInt regD = dregNo(regDreg); + UInt regM = dregNo(regMreg); + UInt D = (regD >> 4) & 1; + UInt Vd = regD & 0xF; + UInt L = 1; + UInt Q = 0; /* always 64-bit */ + UInt M = (regM >> 4) & 1; + UInt Vm = regM & 0xF; + UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1), + amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm); + *p++ = insn; + goto done; + } case ARMin_NeonImm: { UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0; UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) : diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index 6f780334ea..247d57c7bd 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -602,6 +602,7 @@ typedef ARMin_NBinary, ARMin_NBinaryS, ARMin_NShift, + ARMin_NShl64, // special case 64-bit shift of Dreg by immediate ARMin_NeonImm, ARMin_NCMovQ, /* This is not a NEON instruction. Actually there is no corresponding @@ -885,6 +886,11 @@ typedef UInt size; Bool Q; } NShift; + struct { + HReg dst; + HReg src; + UInt amt; /* 1..63 only */ + } NShl64; struct { Bool isLoad; HReg dQ; @@ -1001,6 +1007,7 @@ extern ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp, HReg, HReg, HReg, UInt, Bool ); extern ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp, HReg, HReg, HReg, UInt, Bool ); +extern ARMInstr* ARMInstr_NShl64 ( HReg, HReg, UInt ); extern ARMInstr* ARMInstr_NeonImm ( HReg, ARMNImm* ); extern ARMInstr* ARMInstr_NCMovQ ( ARMCondCode, HReg, HReg ); extern ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ); diff --git a/VEX/priv/host_arm_isel.c b/VEX/priv/host_arm_isel.c index b744cc380c..1068ff0648 100644 --- a/VEX/priv/host_arm_isel.c +++ b/VEX/priv/host_arm_isel.c @@ -2673,16 +2673,29 @@ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e ) HReg res = newVRegD(env); HReg tmp = newVRegD(env); HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1); + /* special-case Shl64(x, imm8) since the Neon front + end produces a lot of those for V{LD,ST}{1,2,3,4}. */ + if (e->Iex.Binop.op == Iop_Shl64 + && e->Iex.Binop.arg2->tag == Iex_Const) { + vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8); + Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8; + if (nshift >= 1 && nshift <= 63) { + addInstr(env, ARMInstr_NShl64(res, argL, nshift)); + return res; + } + /* else fall through to general case */ + } HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); UInt size; switch (e->Iex.Binop.op) { - case Iop_ShlN8x8: size = 0; break; + case Iop_ShlN8x8: size = 0; break; case Iop_ShlN16x4: size = 1; break; case Iop_ShlN32x2: size = 2; break; - case Iop_Shl64: size = 3; break; + case Iop_Shl64: size = 3; break; default: vassert(0); } - addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False)); + addInstr(env, ARMInstr_NUnary(ARMneon_DUP, + tmp, argR, 0, False)); addInstr(env, ARMInstr_NShift(ARMneon_VSHL, res, argL, tmp, size, False)); return res;