From: Julian Seward Date: Mon, 1 Sep 2014 14:13:15 +0000 (+0000) Subject: arm64: route all whole-vector shift/rotate/slice operations X-Git-Tag: svn/VALGRIND_3_10_1^2~32 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=083838de77f22b770b91c0b33196a7465146cf13;p=thirdparty%2Fvalgrind.git arm64: route all whole-vector shift/rotate/slice operations through Iop_SliceV128, so as to give it some testing. Implement Iop_SliceV128 in the back end. git-svn-id: svn://svn.valgrind.org/vex/trunk@2940 --- diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index d0db663d66..735373e422 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -6844,11 +6844,9 @@ Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn) if (imm4 == 0) { assign(res, mkexpr(sLo)); } else { - vassert(imm4 <= 15); - assign(res, - binop(Iop_OrV128, - binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))), - binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4)))); + vassert(imm4 >= 1 && imm4 <= 15); + assign(res, triop(Iop_SliceV128, + mkexpr(sHi), mkexpr(sLo), mkU8(imm4))); } putQReg128(dd, mkexpr(res)); DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4); @@ -6857,10 +6855,12 @@ Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn) if (imm4 == 0) { assign(res, mkexpr(sLo)); } else { - assign(res, - binop(Iop_ShrV128, - binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)), - mkU8(8 * imm4))); + vassert(imm4 >= 1 && imm4 <= 7); + IRTemp hi64lo64 = newTempV128(); + assign(hi64lo64, binop(Iop_InterleaveLO64x2, + mkexpr(sHi), mkexpr(sLo))); + assign(res, triop(Iop_SliceV128, + mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4))); } putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4); @@ -7015,8 +7015,15 @@ Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn) IRTemp preR = newTempV128(); IRTemp res = newTempV128(); if (bitQ == 0 && !isZIP1) { - assign(preL, binop(Iop_ShlV128, getQReg128(mm), mkU8(32))); - assign(preR, binop(Iop_ShlV128, getQReg128(nn), mkU8(32))); + IRTemp z128 = newTempV128(); + assign(z128, mkV128(0x0000)); + // preL = Vm shifted left 32 bits + // preR = Vn shifted left 32 bits + assign(preL, triop(Iop_SliceV128, + getQReg128(mm), mkexpr(z128), mkU8(12))); + assign(preR, triop(Iop_SliceV128, + getQReg128(nn), mkexpr(z128), mkU8(12))); + } else { assign(preL, getQReg128(mm)); assign(preR, getQReg128(nn)); diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index 884d2c78f0..233c2750bc 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -2753,47 +2753,51 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) break; } - case Iop_ShlV128: - case Iop_ShrV128: { - Bool isSHR = e->Iex.Binop.op == Iop_ShrV128; - /* This is tricky. Generate an EXT instruction with zeroes in - the high operand (shift right) or low operand (shift left). - Note that we can only slice in the EXT instruction at a byte - level of granularity, so the shift amount needs careful - checking. */ - IRExpr* argL = e->Iex.Binop.arg1; - IRExpr* argR = e->Iex.Binop.arg2; - if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { - UInt amt = argR->Iex.Const.con->Ico.U8; - Bool amtOK = False; - switch (amt) { - case 0x08: case 0x10: case 0x18: case 0x20: case 0x28: - case 0x30: case 0x38: case 0x40: case 0x48: case 0x50: - case 0x58: case 0x60: case 0x68: case 0x70: case 0x78: - amtOK = True; break; - } - /* We could also deal with amt==0 by copying the source to - the destination, but there's no need for that so far. */ - if (amtOK) { - HReg src = iselV128Expr(env, argL); - HReg srcZ = newVRegV(env); - addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000)); - UInt immB = amt / 8; - vassert(immB >= 1 && immB <= 15); - HReg dst = newVRegV(env); - if (isSHR) { - addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/, - immB)); - } else { - addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/, - 16 - immB)); - } - return dst; - } - } - /* else fall out; this is unhandled */ - break; - } + // JRS 01 Sept 2014: these are tested and believed to be correct, + // but they are no longer used by the front end, hence commented + // out. They are replaced by Iop_SliceV128, which is more general + // and in many cases leads to better code overall. + //case Iop_ShlV128: + //case Iop_ShrV128: { + // Bool isSHR = e->Iex.Binop.op == Iop_ShrV128; + // /* This is tricky. Generate an EXT instruction with zeroes in + // the high operand (shift right) or low operand (shift left). + // Note that we can only slice in the EXT instruction at a byte + // level of granularity, so the shift amount needs careful + // checking. */ + // IRExpr* argL = e->Iex.Binop.arg1; + // IRExpr* argR = e->Iex.Binop.arg2; + // if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) { + // UInt amt = argR->Iex.Const.con->Ico.U8; + // Bool amtOK = False; + // switch (amt) { + // case 0x08: case 0x10: case 0x18: case 0x20: case 0x28: + // case 0x30: case 0x38: case 0x40: case 0x48: case 0x50: + // case 0x58: case 0x60: case 0x68: case 0x70: case 0x78: + // amtOK = True; break; + // } + // /* We could also deal with amt==0 by copying the source to + // the destination, but there's no need for that so far. */ + // if (amtOK) { + // HReg src = iselV128Expr(env, argL); + // HReg srcZ = newVRegV(env); + // addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000)); + // UInt immB = amt / 8; + // vassert(immB >= 1 && immB <= 15); + // HReg dst = newVRegV(env); + // if (isSHR) { + // addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/, + // immB)); + // } else { + // addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/, + // 16 - immB)); + // } + // return dst; + // } + // } + // /* else fall out; this is unhandled */ + // break; + //} case Iop_PolynomialMull8x8: case Iop_Mull32Ux2: @@ -2857,7 +2861,30 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR)); return dst; } - } + + if (triop->op == Iop_SliceV128) { + /* Note that, compared to ShlV128/ShrV128 just above, the shift + amount here is in bytes, not bits. */ + IRExpr* argHi = triop->arg1; + IRExpr* argLo = triop->arg2; + IRExpr* argAmt = triop->arg3; + if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) { + UInt amt = argAmt->Iex.Const.con->Ico.U8; + Bool amtOK = amt >= 1 && amt <= 15; + /* We could also deal with amt==0 by copying argLO to + the destination, but there's no need for that so far. */ + if (amtOK) { + HReg srcHi = iselV128Expr(env, argHi); + HReg srcLo = iselV128Expr(env, argLo); + HReg dst = newVRegV(env); + addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt)); + return dst; + } + } + /* else fall out; this is unhandled */ + } + + } /* if (e->tag == Iex_Triop) */ v128_expr_bad: ppIRExpr(e);