From: Julian Seward
Date: Thu, 19 Jun 2014 22:20:47 +0000 (+0000)
Subject: Implement: dup_{d_d[], s_s[], h_h[], b_b[]}, ext
X-Git-Tag: svn/VALGRIND_3_10_1^2~91
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3317aa4a75948266a5e9c9dbb1dbfbc4b3d3aad4;p=thirdparty%2Fvalgrind.git

Implement: dup_{d_d[], s_s[], h_h[], b_b[]}, ext

git-svn-id: svn://svn.valgrind.org/vex/trunk@2880
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 63186fee26..07d19c9013 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -5555,7 +5555,58 @@ void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
 static
 Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
 {
+   /* 31  29     23  21 20 15 14  10 9 4
+      0 q 101110 op2 0  m  0 imm4 0  n d
+      Decode fields: op2
+   */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,31) != 0
+       || INSN(29,24) != BITS6(1,0,1,1,1,0)
+       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
+      return False;
+   }
+   UInt bitQ = INSN(30,30);
+   UInt op2  = INSN(23,22);
+   UInt mm   = INSN(20,16);
+   UInt imm4 = INSN(14,11);
+   UInt nn   = INSN(9,5);
+   UInt dd   = INSN(4,0);
+
+   if (op2 == BITS2(0,0)) {
+      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
+      IRTemp sHi = newTemp(Ity_V128);
+      IRTemp sLo = newTemp(Ity_V128);
+      IRTemp res = newTemp(Ity_V128);
+      assign(sHi, getQReg128(mm));
+      assign(sLo, getQReg128(nn));
+      if (bitQ == 1) {
+         if (imm4 == 0) {
+            assign(res, mkexpr(sLo));
+         } else {
+            vassert(imm4 <= 15);
+            assign(res,
+                   binop(Iop_OrV128,
+                         binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
+                         binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
+         }
+         putQReg128(dd, mkexpr(res));
+         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
+      } else {
+         if (imm4 >= 8) return False;
+         if (imm4 == 0) {
+            assign(res, mkexpr(sLo));
+         } else {
+            assign(res,
+                   binop(Iop_ShrV128,
+                         binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
+                         mkU8(8 * imm4)));
+         }
+         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
+      }
+      return True;
+   }
+   return False;
 #  undef INSN
 }
 
@@ -6115,7 +6166,66 @@ Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
 static
 Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
 {
+   /* 31 28       20   15 14   10 9 4
+      01 op 11110000 imm5 0 imm4 1  n d
+      Decode fields: op,imm4
+   */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,30) != BITS2(0,1)
+       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
+       || INSN(15,15) != 0 || INSN(10,10) != 1) {
+      return False;
+   }
+   UInt bitOP = INSN(29,29);
+   UInt imm5  = INSN(20,16);
+   UInt imm4  = INSN(14,11);
+   UInt nn    = INSN(9,5);
+   UInt dd    = INSN(4,0);
+
+   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
+      /* -------- 0,0000 DUP (element, scalar) -------- */
+      IRTemp w0     = newTemp(Ity_I64);
+      const HChar* arTs = "??";
+      IRType laneTy = Ity_INVALID;
+      UInt   laneNo = 16; /* invalid */
+      if (imm5 & 1) {
+         arTs   = "b";
+         laneNo = (imm5 >> 1) & 15;
+         laneTy = Ity_I8;
+         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
+      }
+      else if (imm5 & 2) {
+         arTs   = "h";
+         laneNo = (imm5 >> 2) & 7;
+         laneTy = Ity_I16;
+         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
+      }
+      else if (imm5 & 4) {
+         arTs   = "s";
+         laneNo = (imm5 >> 3) & 3;
+         laneTy = Ity_I32;
+         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
+      }
+      else if (imm5 & 8) {
+         arTs   = "d";
+         laneNo = (imm5 >> 4) & 1;
+         laneTy = Ity_I64;
+         assign(w0, getQRegLane(nn, laneNo, laneTy));
+      }
+      else {
+         /* invalid; leave laneTy unchanged. */
+      }
+      /* */
+      if (laneTy != Ity_INVALID) {
+         vassert(laneNo < 16);
+         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
+         DIP("dup %s, %s.%s[%u]\n",
+             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
+         return True;
+      }
+      /* else fall through */
+   }
+   return False;
 #  undef INSN
 }
 
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index d7445399ac..8da0d30d8b 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -1690,6 +1690,16 @@ ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
    vassert(amt > 0 && amt <= maxSh);
    return i;
 }
+ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
+   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+   i->tag                 = ARM64in_VExtV;
+   i->ARM64in.VExtV.dst   = dst;
+   i->ARM64in.VExtV.srcLo = srcLo;
+   i->ARM64in.VExtV.srcHi = srcHi;
+   i->ARM64in.VExtV.amtB  = amtB;
+   vassert(amtB >= 1 && amtB <= 15);
+   return i;
+}
 //ZZ ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
 //ZZ    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
 //ZZ    i->tag = ARMin_VAluS;
@@ -2335,6 +2345,16 @@ void ppARM64Instr ( ARM64Instr* i ) {
          vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
          return;
       }
+      case ARM64in_VExtV: {
+         vex_printf("ext ");
+         ppHRegARM64(i->ARM64in.VExtV.dst);
+         vex_printf(".16b, ");
+         ppHRegARM64(i->ARM64in.VExtV.srcLo);
+         vex_printf(".16b, ");
+         ppHRegARM64(i->ARM64in.VExtV.srcHi);
+         vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
+         return;
+      }
 //ZZ    case ARMin_VAluS:
 //ZZ       vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
 //ZZ       ppHRegARM(i->ARMin.VAluS.dst);
@@ -2816,6 +2836,10 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
          addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
          addHRegUse(u, HRmRead,  i->ARM64in.VShiftImmV.src);
          return;
+      case ARM64in_VExtV:
+         addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
+         addHRegUse(u, HRmRead,  i->ARM64in.VExtV.srcLo);
+         addHRegUse(u, HRmRead,  i->ARM64in.VExtV.srcHi);
 //ZZ       case ARMin_VAluS:
 //ZZ          addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
 //ZZ          addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
@@ -3112,6 +3136,12 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          i->ARM64in.VShiftImmV.src
            = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
          return;
+      case ARM64in_VExtV:
+         i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
+         i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
+         i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
+         return;
+
 //ZZ       case ARMin_VAluS:
 //ZZ          i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
 //ZZ          i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
@@ -5410,8 +5440,8 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
       case ARM64in_VShiftImmV: {
          /*
-            0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #sh
-            0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #sh
+            011 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #sh
+            010 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #sh
             where immh:immb
                = case T of
                     2d  | sh in 1..63 -> let xxxxxx = 64-sh in 1xxx:xxx
@@ -5419,7 +5449,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                     8h  | sh in 1..15 -> let xxxx = 16-sh in 001x:xxx
                     16b | sh in 1..7  -> let xxx = 8-sh in 0001:xxx
 
-            0q0 011110 immh immb 010101 n d  SHL Vd.T, Vn.T, #sh
+            010 011110 immh immb 010101 n d  SHL Vd.T, Vn.T, #sh
             where immh:immb
                = case T of
                     2d  | sh in 1..63 -> let xxxxxx = sh in 1xxx:xxx
@@ -5487,8 +5517,6 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                goto done;
             }
             break;
 
-
-            /* 8x16 cases */
             case ARM64vecsh_SSHR8x16: syned = True;
             case ARM64vecsh_USHR8x16: /* fallthrough */
@@ -5507,12 +5535,26 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                goto done;
             }
             break;
-
             default:
                break;
          }
          goto bad;
       }
+      case ARM64in_VExtV: {
+         /*
+            011 01110 000 m 0 imm4 0 n d  EXT Vd.16b, Vn.16b, Vm.16b, #imm4
+            where imm4 = the shift amount, in bytes,
+                  Vn is low operand, Vm is high operand
+         */
+         UInt vD   = qregNo(i->ARM64in.VExtV.dst);
+         UInt vN   = qregNo(i->ARM64in.VExtV.srcLo);
+         UInt vM   = qregNo(i->ARM64in.VExtV.srcHi);
+         UInt imm4 = i->ARM64in.VExtV.amtB;
+         vassert(imm4 >= 1 && imm4 <= 15);
+         *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
+                              X000000 | (imm4 << 1), vN, vD);
+         goto done;
+      }
 //ZZ    case ARMin_VAluS: {
 //ZZ       UInt dN = fregNo(i->ARMin.VAluS.argL);
 //ZZ       UInt dD = fregNo(i->ARMin.VAluS.dst);
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 9b8491e7de..23be594c12 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -561,6 +561,7 @@ typedef
       ARM64in_VUnaryV,
       ARM64in_VNarrowV,
       ARM64in_VShiftImmV,
+      ARM64in_VExtV,
 //ZZ       ARMin_VAluS,
 //ZZ       ARMin_VCMovD,
 //ZZ       ARMin_VCMovS,
@@ -855,6 +856,12 @@ typedef
             HReg src;
             UInt amt;
          } VShiftImmV;
+         struct {
+            HReg dst;
+            HReg srcLo;
+            HReg srcHi;
+            UInt amtB;
+         } VExtV;
 //ZZ          /* 32-bit FP binary arithmetic */
 //ZZ          struct {
 //ZZ             ARMVfpOp op;
@@ -1051,6 +1058,8 @@ extern ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg, HReg );
 extern ARM64Instr* ARM64Instr_VNarrowV ( UInt dszBlg2, HReg dst, HReg src );
 extern ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
                                            HReg dst, HReg src, UInt amt );
+extern ARM64Instr* ARM64Instr_VExtV ( HReg dst,
+                                      HReg srcLo, HReg srcHi, UInt amtB );
 //ZZ extern ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg, HReg, HReg );
 //ZZ extern ARMInstr* ARMInstr_VCMovD ( ARMCondCode, HReg dst, HReg src );
 //ZZ extern ARMInstr* ARMInstr_VCMovS ( ARMCondCode, HReg dst, HReg src );
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 9aa03372e8..7e85082af0 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -5626,6 +5626,49 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          /* else fall out; this is unhandled */
          break;
       }
+
+      case Iop_ShlV128:
+      case Iop_ShrV128: {
+         Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
+         /* This is tricky.  Generate an EXT instruction with zeroes in
+            the high operand (shift right) or low operand (shift left).
+            Note that we can only slice in the EXT instruction at a byte
+            level of granularity, so the shift amount needs careful
+            checking. */
+         IRExpr* argL = e->Iex.Binop.arg1;
+         IRExpr* argR = e->Iex.Binop.arg2;
+         if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
+            UInt amt = argR->Iex.Const.con->Ico.U8;
+            Bool amtOK = False;
+            switch (amt) {
+               case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
+               case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
+               case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
+                  amtOK = True; break;
+            }
+            /* We could also deal with amt==0 by copying the source to
+               the destination, but there's no need for that so far. */
+            if (amtOK) {
+               HReg src = iselV128Expr(env, argL);
+               HReg srcZ = newVRegV(env);
+               addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
+               UInt immB = amt / 8;
+               vassert(immB >= 1 && immB <= 15);
+               HReg dst = newVRegV(env);
+               if (isSHR) {
+                  addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
+                                                      immB));
+               } else {
+                  addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
+                                                      16 - immB));
+               }
+               return dst;
+            }
+         }
+         /* else fall out; this is unhandled */
+         break;
+      }
+
 //ZZ       case Iop_CmpGT8Ux16:
 //ZZ       case Iop_CmpGT16Ux8:
 //ZZ       case Iop_CmpGT32Ux4: {
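
For reference (not part of the patch): the front-end IR above, the ARM64in_VExtV encoding case, and the new Iop_ShlV128/Iop_ShrV128 selection all rely on the same byte-window view of EXT, namely that the result is 16 consecutive bytes of the 32-byte concatenation Vm:Vn, starting at byte imm4, with Vn supplying the low half. The following standalone C sketch models that semantics and checks the isel mapping (a 128-bit shift right by 8*n bits equals EXT with a zero high operand and amount n; a shift left by 8*n equals EXT with a zero low operand and amount 16-n). It is purely illustrative and independent of the VEX sources; all names in it are hypothetical.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Model of EXT Vd.16b, Vn.16b, Vm.16b, #imm4: result byte i is byte
   (imm4 + i) of the 32-byte value Vm:Vn, where Vn is the low half and
   byte 0 is the least significant byte. */
static void ext_16b ( uint8_t res[16], const uint8_t vn[16],
                      const uint8_t vm[16], unsigned imm4 )
{
   uint8_t concat[32];
   memcpy(concat,      vn, 16);   /* low  half = Vn */
   memcpy(concat + 16, vm, 16);   /* high half = Vm */
   for (unsigned i = 0; i < 16; i++)
      res[i] = concat[imm4 + i];
}

int main ( void )
{
   uint8_t src[16], zero[16] = {0}, shr[16], shl[16], viaExt[16];
   for (unsigned i = 0; i < 16; i++) src[i] = (uint8_t)(i + 1);
   for (unsigned n = 1; n <= 15; n++) {
      /* Byte-wise models of ShrV128/ShlV128 by 8*n bits. */
      for (unsigned i = 0; i < 16; i++) {
         shr[i] = (i + n < 16) ? src[i + n] : 0;
         shl[i] = (i >= n)     ? src[i - n] : 0;
      }
      ext_16b(viaExt, src, zero, n);        /* src is the low operand  */
      if (memcmp(shr, viaExt, 16) != 0) { printf("SHR mismatch, n=%u\n", n); return 1; }
      ext_16b(viaExt, zero, src, 16 - n);   /* src is the high operand */
      if (memcmp(shl, viaExt, 16) != 0) { printf("SHL mismatch, n=%u\n", n); return 1; }
   }
   printf("EXT-based V128 shift mapping agrees for all byte amounts 1..15\n");
   return 0;
}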
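Also for reference, and again only an illustrative sketch rather than part of the patch: the DUP (element, scalar) decode in dis_AdvSIMD_scalar_copy derives the lane size from the lowest set bit of imm5 and the lane number from the bits above it. A tiny C program that prints the resulting mapping for every imm5 value:

#include <stdio.h>

int main ( void )
{
   /* Mirrors the imm5 tests above: bit 0 set -> b, else bit 1 -> h,
      else bit 2 -> s, else bit 3 -> d, else invalid. */
   for (unsigned imm5 = 0; imm5 < 32; imm5++) {
      if (imm5 & 1)
         printf("imm5=%2u -> b, lane %u\n", imm5, (imm5 >> 1) & 15);
      else if (imm5 & 2)
         printf("imm5=%2u -> h, lane %u\n", imm5, (imm5 >> 2) & 7);
      else if (imm5 & 4)
         printf("imm5=%2u -> s, lane %u\n", imm5, (imm5 >> 3) & 3);
      else if (imm5 & 8)
         printf("imm5=%2u -> d, lane %u\n", imm5, (imm5 >> 4) & 1);
      else
         printf("imm5=%2u -> invalid\n", imm5);
   }
   return 0;
}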