From: Julian Seward
Date: Sun, 24 Aug 2014 20:36:14 +0000 (+0000)
Subject: arm64: implement:
X-Git-Tag: svn/VALGRIND_3_10_1^2~39
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=38736633862c0eeb94051cae913ed4b1488c78f5;p=thirdparty%2Fvalgrind.git

arm64: implement:
   {zip,uzp,trn}{1,2} (vector)
   urecpe, ursqrte (vector)

git-svn-id: svn://svn.valgrind.org/vex/trunk@2933
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 837abddbe0..d0db663d66 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -6937,7 +6937,99 @@ Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
 static
 Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
 {
+   /* 31 29     23   21 20 15 14     11 9 4
+      0  q 001110 size 0  m  0 opcode 10 n d
+      Decode fields: opcode
+   */
 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
+   if (INSN(31,31) != 0
+       || INSN(29,24) != BITS6(0,0,1,1,1,0)
+       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
+      return False;
+   }
+   UInt bitQ   = INSN(30,30);
+   UInt size   = INSN(23,22);
+   UInt mm     = INSN(20,16);
+   UInt opcode = INSN(14,12);
+   UInt nn     = INSN(9,5);
+   UInt dd     = INSN(4,0);
+
+   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
+      /* -------- 001 UZP1 std7_std7_std7 -------- */
+      /* -------- 101 UZP2 std7_std7_std7 -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool isUZP1 = opcode == BITS3(0,0,1);
+      IROp   op   = isUZP1 ? mkVecCATEVENLANES(size)
+                           : mkVecCATODDLANES(size);
+      IRTemp preL = newTempV128();
+      IRTemp preR = newTempV128();
+      IRTemp res  = newTempV128();
+      if (bitQ == 0) {
+         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
+                                                  getQReg128(nn)));
+         assign(preR, mkexpr(preL));
+      } else {
+         assign(preL, getQReg128(mm));
+         assign(preR, getQReg128(nn));
+      }
+      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
+   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
+      /* -------- 010 TRN1 std7_std7_std7 -------- */
+      /* -------- 110 TRN2 std7_std7_std7 -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool isTRN1 = opcode == BITS3(0,1,0);
+      IROp   op1  = isTRN1 ? mkVecCATEVENLANES(size)
+                           : mkVecCATODDLANES(size);
+      IROp   op2  = mkVecINTERLEAVEHI(size);
+      IRTemp srcM = newTempV128();
+      IRTemp srcN = newTempV128();
+      IRTemp res  = newTempV128();
+      assign(srcM, getQReg128(mm));
+      assign(srcN, getQReg128(nn));
+      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
+                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
+   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
+      /* -------- 011 ZIP1 std7_std7_std7 -------- */
+      /* -------- 111 ZIP2 std7_std7_std7 -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool isZIP1 = opcode == BITS3(0,1,1);
+      IROp   op   = isZIP1 ? mkVecINTERLEAVELO(size)
+                           : mkVecINTERLEAVEHI(size);
+      IRTemp preL = newTempV128();
+      IRTemp preR = newTempV128();
+      IRTemp res  = newTempV128();
+      if (bitQ == 0 && !isZIP1) {
+         assign(preL, binop(Iop_ShlV128, getQReg128(mm), mkU8(32)));
+         assign(preR, binop(Iop_ShlV128, getQReg128(nn), mkU8(32)));
+      } else {
+         assign(preL, getQReg128(mm));
+         assign(preR, getQReg128(nn));
+      }
+      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
+      return True;
+   }
+
    return False;
 #  undef INSN
 }
@@ -10056,6 +10148,21 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
+      /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
+      /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
+      Bool isREC = bitU == 0;
+      IROp   op  = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
+      IRTemp res = newTempV128();
+      assign(res, unop(op, getQReg128(nn)));
+      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
+      const HChar* nm  = isREC ? "urecpe" : "ursqrte";
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s\n", nm,
+          nameQReg128(dd), arr, nameQReg128(nn), arr);
+      return True;
+   }
+
    if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
       /* -------- 0,0x,11101: SCVTF -------- */
       /* -------- 1,0x,11101: UCVTF -------- */
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index df9b4270da..66beb81ba0 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -722,29 +722,31 @@ static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
                                 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
 {
    switch (op) {
-      case ARM64vecu_FNEG64x2:  *nm = "fneg "; *ar = "2d";  return;
-      case ARM64vecu_FNEG32x4:  *nm = "fneg "; *ar = "4s";  return;
-      case ARM64vecu_FABS64x2:  *nm = "fabs "; *ar = "2d";  return;
-      case ARM64vecu_FABS32x4:  *nm = "fabs "; *ar = "4s";  return;
-      case ARM64vecu_NOT:       *nm = "not ";  *ar = "all"; return;
-      case ARM64vecu_ABS64x2:   *nm = "abs ";  *ar = "2d";  return;
-      case ARM64vecu_ABS32x4:   *nm = "abs ";  *ar = "4s";  return;
-      case ARM64vecu_ABS16x8:   *nm = "abs ";  *ar = "8h";  return;
-      case ARM64vecu_ABS8x16:   *nm = "abs ";  *ar = "16b"; return;
-      case ARM64vecu_CLS32x4:   *nm = "cls ";  *ar = "4s";  return;
-      case ARM64vecu_CLS16x8:   *nm = "cls ";  *ar = "8h";  return;
-      case ARM64vecu_CLS8x16:   *nm = "cls ";  *ar = "16b"; return;
-      case ARM64vecu_CLZ32x4:   *nm = "clz ";  *ar = "4s";  return;
-      case ARM64vecu_CLZ16x8:   *nm = "clz ";  *ar = "8h";  return;
-      case ARM64vecu_CLZ8x16:   *nm = "clz ";  *ar = "16b"; return;
-      case ARM64vecu_CNT8x16:   *nm = "cnt ";  *ar = "16b"; return;
-      case ARM64vecu_RBIT:      *nm = "rbit "; *ar = "16b"; return;
-      case ARM64vecu_REV1616B:  *nm = "rev16"; *ar = "16b"; return;
-      case ARM64vecu_REV3216B:  *nm = "rev32"; *ar = "16b"; return;
-      case ARM64vecu_REV328H:   *nm = "rev32"; *ar = "8h";  return;
-      case ARM64vecu_REV6416B:  *nm = "rev64"; *ar = "16b"; return;
-      case ARM64vecu_REV648H:   *nm = "rev64"; *ar = "8h";  return;
-      case ARM64vecu_REV644S:   *nm = "rev64"; *ar = "4s";  return;
+      case ARM64vecu_FNEG64x2:    *nm = "fneg ";   *ar = "2d";  return;
+      case ARM64vecu_FNEG32x4:    *nm = "fneg ";   *ar = "4s";  return;
+      case ARM64vecu_FABS64x2:    *nm = "fabs ";   *ar = "2d";  return;
+      case ARM64vecu_FABS32x4:    *nm = "fabs ";   *ar = "4s";  return;
+      case ARM64vecu_NOT:         *nm = "not ";    *ar = "all"; return;
+      case ARM64vecu_ABS64x2:     *nm = "abs ";    *ar = "2d";  return;
+      case ARM64vecu_ABS32x4:     *nm = "abs ";    *ar = "4s";  return;
+      case ARM64vecu_ABS16x8:     *nm = "abs ";    *ar = "8h";  return;
+      case ARM64vecu_ABS8x16:     *nm = "abs ";    *ar = "16b"; return;
+      case ARM64vecu_CLS32x4:     *nm = "cls ";    *ar = "4s";  return;
+      case ARM64vecu_CLS16x8:     *nm = "cls ";    *ar = "8h";  return;
+      case ARM64vecu_CLS8x16:     *nm = "cls ";    *ar = "16b"; return;
+      case ARM64vecu_CLZ32x4:     *nm = "clz ";    *ar = "4s";  return;
+      case ARM64vecu_CLZ16x8:     *nm = "clz ";    *ar = "8h";  return;
+      case ARM64vecu_CLZ8x16:     *nm = "clz ";    *ar = "16b"; return;
+      case ARM64vecu_CNT8x16:     *nm = "cnt ";    *ar = "16b"; return;
+      case ARM64vecu_RBIT:        *nm = "rbit ";   *ar = "16b"; return;
+      case ARM64vecu_REV1616B:    *nm = "rev16";   *ar = "16b"; return;
+      case ARM64vecu_REV3216B:    *nm = "rev32";   *ar = "16b"; return;
+      case ARM64vecu_REV328H:     *nm = "rev32";   *ar = "8h";  return;
+      case ARM64vecu_REV6416B:    *nm = "rev64";   *ar = "16b"; return;
+      case ARM64vecu_REV648H:     *nm = "rev64";   *ar = "8h";  return;
+      case ARM64vecu_REV644S:     *nm = "rev64";   *ar = "4s";  return;
+      case ARM64vecu_URECPE32x4:  *nm = "urecpe";  *ar = "4s";  return;
+      case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s";  return;
       default: vpanic("showARM64VecUnaryOp");
    }
 }
@@ -2548,6 +2550,7 @@ static inline UChar qregNo ( HReg r )
 #define X101110  BITS8(0,0, 1,0,1,1,1,0)
 #define X110000  BITS8(0,0, 1,1,0,0,0,0)
 #define X110001  BITS8(0,0, 1,1,0,0,0,1)
+#define X110010  BITS8(0,0, 1,1,0,0,1,0)
 #define X110100  BITS8(0,0, 1,1,0,1,0,0)
 #define X110101  BITS8(0,0, 1,1,0,1,0,1)
 #define X110111  BITS8(0,0, 1,1,0,1,1,1)
@@ -4605,6 +4608,9 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             010 01110 00 1 00000 000010 n d   REV64 Vd.16b, Vn.16b
             010 01110 01 1 00000 000010 n d   REV64 Vd.8h, Vn.8h
             010 01110 10 1 00000 000010 n d   REV64 Vd.4s, Vn.4s
+
+            010 01110 10 1 00001 110010 n d   URECPE  Vd.4s, Vn.4s
+            011 01110 10 1 00001 110010 n d   URSQRTE Vd.4s, Vn.4s
          */
          UInt vD = qregNo(i->ARM64in.VUnaryV.dst);
          UInt vN = qregNo(i->ARM64in.VUnaryV.arg);
@@ -4678,6 +4684,12 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          case ARM64vecu_REV644S:
             *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
             break;
+         case ARM64vecu_URECPE32x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
+            break;
+         case ARM64vecu_URSQRTE32x4:
+            *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
+            break;
          default:
             goto bad;
          }
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index e100b0fc2c..a2aa7ac16c 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -386,7 +386,7 @@ typedef
 
 typedef
    enum {
-      ARM64vecmo_SUQADD64x2=335, ARM64vecmo_SUQADD32x4,
+      ARM64vecmo_SUQADD64x2=300, ARM64vecmo_SUQADD32x4,
       ARM64vecmo_SUQADD16x8,     ARM64vecmo_SUQADD8x16,
       ARM64vecmo_USQADD64x2,     ARM64vecmo_USQADD32x4,
       ARM64vecmo_USQADD16x8,     ARM64vecmo_USQADD8x16,
@@ -396,7 +396,7 @@ typedef
 
 typedef
    enum {
-      ARM64vecu_FNEG64x2=300, ARM64vecu_FNEG32x4,
+      ARM64vecu_FNEG64x2=350, ARM64vecu_FNEG32x4,
       ARM64vecu_FABS64x2,     ARM64vecu_FABS32x4,
       ARM64vecu_NOT,
       ARM64vecu_ABS64x2,      ARM64vecu_ABS32x4,
@@ -408,13 +408,15 @@ typedef
       ARM64vecu_REV1616B,
       ARM64vecu_REV3216B, ARM64vecu_REV328H,
       ARM64vecu_REV6416B, ARM64vecu_REV648H, ARM64vecu_REV644S,
+      ARM64vecu_URECPE32x4,
+      ARM64vecu_URSQRTE32x4,
       ARM64vecu_INVALID
    }
    ARM64VecUnaryOp;
 
 typedef
    enum {
-      ARM64vecshi_USHR64x2=350, ARM64vecshi_USHR32x4,
+      ARM64vecshi_USHR64x2=400, ARM64vecshi_USHR32x4,
       ARM64vecshi_USHR16x8,     ARM64vecshi_USHR8x16,
       ARM64vecshi_SSHR64x2,     ARM64vecshi_SSHR32x4,
       ARM64vecshi_SSHR16x8,     ARM64vecshi_SSHR8x16,
@@ -441,7 +443,7 @@ typedef
 
 typedef
    enum {
-      ARM64vecna_XTN=400,
+      ARM64vecna_XTN=450,
       ARM64vecna_SQXTN,
       ARM64vecna_UQXTN,
       ARM64vecna_SQXTUN,
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 70c80732ef..0da86c5077 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -2213,35 +2213,39 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          case Iop_Reverse8sIn16_x8: case Iop_Reverse8sIn32_x4:
          case Iop_Reverse16sIn32_x4: case Iop_Reverse8sIn64_x2:
          case Iop_Reverse16sIn64_x2:
-         case Iop_Reverse32sIn64_x2:
+         case Iop_Reverse32sIn64_x2:
+         case Iop_RecipEst32Ux4:
+         case Iop_RSqrtEst32Ux4:
          {
             HReg res = newVRegV(env);
             HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
             ARM64VecUnaryOp op = ARM64vecu_INVALID;
             switch (e->Iex.Unop.op) {
-               case Iop_NotV128:           op = ARM64vecu_NOT;      break;
-               case Iop_Abs64Fx2:          op = ARM64vecu_FABS64x2; break;
-               case Iop_Abs32Fx4:          op = ARM64vecu_FABS32x4; break;
-               case Iop_Neg64Fx2:          op = ARM64vecu_FNEG64x2; break;
-               case Iop_Neg32Fx4:          op = ARM64vecu_FNEG32x4; break;
-               case Iop_Abs64x2:           op = ARM64vecu_ABS64x2;  break;
-               case Iop_Abs32x4:           op = ARM64vecu_ABS32x4;  break;
-               case Iop_Abs16x8:           op = ARM64vecu_ABS16x8;  break;
-               case Iop_Abs8x16:           op = ARM64vecu_ABS8x16;  break;
-               case Iop_Cls32x4:           op = ARM64vecu_CLS32x4;  break;
-               case Iop_Cls16x8:           op = ARM64vecu_CLS16x8;  break;
-               case Iop_Cls8x16:           op = ARM64vecu_CLS8x16;  break;
-               case Iop_Clz32x4:           op = ARM64vecu_CLZ32x4;  break;
-               case Iop_Clz16x8:           op = ARM64vecu_CLZ16x8;  break;
-               case Iop_Clz8x16:           op = ARM64vecu_CLZ8x16;  break;
-               case Iop_Cnt8x16:           op = ARM64vecu_CNT8x16;  break;
-               case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;     break;
-               case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B; break;
-               case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B; break;
-               case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;  break;
-               case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B; break;
-               case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;  break;
-               case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;  break;
+               case Iop_NotV128:           op = ARM64vecu_NOT;         break;
+               case Iop_Abs64Fx2:          op = ARM64vecu_FABS64x2;    break;
+               case Iop_Abs32Fx4:          op = ARM64vecu_FABS32x4;    break;
+               case Iop_Neg64Fx2:          op = ARM64vecu_FNEG64x2;    break;
+               case Iop_Neg32Fx4:          op = ARM64vecu_FNEG32x4;    break;
+               case Iop_Abs64x2:           op = ARM64vecu_ABS64x2;     break;
+               case Iop_Abs32x4:           op = ARM64vecu_ABS32x4;     break;
+               case Iop_Abs16x8:           op = ARM64vecu_ABS16x8;     break;
+               case Iop_Abs8x16:           op = ARM64vecu_ABS8x16;     break;
+               case Iop_Cls32x4:           op = ARM64vecu_CLS32x4;     break;
+               case Iop_Cls16x8:           op = ARM64vecu_CLS16x8;     break;
+               case Iop_Cls8x16:           op = ARM64vecu_CLS8x16;     break;
+               case Iop_Clz32x4:           op = ARM64vecu_CLZ32x4;     break;
+               case Iop_Clz16x8:           op = ARM64vecu_CLZ16x8;     break;
+               case Iop_Clz8x16:           op = ARM64vecu_CLZ8x16;     break;
+               case Iop_Cnt8x16:           op = ARM64vecu_CNT8x16;     break;
+               case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;        break;
+               case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B;    break;
+               case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B;    break;
+               case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;     break;
+               case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B;    break;
+               case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;     break;
+               case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;     break;
+               case Iop_RecipEst32Ux4:     op = ARM64vecu_URECPE32x4;  break;
+               case Iop_RSqrtEst32Ux4:     op = ARM64vecu_URSQRTE32x4; break;
                default: vassert(0);
             }
             addInstr(env, ARM64Instr_VUnaryV(op, res, arg));