From 40d2d4089098e770929dd074704c892b796fa83c Mon Sep 17 00:00:00 2001
From: Julian Seward
Date: Thu, 19 Jun 2014 14:21:37 +0000
Subject: [PATCH] Implement: orr_{8h,4h}_imm8_shifted, orr_{4s,2s}_imm8_shifted,
 bic_{8h,4h}_imm8_shifted, bic_{4s,2s}_imm8_shifted, cls_std6_std6,
 cm{eq,ge,gt,hi,hs,tst}_d_d_d, cm{ge,gt,le,lt}_d_d_zero, cnt_{16,8}b_{16,8}b

git-svn-id: svn://svn.valgrind.org/vex/trunk@2879
---
 VEX/priv/guest_arm64_toIR.c | 209 +++++++++++++++++++++++++++++++++---
 VEX/priv/host_arm64_defs.c  |  47 +++++++-
 VEX/priv/host_arm64_defs.h  |   3 +
 VEX/priv/host_arm64_isel.c  |  22 ++--
 4 files changed, 255 insertions(+), 26 deletions(-)

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index c61d4f250d..63186fee26 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -5993,6 +5993,8 @@ Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
 {
    /* 31    28          18  15    11 9     4
       0q op 01111 00000 abc cmode 01 defgh d
+      Decode fields: q,op,cmode
+      Bit 11 is really "o2", but it is always zero.
    */
 # define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
    if (INSN(31,31) != 0
@@ -6006,24 +6008,71 @@ Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
    UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
    UInt dd       = INSN(4,0);
 
-   /* -------- {FMOV,MOVI} (vector, immediate) -------- */
-   /* Allowable op:cmode
-        FMOV = 1:1111
-        MOVI = 0:xx00, 0:0010, 1:0x00, 1:10x0, 1:110x, x:1110,
-   */
    ULong imm64lo  = 0;
    UInt  op_cmode = (bitOP << 4) | cmode;
    Bool  ok       = False;
+   Bool  isORR    = False;
+   Bool  isBIC    = False;
    switch (op_cmode) {
+      /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
       case BITS5(1,1,1,1,1): // 1:1111
+         ok = bitQ == 1; break;
+
+      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
+      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
       case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0): // 0:0x00
+
+      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
       case BITS5(0,0,0,1,0): // 0:0010
-      case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
+
+      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
+      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
+      case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:1x00
+
+      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
+      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
       case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
+
+      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
+      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
       case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
+
+      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
+      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
       case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
+
+      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
+      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
+      /* -------- x,0,1110 MOVI 8-bit -------- */
       case BITS5(1,1,1,1,0): case BITS5(0,1,1,1,0): // x:1110
          ok = True; break;
+
+      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
+      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
+      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
+         ok = True; isORR = True; break;
+
+      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
+      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
+      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
+         ok = True; isBIC = True; break;
+
+      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
+      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
+      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
+      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
+      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
+      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
+         ok = True; isORR = True; break;
+
+      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
+      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
+      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
+      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
+      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
+      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
+         ok = True; isBIC = True; break;
+
       default: break;
    }
 
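The ORR and BIC cases added above route through the same AdvSIMDExpandImm step as the existing MOVI/MVNI cases: cmode 0xx1 places the 8-bit immediate at byte position xx within each 32-bit lane, and cmode 10x1 at byte position x within each 16-bit lane. As a reference, here is a minimal C model of just those two groups (expand_orr_bic_imm is a hypothetical name used for illustration, not VEX's actual helper):

#include <stdint.h>

/* Expand imm8 for the ORR/BIC cmode groups only, replicating the
   shifted byte across one 64-bit half, as AdvSIMDExpandImm does. */
static uint64_t expand_orr_bic_imm(uint32_t cmode, uint32_t imm8)
{
   uint64_t imm = 0;
   if ((cmode & 0x9) == 0x1) {
      /* cmode = 0xx1: 32-bit lanes, imm8 shifted left by 0/8/16/24 */
      uint32_t sh = 8 * ((cmode >> 1) & 3);
      uint32_t w  = imm8 << sh;
      imm = ((uint64_t)w << 32) | w;
   } else if ((cmode & 0xD) == 0x9) {
      /* cmode = 10x1: 16-bit lanes, imm8 shifted left by 0/8 */
      uint32_t sh = 8 * ((cmode >> 1) & 1);
      uint64_t h  = (imm8 << sh) & 0xFFFF;
      imm = (h << 48) | (h << 32) | (h << 16) | h;
   }
   return imm;
}

For example, cmode=0011 with imm8=0xFF expands to 0x0000ff000000ff00; the hunk below ORs that pattern into each 64-bit half of Vd, or, complemented, ANDs it out for BIC.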
@@ -6031,9 +6080,29 @@ Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
       ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
    }
    if (ok) {
-      ULong imm64hi = (bitQ == 0 && bitOP == 0)  ? 0  : imm64lo;
-      putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
-      DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
+      if (isORR || isBIC) {
+         ULong inv
+            = isORR ? 0ULL : ~0ULL;
+         IRExpr* immV128
+            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
+         IRExpr* res
+            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
+         putQReg128(dd, bitQ == 0 ? unop(Iop_ZeroHI64ofV128, res) : res);
+         const HChar* nm = isORR ? "orr" : "bic";
+         if (bitQ == 0) {
+            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
+            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
+         } else {
+            putQReg128(dd, res);
+            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
+                nameQReg128(dd), imm64lo, imm64lo);
+         }
+      } else {
+         ULong imm64hi = (bitQ == 0 && bitOP == 0)  ? 0  : imm64lo;
+         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo));
+         putQReg128(dd, immV128);
+         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
+      }
       return True;
    }
    /* else fall through */
@@ -6168,6 +6237,40 @@ Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
    UInt dd = INSN(4,0);
 
    vassert(size < 4);
+   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
+      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
+      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
+      Bool    isGT = bitU == 0;
+      IRExpr* argL = getQReg128(nn);
+      IRExpr* argR = getQReg128(mm);
+      IRTemp  res  = newTemp(Ity_V128);
+      assign(res,
+             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
+                  : binop(Iop_CmpGT64Ux2, argL, argR));
+      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+      DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
+          nameQRegLO(dd, Ity_I64),
+          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+      return True;
+   }
+
+   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
+      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
+      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
+      Bool    isGE = bitU == 0;
+      IRExpr* argL = getQReg128(nn);
+      IRExpr* argR = getQReg128(mm);
+      IRTemp  res  = newTemp(Ity_V128);
+      assign(res,
+             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
+                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
+      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
+      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
+          nameQRegLO(dd, Ity_I64),
+          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
+      return True;
+   }
+
    if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
       /* -------- 0,11,10000 ADD d_d_d -------- */
       /* -------- 1,11,10000 SUB d_d_d -------- */
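There is no 64-bit >= IROp for the CMGE/CMHS cases above to map onto, so the translation derives one from the strict compares: x >= y is !(y > x). A one-lane C model of that identity, with all-ones/all-zero lane results as in the IR (function names are illustrative only):

#include <stdint.h>

/* One 64-bit lane of CmpGT64Sx2: all-ones when x > y (signed). */
static uint64_t lane_gt64s(int64_t x, int64_t y) { return x > y ? ~0ULL : 0; }

/* CMGE lane, as NotV128(CmpGT64Sx2(argR, argL)): swap, then negate. */
static uint64_t lane_ge64s(int64_t x, int64_t y) { return ~lane_gt64s(y, x); }

The unsigned pair (CMHS from CMHI via Iop_CmpGT64Ux2) works the same way.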
"cmge" : "cmhs", + nameQRegLO(dd, Ity_I64), + nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); + return True; + } + if (size == X11 && opcode == BITS5(1,0,0,0,0)) { /* -------- 0,11,10000 ADD d_d_d -------- */ /* -------- 1,11,10000 SUB d_d_d -------- */ @@ -6184,6 +6287,25 @@ Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn) return True; } + if (size == X11 && opcode == BITS5(1,0,0,0,1)) { + /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0 + /* -------- 1,11,10001 CMEQ d_d_d -------- */ // == + Bool isEQ = bitU == 1; + IRExpr* argL = getQReg128(nn); + IRExpr* argR = getQReg128(mm); + IRTemp res = newTemp(Ity_V128); + assign(res, + isEQ ? binop(Iop_CmpEQ64x2, argL, argR) + : unop(Iop_NotV128, binop(Iop_CmpEQ64x2, + binop(Iop_AndV128, argL, argR), + mkV128(0x0000)))); + putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); + DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst", + nameQRegLO(dd, Ity_I64), + nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64)); + return True; + } + if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) { /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */ IRType ity = size == X11 ? Ity_F64 : Ity_F32; @@ -6226,12 +6348,41 @@ Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) UInt dd = INSN(4,0); vassert(size < 4); - if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,0,1)) { - /* -------- 0,11,01001 CMEQ d_d_#0 -------- */ + if (size == X11 && opcode == BITS5(0,1,0,0,0)) { + /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0 + /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0 + Bool isGT = bitU == 0; + IRExpr* argL = getQReg128(nn); + IRExpr* argR = mkV128(0x0000); + IRTemp res = newTemp(Ity_V128); + assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR) + : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))); + putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); + DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn); + return True; + } + + if (size == X11 && opcode == BITS5(0,1,0,0,1)) { + /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0 + /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0 + Bool isEQ = bitU == 0; + IRExpr* argL = getQReg128(nn); + IRExpr* argR = mkV128(0x0000); + IRTemp res = newTemp(Ity_V128); + assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR) + : unop(Iop_NotV128, + binop(Iop_CmpGT64Sx2, argL, argR))); + putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res))); + DIP("cm%s d%u, d%u, #0\n", isEQ ? 
"eq" : "le", dd, nn); + return True; + } + + if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) { + /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // s 0 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0 diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 5c5988aa6f..d7445399ac 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -929,11 +929,18 @@ static void showARM64VecUnaryOp(/*OUT*/const HChar** nm, case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return; case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return; case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return; - case ARM64vecu_ABS64x2: *nm = "abs"; *ar = "2d"; return; - case ARM64vecu_ABS32x4: *nm = "abs"; *ar = "4s"; return; - case ARM64vecu_ABS16x8: *nm = "abs"; *ar = "8h"; return; - case ARM64vecu_ABS8x16: *nm = "abs"; *ar = "16b"; return; case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return; + case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return; + case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return; + case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return; + case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return; + case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return; + case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return; + case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return; + case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return; + case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return; + case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return; + case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return; default: vpanic("showARM64VecUnaryOp"); } } @@ -3413,6 +3420,7 @@ static inline UChar qregNo ( HReg r ) #define X001111 BITS8(0,0, 0,0,1,1,1,1) #define X010000 BITS8(0,0, 0,1,0,0,0,0) #define X010001 BITS8(0,0, 0,1,0,0,0,1) +#define X010010 BITS8(0,0, 0,1,0,0,1,0) #define X010101 BITS8(0,0, 0,1,0,1,0,1) #define X010110 BITS8(0,0, 0,1,0,1,1,0) #define X011001 BITS8(0,0, 0,1,1,0,0,1) @@ -5319,6 +5327,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, 010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s 010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h 010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b + + 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s + 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h + 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b + + 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s + 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h + 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b + + 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b */ UInt vD = qregNo(i->ARM64in.VUnaryV.dst); UInt vN = qregNo(i->ARM64in.VUnaryV.arg); @@ -5350,6 +5368,27 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, case ARM64vecu_ABS8x16: *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD); break; + case ARM64vecu_CLS32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD); + break; + case ARM64vecu_CLS16x8: + *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD); + break; + case ARM64vecu_CLS8x16: + *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD); + break; + case ARM64vecu_CLZ32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD); + break; + case ARM64vecu_CLZ16x8: + *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD); + break; + case ARM64vecu_CLZ8x16: + *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD); + break; + case ARM64vecu_CNT8x16: + *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD); + break; 
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 38b2910fb5..9b8491e7de 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -356,6 +356,9 @@ typedef
       ARM64vecu_NOT,
       ARM64vecu_ABS64x2,  ARM64vecu_ABS32x4,
       ARM64vecu_ABS16x8,  ARM64vecu_ABS8x16,
+      ARM64vecu_CLS32x4,  ARM64vecu_CLS16x8,  ARM64vecu_CLS8x16,
+      ARM64vecu_CLZ32x4,  ARM64vecu_CLZ16x8,  ARM64vecu_CLZ8x16,
+      ARM64vecu_CNT8x16,
       ARM64vecu_INVALID
    }
    ARM64VecUnaryOp;

diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index d12c72d863..9aa03372e8 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -4410,14 +4410,13 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
       /* Other cases */
       switch (e->Iex.Unop.op) {
          case Iop_NotV128:
-         case Iop_Abs64Fx2:
-         case Iop_Abs32Fx4:
-         case Iop_Neg64Fx2:
-         case Iop_Neg32Fx4:
-         case Iop_Abs64x2:
-         case Iop_Abs32x4:
-         case Iop_Abs16x8:
-         case Iop_Abs8x16:
+         case Iop_Abs64Fx2:  case Iop_Abs32Fx4:
+         case Iop_Neg64Fx2:  case Iop_Neg32Fx4:
+         case Iop_Abs64x2:   case Iop_Abs32x4:
+         case Iop_Abs16x8:   case Iop_Abs8x16:
+         case Iop_Cls32Sx4:  case Iop_Cls16Sx8:  case Iop_Cls8Sx16:
+         case Iop_Clz32Sx4:  case Iop_Clz16Sx8:  case Iop_Clz8Sx16:
+         case Iop_Cnt8x16:
          {
             HReg res = newVRegV(env);
             HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
@@ -4432,6 +4431,13 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
                case Iop_Abs32x4: op = ARM64vecu_ABS32x4; break;
                case Iop_Abs16x8: op = ARM64vecu_ABS16x8; break;
                case Iop_Abs8x16: op = ARM64vecu_ABS8x16; break;
+               case Iop_Cls32Sx4: op = ARM64vecu_CLS32x4; break;
+               case Iop_Cls16Sx8: op = ARM64vecu_CLS16x8; break;
+               case Iop_Cls8Sx16: op = ARM64vecu_CLS8x16; break;
+               case Iop_Clz32Sx4: op = ARM64vecu_CLZ32x4; break;
+               case Iop_Clz16Sx8: op = ARM64vecu_CLZ16x8; break;
+               case Iop_Clz8Sx16: op = ARM64vecu_CLZ8x16; break;
+               case Iop_Cnt8x16:  op = ARM64vecu_CNT8x16; break;
                default: vassert(0);
             }
             addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
-- 
2.47.2
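For reference, the per-lane semantics behind the three new unary ops: CNT is a per-byte population count, CLZ counts leading zero bits, and CLS counts the bits below the sign bit that match it. A small C model of one byte lane (illustrative only, not part of the patch):

#include <stdint.h>

/* CNT: population count of one byte. */
static int cnt8(uint8_t x)
{
   int n = 0;
   for (; x != 0; x >>= 1) n += x & 1;
   return n;
}

/* CLZ: leading zero bits; clz8(0) == 8. */
static int clz8(uint8_t x)
{
   int n = 0;
   for (int i = 7; i >= 0 && !((x >> i) & 1); i--) n++;
   return n;
}

/* CLS: bits after the sign bit equal to it; cls8(0) == cls8(0xFF) == 7. */
static int cls8(uint8_t x)
{
   int sign = (x >> 7) & 1, n = 0;
   for (int i = 6; i >= 0 && ((x >> i) & 1) == sign; i--) n++;
   return n;
}

The isel changes above accept the corresponding Iop_Cls*/Iop_Clz*/Iop_Cnt8x16 IROps and lower them onto the new ARM64vecu_* instructions emitted in host_arm64_defs.c.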