From: Julian Seward
Date: Sun, 27 Apr 2014 12:02:12 +0000 (+0000)
Subject: Finish off vector integer comparison instructions, and
X-Git-Tag: svn/VALGRIND_3_10_1^2~121
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=932e981e4ed10deff1e006e0b57000349e764232;p=thirdparty%2Fvalgrind.git

Finish off vector integer comparison instructions, and
vector shift-by-immediates (Shr/Shl/Sar) instructions.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2849
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 9010115548..bb762e07b4 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -6325,7 +6325,7 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
    /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
    /* 31  28    23   21 15     9 4                            ix
       0q1 01110 size 1 m 100011 n d  CMEQ  Vd.T, Vn.T, Vm.T   (1) ==
-      0q0 01110 size 1 m 100011 n d  CMTST Vd.T, Vn.T, Vm.T   (2) &, == 0
+      0q0 01110 size 1 m 100011 n d  CMTST Vd.T, Vn.T, Vm.T   (2) &, != 0

       0q1 01110 size 1 m 001101 n d  CMHI Vd.T, Vn.T, Vm.T    (3) >u
       0q0 01110 size 1 m 001101 n d  CMGT Vd.T, Vn.T, Vm.T    (4) >s
@@ -6387,9 +6387,9 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
       */
       switch (ix) {
          case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
-         case 2: binop(opsEQ[szBlg2],
-                       binop(Iop_AndV128, argL, argR),
-                       mkV128(0x0000));
+         case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2],
+                            binop(Iop_AndV128, argL, argR),
+                            mkV128(0x0000)));
                  break;
         case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
         case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 0fd5c7d93d..bab569cb4c 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -850,51 +850,59 @@ static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
                               /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
    switch (op) {
-      case ARM64vecb_ADD64x2:  *nm = "add ";  *ar = "2d";  return;
-      case ARM64vecb_ADD32x4:  *nm = "add ";  *ar = "4s";  return;
-      case ARM64vecb_ADD16x8:  *nm = "add ";  *ar = "8h";  return;
-      case ARM64vecb_ADD8x16:  *nm = "add ";  *ar = "16b"; return;
-      case ARM64vecb_SUB64x2:  *nm = "sub ";  *ar = "2d";  return;
-      case ARM64vecb_SUB32x4:  *nm = "sub ";  *ar = "4s";  return;
-      case ARM64vecb_SUB16x8:  *nm = "sub ";  *ar = "8h";  return;
-      case ARM64vecb_SUB8x16:  *nm = "sub ";  *ar = "16b"; return;
-      case ARM64vecb_MUL32x4:  *nm = "mul ";  *ar = "4s";  return;
-      case ARM64vecb_MUL16x8:  *nm = "mul ";  *ar = "8h";  return;
-      case ARM64vecb_FADD64x2: *nm = "fadd";  *ar = "2d";  return;
-      case ARM64vecb_FSUB64x2: *nm = "fsub";  *ar = "2d";  return;
-      case ARM64vecb_FMUL64x2: *nm = "fmul";  *ar = "2d";  return;
-      case ARM64vecb_FDIV64x2: *nm = "fdiv";  *ar = "2d";  return;
-      case ARM64vecb_FADD32x4: *nm = "fadd";  *ar = "4s";  return;
-      case ARM64vecb_FSUB32x4: *nm = "fsub";  *ar = "4s";  return;
-      case ARM64vecb_FMUL32x4: *nm = "fmul";  *ar = "4s";  return;
-      case ARM64vecb_FDIV32x4: *nm = "fdiv";  *ar = "4s";  return;
-      case ARM64vecb_UMAX32x4: *nm = "umax";  *ar = "4s";  return;
-      case ARM64vecb_UMAX16x8: *nm = "umax";  *ar = "8h";  return;
-      case ARM64vecb_UMAX8x16: *nm = "umax";  *ar = "16b"; return;
-      case ARM64vecb_UMIN32x4: *nm = "umin";  *ar = "4s";  return;
-      case ARM64vecb_UMIN16x8: *nm = "umin";  *ar = "8h";  return;
-      case ARM64vecb_UMIN8x16: *nm = "umin";  *ar = "16b"; return;
-      case ARM64vecb_SMAX32x4: *nm = "smax";  *ar = "4s";  return;
-      case ARM64vecb_SMAX16x8: *nm = "smax";  *ar = "8h";  return;
-      case ARM64vecb_SMAX8x16: *nm = "smax";  *ar = "16b"; return;
-      case ARM64vecb_SMIN32x4: *nm = "smin";  *ar = "4s";  return;
-      case ARM64vecb_SMIN16x8: *nm = "smin";  *ar = "8h";  return;
-      case ARM64vecb_SMIN8x16: *nm = "smin";  *ar = "16b"; return;
-      case ARM64vecb_AND:      *nm = "and ";  *ar = "all"; return;
-      case ARM64vecb_ORR:      *nm = "orr ";  *ar = "all"; return;
-      case ARM64vecb_XOR:      *nm = "eor ";  *ar = "all"; return;
-      case ARM64vecb_CMEQ64x2: *nm = "cmeq";  *ar = "2d";  return;
-      case ARM64vecb_CMEQ32x4: *nm = "cmeq";  *ar = "4s";  return;
-      case ARM64vecb_CMEQ16x8: *nm = "cmeq";  *ar = "8h";  return;
-      case ARM64vecb_CMEQ8x16: *nm = "cmeq";  *ar = "16b"; return;
+      case ARM64vecb_ADD64x2:   *nm = "add ";  *ar = "2d";  return;
+      case ARM64vecb_ADD32x4:   *nm = "add ";  *ar = "4s";  return;
+      case ARM64vecb_ADD16x8:   *nm = "add ";  *ar = "8h";  return;
+      case ARM64vecb_ADD8x16:   *nm = "add ";  *ar = "16b"; return;
+      case ARM64vecb_SUB64x2:   *nm = "sub ";  *ar = "2d";  return;
+      case ARM64vecb_SUB32x4:   *nm = "sub ";  *ar = "4s";  return;
+      case ARM64vecb_SUB16x8:   *nm = "sub ";  *ar = "8h";  return;
+      case ARM64vecb_SUB8x16:   *nm = "sub ";  *ar = "16b"; return;
+      case ARM64vecb_MUL32x4:   *nm = "mul ";  *ar = "4s";  return;
+      case ARM64vecb_MUL16x8:   *nm = "mul ";  *ar = "8h";  return;
+      case ARM64vecb_MUL8x16:   *nm = "mul ";  *ar = "16b"; return;
+      case ARM64vecb_FADD64x2:  *nm = "fadd";  *ar = "2d";  return;
+      case ARM64vecb_FSUB64x2:  *nm = "fsub";  *ar = "2d";  return;
+      case ARM64vecb_FMUL64x2:  *nm = "fmul";  *ar = "2d";  return;
+      case ARM64vecb_FDIV64x2:  *nm = "fdiv";  *ar = "2d";  return;
+      case ARM64vecb_FADD32x4:  *nm = "fadd";  *ar = "4s";  return;
+      case ARM64vecb_FSUB32x4:  *nm = "fsub";  *ar = "4s";  return;
+      case ARM64vecb_FMUL32x4:  *nm = "fmul";  *ar = "4s";  return;
+      case ARM64vecb_FDIV32x4:  *nm = "fdiv";  *ar = "4s";  return;
+      case ARM64vecb_UMAX32x4:  *nm = "umax";  *ar = "4s";  return;
+      case ARM64vecb_UMAX16x8:  *nm = "umax";  *ar = "8h";  return;
+      case ARM64vecb_UMAX8x16:  *nm = "umax";  *ar = "16b"; return;
+      case ARM64vecb_UMIN32x4:  *nm = "umin";  *ar = "4s";  return;
+      case ARM64vecb_UMIN16x8:  *nm = "umin";  *ar = "8h";  return;
+      case ARM64vecb_UMIN8x16:  *nm = "umin";  *ar = "16b"; return;
+      case ARM64vecb_SMAX32x4:  *nm = "smax";  *ar = "4s";  return;
+      case ARM64vecb_SMAX16x8:  *nm = "smax";  *ar = "8h";  return;
+      case ARM64vecb_SMAX8x16:  *nm = "smax";  *ar = "16b"; return;
+      case ARM64vecb_SMIN32x4:  *nm = "smin";  *ar = "4s";  return;
+      case ARM64vecb_SMIN16x8:  *nm = "smin";  *ar = "8h";  return;
+      case ARM64vecb_SMIN8x16:  *nm = "smin";  *ar = "16b"; return;
+      case ARM64vecb_AND:       *nm = "and ";  *ar = "all"; return;
+      case ARM64vecb_ORR:       *nm = "orr ";  *ar = "all"; return;
+      case ARM64vecb_XOR:       *nm = "eor ";  *ar = "all"; return;
+      case ARM64vecb_CMEQ64x2:  *nm = "cmeq";  *ar = "2d";  return;
+      case ARM64vecb_CMEQ32x4:  *nm = "cmeq";  *ar = "4s";  return;
+      case ARM64vecb_CMEQ16x8:  *nm = "cmeq";  *ar = "8h";  return;
+      case ARM64vecb_CMEQ8x16:  *nm = "cmeq";  *ar = "16b"; return;
+      case ARM64vecb_CMHI64x2:  *nm = "cmhi";  *ar = "2d";  return;
+      case ARM64vecb_CMHI32x4:  *nm = "cmhi";  *ar = "4s";  return;
+      case ARM64vecb_CMHI16x8:  *nm = "cmhi";  *ar = "8h";  return;
+      case ARM64vecb_CMHI8x16:  *nm = "cmhi";  *ar = "16b"; return;
+      case ARM64vecb_CMGT64x2:  *nm = "cmgt";  *ar = "2d";  return;
+      case ARM64vecb_CMGT32x4:  *nm = "cmgt";  *ar = "4s";  return;
+      case ARM64vecb_CMGT16x8:  *nm = "cmgt";  *ar = "8h";  return;
+      case ARM64vecb_CMGT8x16:  *nm = "cmgt";  *ar = "16b"; return;
       case ARM64vecb_FCMEQ64x2: *nm = "fcmeq"; *ar = "2d";  return;
       case ARM64vecb_FCMEQ32x4: *nm = "fcmeq"; *ar = "4s";  return;
       case ARM64vecb_FCMGE64x2: *nm = "fcmge"; *ar = "2d";  return;
       case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s";  return;
       case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d";  return;
       case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s";  return;
-      case ARM64vecb_TBL1:     *nm = "tbl ";  *ar = "16b"; return;
-      case ARM64vecb_CMHI8x16: *nm = "cmhi";  *ar = "16b"; return;
+      case ARM64vecb_TBL1:      *nm = "tbl ";  *ar = "16b"; return;
       default: vpanic("showARM64VecBinOp");
    }
 }
@@ -918,9 +926,17 @@ static void showARM64VecShiftOp(/*OUT*/const HChar** nm,
 {
    switch (op) {
       case ARM64vecsh_USHR64x2: *nm = "ushr "; *ar = "2d";  return;
+      case ARM64vecsh_USHR32x4: *nm = "ushr "; *ar = "4s";  return;
       case ARM64vecsh_USHR16x8: *nm = "ushr "; *ar = "8h";  return;
+      case ARM64vecsh_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
       case ARM64vecsh_SSHR64x2: *nm = "sshr "; *ar = "2d";  return;
+      case ARM64vecsh_SSHR32x4: *nm = "sshr "; *ar = "4s";  return;
+      case ARM64vecsh_SSHR16x8: *nm = "sshr "; *ar = "8h";  return;
+      case ARM64vecsh_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
+      case ARM64vecsh_SHL64x2:  *nm = "shl ";  *ar = "2d";  return;
       case ARM64vecsh_SHL32x4:  *nm = "shl ";  *ar = "4s";  return;
+      case ARM64vecsh_SHL16x8:  *nm = "shl ";  *ar = "8h";  return;
+      case ARM64vecsh_SHL8x16:  *nm = "shl ";  *ar = "16b"; return;
       default: vpanic("showARM64VecShiftImmOp");
    }
 }
@@ -1631,11 +1647,17 @@ ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftOp op,
    UInt maxSh = 0;
    switch (op) {
       case ARM64vecsh_USHR64x2: case ARM64vecsh_SSHR64x2:
+      case ARM64vecsh_SHL64x2:
         maxSh = 63; break;
+      case ARM64vecsh_USHR32x4: case ARM64vecsh_SSHR32x4:
       case ARM64vecsh_SHL32x4:
         maxSh = 31; break;
-      case ARM64vecsh_USHR16x8:
+      case ARM64vecsh_USHR16x8: case ARM64vecsh_SSHR16x8:
+      case ARM64vecsh_SHL16x8:
         maxSh = 15; break;
+      case ARM64vecsh_USHR8x16: case ARM64vecsh_SSHR8x16:
+      case ARM64vecsh_SHL8x16:
+        maxSh = 7; break;
       default: vassert(0);
    }

@@ -3368,6 +3390,7 @@ static inline UChar qregNo ( HReg r )
 #define X111110  BITS8(0,0, 1,1,1,1,1,0)
 #define X111111  BITS8(0,0, 1,1,1,1,1,1)

+#define X0001000  BITS8(0, 0,0,0,1,0,0,0)
 #define X0010000  BITS8(0, 0,0,1,0,0,0,0)
 #define X0100000  BITS8(0, 0,1,0,0,0,0,0)
 #define X1000000  BITS8(0, 1,0,0,0,0,0,0)
@@ -4918,18 +4941,19 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
       case ARM64in_VBinV: {
          /* 31        23   20 15     9 4
-            010 01110 11 1 m  100001 n d   ADD Vd.2d, Vn.2d, Vm.2d
-            010 01110 10 1 m  100001 n d   ADD Vd.4s, Vn.4s, Vm.4s
-            010 01110 01 1 m  100001 n d   ADD Vd.8h, Vn.8h, Vm.8h
+            010 01110 11 1 m  100001 n d   ADD Vd.2d,  Vn.2d,  Vm.2d
+            010 01110 10 1 m  100001 n d   ADD Vd.4s,  Vn.4s,  Vm.4s
+            010 01110 01 1 m  100001 n d   ADD Vd.8h,  Vn.8h,  Vm.8h
             010 01110 00 1 m  100001 n d   ADD Vd.16b, Vn.16b, Vm.16b

-            011 01110 11 1 m  100001 n d   SUB Vd.2d, Vn.2d, Vm.2d
-            011 01110 10 1 m  100001 n d   SUB Vd.4s, Vn.4s, Vm.4s
-            011 01110 01 1 m  100001 n d   SUB Vd.8h, Vn.8h, Vm.8h
+            011 01110 11 1 m  100001 n d   SUB Vd.2d,  Vn.2d,  Vm.2d
+            011 01110 10 1 m  100001 n d   SUB Vd.4s,  Vn.4s,  Vm.4s
+            011 01110 01 1 m  100001 n d   SUB Vd.8h,  Vn.8h,  Vm.8h
             011 01110 00 1 m  100001 n d   SUB Vd.16b, Vn.16b, Vm.16b

-            010 01110 10 1 m  100111 n d   MUL Vd.4s, Vn.4s, Vm.4s
-            010 01110 01 1 m  100111 n d   MUL Vd.8h, Vn.8h, Vm.8h
+            010 01110 10 1 m  100111 n d   MUL Vd.4s,  Vn.4s,  Vm.4s
+            010 01110 01 1 m  100111 n d   MUL Vd.8h,  Vn.8h,  Vm.8h
+            010 01110 00 1 m  100111 n d   MUL Vd.16b, Vn.16b, Vm.16b

             010 01110 01 1 m  110101 n d   FADD Vd.2d, Vn.2d, Vm.2d
             010 01110 00 1 m  110101 n d   FADD Vd.4s, Vn.4s, Vm.4s
@@ -4966,8 +4990,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             011 01110 01 1 m  100011 n d   CMEQ Vd.8h, Vn.8h, Vm.8h
             011 01110 00 1 m  100011 n d   CMEQ Vd.16b, Vn.16b, Vm.16b

-            011 01110 11 1 m  001101 n d   CMHI Vd.2d, Vn.2d, Vm.2d  >u, ATC
-            010 01110 11 1 m  001101 n d   CMGT Vd.2d, Vn.2d, Vm.2d  >s, ATC
+            011 01110 11 1 m  001101 n d   CMHI Vd.2d,  Vn.2d,  Vm.2d
+            011 01110 10 1 m  001101 n d   CMHI Vd.4s,  Vn.4s,  Vm.4s
+            011 01110 01 1 m  001101 n d   CMHI Vd.8h,  Vn.8h,  Vm.8h
+            011 01110 00 1 m  001101 n d   CMHI Vd.16b, Vn.16b, Vm.16b
+
+            010 01110 11 1 m  001101 n d   CMGT Vd.2d,  Vn.2d,  Vm.2d
+            010 01110 10 1 m  001101 n d   CMGT Vd.4s,  Vn.4s,  Vm.4s
+            010 01110 01 1 m  001101 n d   CMGT Vd.8h,  Vn.8h,  Vm.8h
+            010 01110 00 1 m  001101 n d   CMGT Vd.16b, Vn.16b, Vm.16b

             010 01110 01 1 m  111001 n d   FCMEQ Vd.2d, Vn.2d, Vm.2d
             010 01110 00 1 m  111001 n d   FCMEQ Vd.4s, Vn.4s, Vm.4s
@@ -4980,7 +5011,6 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,

             010 01110 00 0 m  000000 n d   TBL Vd.16b, {Vn.16b}, Vm.16b

-            011 01110 00 1 m  001101 n d   CMHI Vd.16b, Vn.16b, Vm.16b
         */
         UInt vD = qregNo(i->ARM64in.VBinV.dst);
         UInt vN = qregNo(i->ARM64in.VBinV.argL);
@@ -5016,6 +5046,9 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          case ARM64vecb_MUL16x8:
             *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
             break;
+         case ARM64vecb_MUL8x16:
+            *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
+            break;
          case ARM64vecb_FADD64x2:
            *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
            break;
@@ -5104,6 +5137,32 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
            *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
            break;

+         case ARM64vecb_CMHI64x2:
+            *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD);
+            break;
+         case ARM64vecb_CMHI32x4:
+            *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD);
+            break;
+         case ARM64vecb_CMHI16x8:
+            *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD);
+            break;
+         case ARM64vecb_CMHI8x16:
+            *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
+            break;
+
+         case ARM64vecb_CMGT64x2:
+            *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD);
+            break;
+         case ARM64vecb_CMGT32x4:
+            *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD);
+            break;
+         case ARM64vecb_CMGT16x8:
+            *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD);
+            break;
+         case ARM64vecb_CMGT8x16:
+            *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD);
+            break;
+
          case ARM64vecb_FCMEQ64x2:
            *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
            break;
@@ -5129,9 +5188,6 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
            *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
            break;

-         case ARM64vecb_CMHI8x16:
-            *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
-            break;
          default:
            goto bad;
       }
@@ -5207,6 +5263,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          ARM64VecShiftOp op = i->ARM64in.VShiftImmV.op;
          Bool syned = False;
          switch (op) {
+            /* 64x2 cases */
             case ARM64vecsh_SSHR64x2: syned = True;
             case ARM64vecsh_USHR64x2: /* fallthrough */
                if (sh >= 1 && sh <= 63) {
@@ -5216,6 +5273,24 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                   goto done;
                }
                break;
+            case ARM64vecsh_SHL64x2:
+               if (sh >= 1 && sh <= 63) {
+                  UInt xxxxxx = sh;
+                  *p++ = X_3_6_7_6_5_5(X010, X011110,
+                                       X1000000 | xxxxxx, X010101, vN, vD);
+                  goto done;
+               }
+               break;
+            /* 32x4 cases */
+            case ARM64vecsh_SSHR32x4: syned = True;
+            case ARM64vecsh_USHR32x4: /* fallthrough */
+               if (sh >= 1 && sh <= 31) {
+                  UInt xxxxx = 32-sh;
+                  *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
+                                       X0100000 | xxxxx, X000001, vN, vD);
+                  goto done;
+               }
+               break;
             case ARM64vecsh_SHL32x4:
                if (sh >= 1 && sh <= 31) {
                   UInt xxxxx = sh;
@@ -5224,7 +5299,8 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                   goto done;
                }
                break;
-            //case ARM64vecsh_SSHR16x8: syned = True; ATC
+            /* 16x8 cases */
+            case ARM64vecsh_SSHR16x8: syned = True;
             case ARM64vecsh_USHR16x8: /* fallthrough */
                if (sh >= 1 && sh <= 15) {
                   UInt xxxx = 16-sh;
@@ -5233,6 +5309,35 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                   goto done;
                }
                break;
+            case ARM64vecsh_SHL16x8:
+               if (sh >= 1 && sh <= 15) {
+                  UInt xxxx = sh;
+                  *p++ = X_3_6_7_6_5_5(X010, X011110,
+                                       X0010000 | xxxx, X010101, vN, vD);
+                  goto done;
+               }
+               break;
+
+
+            /* 8x16 cases */
+            case ARM64vecsh_SSHR8x16: syned = True;
+            case ARM64vecsh_USHR8x16: /* fallthrough */
+               if (sh >= 1 && sh <= 7) {
+                  UInt xxx = 8-sh;
+                  *p++ = X_3_6_7_6_5_5(syned ? X010 : X011, X011110,
+                                       X0001000 | xxx, X000001, vN, vD);
+                  goto done;
+               }
+               break;
+            case ARM64vecsh_SHL8x16:
+               if (sh >= 1 && sh <= 7) {
+                  UInt xxx = sh;
+                  *p++ = X_3_6_7_6_5_5(X010, X011110,
+                                       X0001000 | xxx, X010101, vN, vD);
+                  goto done;
+               }
+               break;
+
             default:
                break;
             }
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 3d27ecda0c..99e9b063d9 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -317,6 +317,7 @@ typedef
       ARM64vecb_SUB8x16,
       ARM64vecb_MUL32x4,
       ARM64vecb_MUL16x8,
+      ARM64vecb_MUL8x16,
       ARM64vecb_FADD64x2,
       ARM64vecb_FSUB64x2,
       ARM64vecb_FMUL64x2,
@@ -344,6 +345,14 @@ typedef
       ARM64vecb_CMEQ32x4,
       ARM64vecb_CMEQ16x8,
       ARM64vecb_CMEQ8x16,
+      ARM64vecb_CMHI64x2, /* >u */
+      ARM64vecb_CMHI32x4,
+      ARM64vecb_CMHI16x8,
+      ARM64vecb_CMHI8x16,
+      ARM64vecb_CMGT64x2, /* >s */
+      ARM64vecb_CMGT32x4,
+      ARM64vecb_CMGT16x8,
+      ARM64vecb_CMGT8x16,
       ARM64vecb_FCMEQ64x2,
       ARM64vecb_FCMEQ32x4,
       ARM64vecb_FCMGE64x2,
@@ -351,7 +360,6 @@ typedef
       ARM64vecb_FCMGT64x2,
       ARM64vecb_FCMGT32x4,
       ARM64vecb_TBL1,
-      ARM64vecb_CMHI8x16,
       ARM64vecb_INVALID
    }
    ARM64VecBinOp;
@@ -370,9 +378,17 @@ typedef
 typedef
    enum {
       ARM64vecsh_USHR64x2=350,
+      ARM64vecsh_USHR32x4,
       ARM64vecsh_USHR16x8,
+      ARM64vecsh_USHR8x16,
       ARM64vecsh_SSHR64x2,
+      ARM64vecsh_SSHR32x4,
+      ARM64vecsh_SSHR16x8,
+      ARM64vecsh_SSHR8x16,
+      ARM64vecsh_SHL64x2,
       ARM64vecsh_SHL32x4,
+      ARM64vecsh_SHL16x8,
+      ARM64vecsh_SHL8x16,
       ARM64vecsh_INVALID
    }
    ARM64VecShiftOp;
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index adf117129b..e9e5c1c226 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -4925,7 +4925,19 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
         case Iop_Sub8x16:
         case Iop_Mul32x4:
         case Iop_Mul16x8:
+        case Iop_Mul8x16:
         case Iop_CmpEQ64x2:
+        case Iop_CmpEQ32x4:
+        case Iop_CmpEQ16x8:
+        case Iop_CmpEQ8x16:
+        case Iop_CmpGT64Ux2:
+        case Iop_CmpGT32Ux4:
+        case Iop_CmpGT16Ux8:
+        case Iop_CmpGT8Ux16:
+        case Iop_CmpGT64Sx2:
+        case Iop_CmpGT32Sx4:
+        case Iop_CmpGT16Sx8:
+        case Iop_CmpGT8Sx16:
         case Iop_CmpEQ64Fx2:
         case Iop_CmpEQ32Fx4:
         case Iop_CmpLE64Fx2:
@@ -4933,7 +4945,6 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
         case Iop_CmpLT64Fx2:
         case Iop_CmpLT32Fx4:
         case Iop_Perm8x16:
-        case Iop_CmpGT8Ux16:
        {
           HReg res  = newVRegV(env);
           HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
@@ -4966,7 +4977,19 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
             case Iop_Sub8x16:    op = ARM64vecb_SUB8x16; break;
             case Iop_Mul32x4:    op = ARM64vecb_MUL32x4; break;
             case Iop_Mul16x8:    op = ARM64vecb_MUL16x8; break;
+            case Iop_Mul8x16:    op = ARM64vecb_MUL8x16; break;
             case Iop_CmpEQ64x2:  op = ARM64vecb_CMEQ64x2; break;
+            case Iop_CmpEQ32x4:  op = ARM64vecb_CMEQ32x4; break;
+            case Iop_CmpEQ16x8:  op = ARM64vecb_CMEQ16x8; break;
+            case Iop_CmpEQ8x16:  op = ARM64vecb_CMEQ8x16; break;
+            case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
+            case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
+            case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
+            case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
+            case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
+            case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
+            case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
+            case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
             case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
             case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
             case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
@@ -4974,7 +4997,6 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
             case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
             case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
             case Iop_Perm8x16:   op = ARM64vecb_TBL1; break;
-            case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
             default: vassert(0);
          }
          if (sw) {
@@ -5474,13 +5496,18 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
 //ZZ                                        res, argL, size, True));
 //ZZ          return res;
 //ZZ       }
-//ZZ       case Iop_ShrN8x16:
-//ZZ       case Iop_ShrN16x8:
-//ZZ       case Iop_ShrN32x4:
         case Iop_ShrN64x2:
+        case Iop_ShrN32x4:
         case Iop_ShrN16x8:
+        case Iop_ShrN8x16:
         case Iop_SarN64x2:
+        case Iop_SarN32x4:
+        case Iop_SarN16x8:
+        case Iop_SarN8x16:
+        case Iop_ShlN64x2:
         case Iop_ShlN32x4:
+        case Iop_ShlN16x8:
+        case Iop_ShlN8x16:
        {
           IRExpr* argL = e->Iex.Binop.arg1;
           IRExpr* argR = e->Iex.Binop.arg2;
@@ -5491,12 +5518,28 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          switch (e->Iex.Binop.op) {
             case Iop_ShrN64x2:
                op = ARM64vecsh_USHR64x2; limit = 63; break;
+            case Iop_ShrN32x4:
+               op = ARM64vecsh_USHR32x4; limit = 31; break;
             case Iop_ShrN16x8:
                op = ARM64vecsh_USHR16x8; limit = 15; break;
+            case Iop_ShrN8x16:
+               op = ARM64vecsh_USHR8x16; limit = 7;  break;
             case Iop_SarN64x2:
                op = ARM64vecsh_SSHR64x2; limit = 63; break;
+            case Iop_SarN32x4:
+               op = ARM64vecsh_SSHR32x4; limit = 31; break;
+            case Iop_SarN16x8:
+               op = ARM64vecsh_SSHR16x8; limit = 15; break;
+            case Iop_SarN8x16:
+               op = ARM64vecsh_SSHR8x16; limit = 7;  break;
+            case Iop_ShlN64x2:
+               op = ARM64vecsh_SHL64x2;  limit = 63; break;
             case Iop_ShlN32x4:
                op = ARM64vecsh_SHL32x4;  limit = 31; break;
+            case Iop_ShlN16x8:
+               op = ARM64vecsh_SHL16x8;  limit = 15; break;
+            case Iop_ShlN8x16:
+               op = ARM64vecsh_SHL8x16;  limit = 7;  break;
             default: vassert(0);
          }

@@ -5510,50 +5553,6 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
           /* else fall out; this is unhandled */
           break;
        }
-//ZZ       case Iop_ShlN8x16:
-//ZZ       case Iop_ShlN16x8:
-//ZZ       case Iop_ShlN32x4:
-//ZZ       case Iop_ShlN64x2: {
-//ZZ          HReg res  = newVRegV(env);
-//ZZ          HReg tmp  = newVRegV(env);
-//ZZ          HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
-//ZZ          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
-//ZZ          UInt size;
-//ZZ          switch (e->Iex.Binop.op) {
-//ZZ             case Iop_ShlN8x16: size = 0; break;
-//ZZ             case Iop_ShlN16x8: size = 1; break;
-//ZZ             case Iop_ShlN32x4: size = 2; break;
-//ZZ             case Iop_ShlN64x2: size = 3; break;
-//ZZ             default: vassert(0);
-//ZZ          }
-//ZZ          addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
-//ZZ          addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
-//ZZ                                        res, argL, tmp, size, True));
-//ZZ          return res;
-//ZZ       }
-//ZZ       case Iop_SarN8x16:
-//ZZ       case Iop_SarN16x8:
-//ZZ       case Iop_SarN32x4:
-//ZZ       case Iop_SarN64x2: {
-//ZZ          HReg res  = newVRegV(env);
-//ZZ          HReg tmp  = newVRegV(env);
-//ZZ          HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
-//ZZ          HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
-//ZZ          HReg argR2 = newVRegI(env);
-//ZZ          UInt size;
-//ZZ          switch (e->Iex.Binop.op) {
-//ZZ             case Iop_SarN8x16: size = 0; break;
-//ZZ             case Iop_SarN16x8: size = 1; break;
-//ZZ             case Iop_SarN32x4: size = 2; break;
-//ZZ             case Iop_SarN64x2: size = 3; break;
-//ZZ             default: vassert(0);
-//ZZ          }
-//ZZ          addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
-//ZZ          addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
-//ZZ          addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
-//ZZ                                        res, argL, tmp, size, True));
-//ZZ          return res;
-//ZZ       }
 //ZZ       case Iop_CmpGT8Ux16:
 //ZZ       case Iop_CmpGT16Ux8:
 //ZZ       case Iop_CmpGT32Ux4: {
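
A note on the CMTST change in guest_arm64_toIR.c above: the previous code built
the CmpEQ-against-zero expression but never assigned it to res, and it also
lacked the final negation. CMTST must set a result lane to all ones exactly
when the AND of the two source lanes is nonzero, hence the corrected comment
"&, != 0" and the corrected IR Not(CmpEQ(And(argL, argR), 0)). The following
standalone sketch (not VEX code; the 16-bit lane width and the helper name
cmtst16x8 are chosen purely for illustration) shows the per-lane semantics
that IR computes:

    /* Per-lane CMTST on 8 lanes of 16 bits: all-ones if (n & m) != 0,
       else all-zeroes, i.e. NOT(CMEQ(n & m, 0)). */
    #include <stdint.h>
    #include <stdio.h>

    static void cmtst16x8(uint16_t d[8], const uint16_t n[8],
                          const uint16_t m[8])
    {
       for (int i = 0; i < 8; i++)
          d[i] = ((uint16_t)(n[i] & m[i]) != 0) ? 0xFFFF : 0x0000;
    }

    int main(void)
    {
       uint16_t n[8] = {1, 2, 0, 0x8000, 3, 0, 0xFFFF, 4};
       uint16_t m[8] = {1, 4, 5, 0x8000, 1, 0, 0x0001, 8};
       uint16_t d[8];
       cmtst16x8(d, n, m);
       for (int i = 0; i < 8; i++) printf("%04x ", d[i]);
       printf("\n");  /* ffff 0000 0000 ffff ffff 0000 ffff 0000 */
       return 0;
    }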
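On the CMHI/CMGT encodings added to emit_ARM64Instr: both share the 001101
opcode field, and the lane size lives in the second argument (X01110111 for
2d down to X01110001 for 16b); they differ only in the leading 3-bit field,
011 versus 010, whose low bit is the U bit selecting unsigned (CMHI, >u)
against signed (CMGT, >s) comparison. Below is a sketch of the bit packing,
assuming X_3_8_5_6_5_5 concatenates its six fields (3+8+5+6+5+5 = 32 bits)
from most- to least-significant; that layout is my assumption about the VEX
helper, though the resulting words agree with the architectural encodings:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed equivalent of VEX's X_3_8_5_6_5_5: pack six fields of
       3, 8, 5, 6, 5 and 5 bits, high to low, into one instruction word. */
    static uint32_t x_3_8_5_6_5_5(uint32_t f1, uint32_t f2, uint32_t f3,
                                  uint32_t f4, uint32_t f5, uint32_t f6)
    {
       return (f1 << 29) | (f2 << 21) | (f3 << 16)
            | (f4 << 10) | (f5 << 5) | f6;
    }

    int main(void)
    {
       uint32_t vd = 0, vn = 1, vm = 2;
       /* CMHI Vd.4s: 011 01110101 m 001101 n d; CMGT flips 011 -> 010. */
       uint32_t cmhi_4s = x_3_8_5_6_5_5(0x3, 0x75, vm, 0x0D, vn, vd);
       uint32_t cmgt_4s = x_3_8_5_6_5_5(0x2, 0x75, vm, 0x0D, vn, vd);
       printf("cmhi v0.4s, v1.4s, v2.4s = %08x\n", cmhi_4s); /* 6ea23420 */
       printf("cmgt v0.4s, v1.4s, v2.4s = %08x\n", cmgt_4s); /* 4ea23420 */
       return 0;
    }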
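The shift-by-immediate cases all fold the element size and the shift amount
into one 7-bit immh:immb field, which is why the new X0001000 constant was
needed for the 16b forms. The size constant supplies the leading one bit
(X1000000 = 64 for 2d, X0100000 = 32 for 4s, X0010000 = 16 for 8h,
X0001000 = 8 for 16b); right shifts (USHR/SSHR) then OR in esize - sh below
it, giving 2*esize - sh overall, while left shifts (SHL) OR in sh, giving
esize + sh. A sketch of that arithmetic (my reading of the scheme, not code
taken from the patch):

    #include <assert.h>
    #include <stdio.h>

    typedef unsigned int UInt;

    /* 7-bit immh:immb field for a vector shift-by-immediate; mirrors e.g.
       X0100000 | (32 - sh) for USHR32x4 and X0100000 | sh for SHL32x4. */
    static UInt shift_imm7(UInt esize, UInt sh, int is_right_shift)
    {
       assert(esize == 8 || esize == 16 || esize == 32 || esize == 64);
       assert(sh >= 1 && sh <= esize - 1);   /* same bounds the emitter checks */
       return is_right_shift ? (2 * esize - sh) : (esize + sh);
    }

    int main(void)
    {
       printf("ushr .4s,  #5 -> %02x\n", shift_imm7(32, 5, 1)); /* 0x3b */
       printf("shl  .4s,  #5 -> %02x\n", shift_imm7(32, 5, 0)); /* 0x25 */
       printf("sshr .8h,  #3 -> %02x\n", shift_imm7(16, 3, 1)); /* 0x1d */
       printf("shl  .16b, #7 -> %02x\n", shift_imm7(8, 7, 0));  /* 0x0f */
       return 0;
    }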