From: Julian Seward
Date: Mon, 18 Aug 2014 12:28:02 +0000 (+0000)
Subject: arm64: implement:
X-Git-Tag: svn/VALGRIND_3_10_1^2~43
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=18e32ed7078254bc8d74561b394b5ee96d01a664;p=thirdparty%2Fvalgrind.git

arm64: implement:
   suqadd, usqadd (scalar)
   suqadd, usqadd (vector)

git-svn-id: svn://svn.valgrind.org/vex/trunk@2928
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index a49b7b9628..837abddbe0 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -621,6 +621,22 @@ static IROp mkVecQADDS ( UInt size ) {
    return ops[size];
 }
 
+static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
+   const IROp ops[4]
+      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
+          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
+   vassert(size < 4);
+   return ops[size];
+}
+
+static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
+   const IROp ops[4]
+      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
+          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
+   vassert(size < 4);
+   return ops[size];
+}
+
 static IROp mkVecSUB ( UInt size ) {
    const IROp ops[4]
       = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
@@ -8137,6 +8153,31 @@ Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
    UInt dd = INSN(4,0);
    vassert(size < 4);
 
+   if (opcode == BITS5(0,0,0,1,1)) {
+      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
+      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
+      /* These are a bit tricky (to say the least).  See comments on
+         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
+         details. */
+      Bool isUSQADD = bitU == 1;
+      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
+                             : mkVecQADDEXTUSSATSS(size);
+      IROp   nop  = mkVecADD(size);
+      IRTemp argL = newTempV128();
+      IRTemp argR = newTempV128();
+      assign(argL, getQReg128(nn));
+      assign(argR, getQReg128(dd));
+      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
+                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
+      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
+                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
+      putQReg128(dd, mkexpr(qres));
+      updateQCFLAGwithDifference(qres, nres);
+      const HChar arr = "bhsd"[size];
+      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
+      return True;
+   }
+
    if (opcode == BITS5(0,0,1,1,1)) {
       /* -------- 0,xx,00111 SQABS std4_std4 -------- */
       /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
@@ -9747,6 +9788,39 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       return True;
    }
 
+   if (opcode == BITS5(0,0,0,1,1)) {
+      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
+      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
+      if (bitQ == 0 && size == X11) return False; // implied 1d case
+      Bool isUSQADD = bitU == 1;
+      /* This is switched (in the US vs SU sense) deliberately.
+         SUQADD corresponds to the ExtUSsatSS variants and
+         USQADD corresponds to the ExtSUsatUU variants.
+         See libvex_ir for more details. */
+      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
+                             : mkVecQADDEXTUSSATSS(size);
+      IROp   nop  = mkVecADD(size);
+      IRTemp argL = newTempV128();
+      IRTemp argR = newTempV128();
+      IRTemp qres = newTempV128();
+      IRTemp nres = newTempV128();
+      /* Because the two arguments to the addition are implicitly
+         extended differently (one signedly, the other unsignedly) it is
+         important to present them to the primop in the correct order. */
+      assign(argL, getQReg128(nn));
+      assign(argR, getQReg128(dd));
+      assign(qres, math_MAYBE_ZERO_HI64_fromE(
+                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
+      assign(nres, math_MAYBE_ZERO_HI64_fromE(
+                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
+      putQReg128(dd, mkexpr(qres));
+      updateQCFLAGwithDifference(qres, nres);
+      const HChar* arr = nameArr_Q_SZ(bitQ, size);
+      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
+          nameQReg128(dd), arr, nameQReg128(nn), arr);
+      return True;
+   }
+
    if (opcode == BITS5(0,0,1,0,0)) {
       /* -------- 0,xx,00100: CLS std6_std6 -------- */
       /* -------- 1,xx,00100: CLZ std6_std6 -------- */
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index b977d194d4..df9b4270da 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -702,6 +702,22 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm,
    }
 }
 
+static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
+                                 /*OUT*/const HChar** ar,
+                                 ARM64VecModifyOp op ) {
+   switch (op) {
+      case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d";  return;
+      case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s";  return;
+      case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h";  return;
+      case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
+      case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d";  return;
+      case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s";  return;
+      case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h";  return;
+      case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
+      default: vpanic("showARM64VecModifyOp");
+   }
+}
+
 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
                                 /*OUT*/const HChar** ar, ARM64VecUnaryOp op ) {
@@ -1117,6 +1133,14 @@ ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
    i->ARM64in.VBinV.argR = argR;
    return i;
 }
+ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
+   ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
+   i->tag                  = ARM64in_VModifyV;
+   i->ARM64in.VModifyV.op  = op;
+   i->ARM64in.VModifyV.mod = mod;
+   i->ARM64in.VModifyV.arg = arg;
+   return i;
+}
 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
    ARM64Instr* i = LibVEX_Alloc(sizeof(ARM64Instr));
    i->tag = ARM64in_VUnaryV;
@@ -1639,6 +1663,17 @@ void ppARM64Instr ( ARM64Instr* i ) {
          vex_printf(".%s", ar);
          return;
       }
+      case ARM64in_VModifyV: {
+         const HChar* nm = "??";
+         const HChar* ar = "??";
+         showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
+         vex_printf("%s ", nm);
+         ppHRegARM64(i->ARM64in.VModifyV.mod);
+         vex_printf(".%s, ", ar);
+         ppHRegARM64(i->ARM64in.VModifyV.arg);
+         vex_printf(".%s", ar);
+         return;
+      }
       case ARM64in_VUnaryV: {
          const HChar* nm = "??";
         const HChar* ar = "??";
@@ -2000,6 +2035,11 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, ARM64Instr* i, Bool mode64 )
          addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
          addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
          return;
+      case ARM64in_VModifyV:
+         addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
+         addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
+         addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
+         return;
       case ARM64in_VUnaryV:
          addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
         addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
@@ -2214,6 +2254,10 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
          i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
          i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
          return;
+      case ARM64in_VModifyV:
+         i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
+         i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
+         return;
       case ARM64in_VUnaryV:
          i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
          i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
@@ -4493,6 +4537,43 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
          }
          goto done;
       }
+      case ARM64in_VModifyV: {
+         /* 31        23   20    15     9 4
+            010 01110 sz 1 00000 001110 n d   SUQADD@sz  Vd, Vn
+            011 01110 sz 1 00000 001110 n d   USQADD@sz  Vd, Vn
+         */
+         UInt vD = qregNo(i->ARM64in.VModifyV.mod);
+         UInt vN = qregNo(i->ARM64in.VModifyV.arg);
+         switch (i->ARM64in.VModifyV.op) {
+            case ARM64vecmo_SUQADD64x2:
+               *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
+               break;
+            case ARM64vecmo_SUQADD32x4:
+               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
+               break;
+            case ARM64vecmo_SUQADD16x8:
+               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
+               break;
+            case ARM64vecmo_SUQADD8x16:
+               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
+               break;
+            case ARM64vecmo_USQADD64x2:
+               *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
+               break;
+            case ARM64vecmo_USQADD32x4:
+               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
+               break;
+            case ARM64vecmo_USQADD16x8:
+               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
+               break;
+            case ARM64vecmo_USQADD8x16:
+               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
+               break;
+            default:
+               goto bad;
+         }
+         goto done;
+      }
       case ARM64in_VUnaryV: {
          /* 31        23   20    15     9 4
            010 01110 11 1 00000 111110 n d  FABS Vd.2d, Vn.2d
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index 9755b52ba4..e100b0fc2c 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -384,6 +384,16 @@ typedef
    }
    ARM64VecBinOp;
 
+typedef
+   enum {
+      ARM64vecmo_SUQADD64x2=335, ARM64vecmo_SUQADD32x4,
+      ARM64vecmo_SUQADD16x8,     ARM64vecmo_SUQADD8x16,
+      ARM64vecmo_USQADD64x2,     ARM64vecmo_USQADD32x4,
+      ARM64vecmo_USQADD16x8,     ARM64vecmo_USQADD8x16,
+      ARM64vecmo_INVALID
+   }
+   ARM64VecModifyOp;
+
 typedef
    enum {
       ARM64vecu_FNEG64x2=300, ARM64vecu_FNEG32x4,
@@ -482,6 +492,7 @@ typedef
       ARM64in_FPSR,
       /* ARM64in_V*V: vector ops on vector registers */
       ARM64in_VBinV,
+      ARM64in_VModifyV,
       ARM64in_VUnaryV,
       ARM64in_VNarrowV,
       ARM64in_VShiftImmV,
@@ -746,6 +757,13 @@ typedef
             HReg argL;
             HReg argR;
          } VBinV;
+         /* binary vector operation on vector registers.
+            Dst reg is also a src. */
+         struct {
+            ARM64VecModifyOp op;
+            HReg mod;
+            HReg arg;
+         } VModifyV;
          /* unary vector operation on vector registers */
          struct {
            ARM64VecUnaryOp op;
@@ -871,6 +889,7 @@ extern ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR );
 extern ARM64Instr* ARM64Instr_FPCR    ( Bool toFPCR, HReg iReg );
 extern ARM64Instr* ARM64Instr_FPSR    ( Bool toFPSR, HReg iReg );
 extern ARM64Instr* ARM64Instr_VBinV   ( ARM64VecBinOp op, HReg, HReg, HReg );
+extern ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp, HReg, HReg );
 extern ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg, HReg );
 extern ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op, UInt dszBlg2,
                                          HReg dst, HReg src );
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 078741968c..70c80732ef 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -2296,6 +2296,7 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
          return res;
       }
+      /* -- Cases where we can generate a simple three-reg instruction. -- */
       case Iop_AndV128:
       case Iop_OrV128:
       case Iop_XorV128:
@@ -2471,6 +2472,40 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          }
          return res;
       }
+      /* -- These only have 2 operand instructions, so we have to first move
+            the first argument into a new register, for modification. -- */
+      case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
+      case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
+      case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
+      case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
+      {
+         HReg res  = newVRegV(env);
+         HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
+         HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
+         ARM64VecModifyOp op = ARM64vecmo_INVALID;
+         switch (e->Iex.Binop.op) {
+            /* In the following 8 cases, the US - SU switching is intended.
+               See comments in libvex_ir.h for details, and the ARM64 front
+               end, where these primops are generated. */
+            case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
+            case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
+            case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
+            case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
+            case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
+            case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
+            case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
+            case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
+            default: vassert(0);
+         }
+         /* The order of the operands is important.  Although this is
+            basically addition, the two operands are extended differently,
+            making it important to get them into the correct registers in
+            the instruction. */
+         addInstr(env, ARM64Instr_VMov(16, res, argR));
+         addInstr(env, ARM64Instr_VModifyV(op, res, argL));
+         return res;
+      }
+      /* -- Shifts by an immediate. -- */
       case Iop_ShrN64x2: case Iop_ShrN32x4:
       case Iop_ShrN16x8: case Iop_ShrN8x16:
       case Iop_SarN64x2: case Iop_SarN32x4:
@@ -2574,7 +2609,7 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
          /* else fall out; this is unhandled */
          break;
       }
-
+
       /* -- Saturating narrowing by an immediate -- */
       /* uu */
       case Iop_QandQShrNnarrow16Uto8Ux8:
       case Iop_QandQShrNnarrow32Uto16Ux4:
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index c305c2252a..c8f90fe8d3 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -740,6 +740,16 @@ void ppIROp ( IROp op )
       case Iop_QAdd32Sx4: vex_printf("QAdd32Sx4"); return;
       case Iop_QAdd64Ux2: vex_printf("QAdd64Ux2"); return;
       case Iop_QAdd64Sx2: vex_printf("QAdd64Sx2"); return;
+
+      case Iop_QAddExtUSsatSS8x16: vex_printf("QAddExtUSsatSS8x16"); return;
+      case Iop_QAddExtUSsatSS16x8: vex_printf("QAddExtUSsatSS16x8"); return;
+      case Iop_QAddExtUSsatSS32x4: vex_printf("QAddExtUSsatSS32x4"); return;
+      case Iop_QAddExtUSsatSS64x2: vex_printf("QAddExtUSsatSS64x2"); return;
+      case Iop_QAddExtSUsatUU8x16: vex_printf("QAddExtSUsatUU8x16"); return;
+      case Iop_QAddExtSUsatUU16x8: vex_printf("QAddExtSUsatUU16x8"); return;
+      case Iop_QAddExtSUsatUU32x4: vex_printf("QAddExtSUsatUU32x4"); return;
+      case Iop_QAddExtSUsatUU64x2: vex_printf("QAddExtSUsatUU64x2"); return;
+
       case Iop_PwAdd8x16: vex_printf("PwAdd8x16"); return;
       case Iop_PwAdd16x8: vex_printf("PwAdd16x8"); return;
       case Iop_PwAdd32x4: vex_printf("PwAdd32x4"); return;
@@ -2892,6 +2902,10 @@ void typeOfPrimop ( IROp op,
      case Iop_QAdd8Ux16: case Iop_QAdd16Ux8:
      case Iop_QAdd32Ux4: case Iop_QAdd64Ux2:
      case Iop_QAdd8Sx16: case Iop_QAdd16Sx8:
      case Iop_QAdd32Sx4: case Iop_QAdd64Sx2:
+      case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
+      case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
+      case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
+      case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
      case Iop_PwAdd8x16: case Iop_PwAdd16x8: case Iop_PwAdd32x4:
      case Iop_Sub8x16: case Iop_Sub16x8:
      case Iop_Sub32x4: case Iop_Sub64x2:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 9f6668156c..bc2fa46ea2 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -1397,10 +1397,20 @@ typedef
       /* MISC (vector integer cmp != 0) */
       Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
 
-      /* ADDITION (normal / unsigned sat / signed sat) */
-      Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2,
-      Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
-      Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
+      /* ADDITION (normal / U->U sat / S->S sat) */
+      Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
+      Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
+      Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
+
+      /* ADDITION, ARM64 specific saturating variants. */
+      /* Unsigned widen left arg, signed widen right arg, add, saturate S->S.
+         This corresponds to SUQADD. */
+      Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
+      Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2,
+      /* Signed widen left arg, unsigned widen right arg, add, saturate U->U.
+         This corresponds to USQADD. */
+      Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
+      Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2,
 
       /* SUBTRACTION (normal / unsigned sat / signed sat) */
       Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2,
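
For reference, a minimal scalar model of the per-lane semantics the new Iop_QAddExt* primops are documented to have in the libvex_ir.h comments above (shown for 8-bit lanes only; this is an illustrative sketch, not part of the patch, and the helper names are invented):

   #include <stdint.h>

   /* SUQADD lane model (Iop_QAddExtUSsatSS8x16): argL (Vn) is treated as
      unsigned, argR (Vd) as signed; widen both, add, saturate to the
      signed range. */
   static int8_t suqadd_lane8 ( uint8_t argL, int8_t argR )
   {
      int32_t sum = (int32_t)argL + (int32_t)argR;   /* widened add */
      if (sum > INT8_MAX) return INT8_MAX;           /* saturate high */
      if (sum < INT8_MIN) return INT8_MIN;           /* saturate low  */
      return (int8_t)sum;
   }

   /* USQADD lane model (Iop_QAddExtSUsatUU8x16): argL (Vn) is treated as
      signed, argR (Vd) as unsigned; widen both, add, saturate to the
      unsigned range. */
   static uint8_t usqadd_lane8 ( int8_t argL, uint8_t argR )
   {
      int32_t sum = (int32_t)argL + (int32_t)argR;
      if (sum > UINT8_MAX) return UINT8_MAX;
      if (sum < 0)         return 0;
      return (uint8_t)sum;
   }

In the front end above, updateQCFLAGwithDifference compares this saturated result (qres) against a plain wrapping add of the same operands (nres) to decide whether the QC flag should be set.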