From: Julian Seward Date: Thu, 27 Mar 2014 18:59:00 +0000 (+0000) Subject: Implement FCM{EQ,GE,GT}, FAC{GE,GT} (vector). X-Git-Tag: svn/VALGRIND_3_10_1^2~128 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=32c2715e514b2b61e8166948bda66e26a2327036;p=thirdparty%2Fvalgrind.git Implement FCM{EQ,GE,GT}, FAC{GE,GT} (vector). git-svn-id: svn://svn.valgrind.org/vex/trunk@2842 --- diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index 9b895e56ce..6dde926b71 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -5751,6 +5751,85 @@ Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn) } } + /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */ + /* 31 28 22 20 15 9 4 case + 0q1 01110 0 sz 1 m 111011 n d FACGE Vd, Vn, Vm + 0q1 01110 1 sz 1 m 111011 n d FACGT Vd, Vn, Vm + 0q0 01110 0 sz 1 m 111001 n d FCMEQ Vd, Vn, Vm + 0q1 01110 0 sz 1 m 111001 n d FCMGE Vd, Vn, Vm + 0q1 01110 1 sz 1 m 111001 n d FCMGT Vd, Vn, Vm + */ + if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1 + && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) { + Bool isQ = INSN(30,30) == 1; + UInt U = INSN(29,29); + UInt E = INSN(23,23); + Bool isF64 = INSN(22,22) == 1; + UInt ac = INSN(11,11); + UInt mm = INSN(20,16); + UInt nn = INSN(9,5); + UInt dd = INSN(4,0); + /* */ + UInt EUac = (E << 2) | (U << 1) | ac; + IROp opABS = Iop_INVALID; + IROp opCMP = Iop_INVALID; + IRType laneTy = Ity_INVALID; + Bool zeroHI = False; + Bool swap = True; + const HChar* arr = "??"; + const HChar* nm = "??"; + Bool ok + = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64); + if (ok) { + vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32)); + switch (EUac) { + case BITS3(0,0,0): + nm = "fcmeq"; + opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4; + swap = False; + break; + case BITS3(0,1,0): + nm = "fcmge"; + opCMP = isF64 ? 
Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; + break; + case BITS3(0,1,1): + nm = "facge"; + opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; + opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4; + break; + case BITS3(1,1,0): + nm = "fcmgt"; + opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4; + break; + case BITS3(1,1,1): + nm = "facgt"; + opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4; + opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4; + break; + default: + break; + } + } + if (opCMP != Iop_INVALID) { + IRExpr* argN = getQReg128(nn); + IRExpr* argM = getQReg128(mm); + if (opABS != Iop_INVALID) { + argN = unop(opABS, argN); + argM = unop(opABS, argM); + } + IRExpr* res = swap ? binop(opCMP, argM, argN) + : binop(opCMP, argN, argM); + if (zeroHI) { + res = unop(Iop_ZeroHI64ofV128, res); + } + putQReg128(dd, res); + DIP("%s %s.%s, %s.%s, %s.%s\n", nm, + nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr); + return True; + } + /* else fall through */ + } + /* -------------------- FCVTN -------------------- */ /* 31 28 23 20 15 9 4 0q0 01110 0s1 00001 011010 n d FCVTN Vd, Vn diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index a26f5771a6..dd37053a83 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -885,6 +885,12 @@ static void showARM64VecBinOp(/*OUT*/const HChar** nm, case ARM64vecb_CMEQ32x4: *nm = "cmeq"; *ar = "4s"; return; case ARM64vecb_CMEQ16x8: *nm = "cmeq"; *ar = "8h"; return; case ARM64vecb_CMEQ8x16: *nm = "cmeq"; *ar = "16b"; return; + case ARM64vecb_FCMEQ64x2: *nm = "fcmeq"; *ar = "2d"; return; + case ARM64vecb_FCMEQ32x4: *nm = "fcmeq"; *ar = "4s"; return; + case ARM64vecb_FCMGE64x2: *nm = "fcmge"; *ar = "2d"; return; + case ARM64vecb_FCMGE32x4: *nm = "fcmge"; *ar = "4s"; return; + case ARM64vecb_FCMGT64x2: *nm = "fcmgt"; *ar = "2d"; return; + case ARM64vecb_FCMGT32x4: *nm = "fcmgt"; *ar = "4s"; return; default: vpanic("showARM64VecBinOp"); } } @@ -4955,6 +4961,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, 
011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d >u, ATC 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d >s, ATC + + 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d + 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s + + 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d + 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s + + 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d + 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s */ UInt vD = qregNo(i->ARM64in.VBinV.dst); UInt vN = qregNo(i->ARM64in.VBinV.argL); @@ -5072,6 +5087,26 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD); break; + case ARM64vecb_FCMEQ64x2: + *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD); + break; + case ARM64vecb_FCMEQ32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD); + break; + + case ARM64vecb_FCMGE64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD); + break; + case ARM64vecb_FCMGE32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD); + break; + + case ARM64vecb_FCMGT64x2: + *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD); + break; + case ARM64vecb_FCMGT32x4: + *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD); + break; default: goto bad; } @@ -5091,6 +5126,9 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, case ARM64vecu_FABS64x2: *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD); break; + case ARM64vecu_FABS32x4: + *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD); + break; case ARM64vecu_FNEG64x2: *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD); break; diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index ede7e55ba4..bee6d2ce10 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -342,6 +342,12 @@ typedef ARM64vecb_CMEQ32x4, ARM64vecb_CMEQ16x8, ARM64vecb_CMEQ8x16, + ARM64vecb_FCMEQ64x2, + ARM64vecb_FCMEQ32x4, + 
ARM64vecb_FCMGE64x2, + ARM64vecb_FCMGE32x4, + ARM64vecb_FCMGT64x2, + ARM64vecb_FCMGT32x4, ARM64vecb_INVALID } ARM64VecBinOp; diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index d50a6db640..712d161ccb 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -4401,6 +4401,7 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) switch (e->Iex.Unop.op) { case Iop_NotV128: case Iop_Abs64Fx2: + case Iop_Abs32Fx4: case Iop_Neg64Fx2: { HReg res = newVRegV(env); HReg arg = iselV128Expr(env, e->Iex.Unop.arg); @@ -4408,6 +4409,7 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) switch (e->Iex.Unop.op) { case Iop_NotV128: op = ARM64vecu_NOT; break; case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break; + case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break; case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break; default: vassert(0); } @@ -4921,39 +4923,57 @@ static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e ) case Iop_Sub16x8: case Iop_Mul32x4: case Iop_Mul16x8: - case Iop_CmpEQ64x2: { + case Iop_CmpEQ64x2: + case Iop_CmpEQ64Fx2: + case Iop_CmpEQ32Fx4: + case Iop_CmpLE64Fx2: + case Iop_CmpLE32Fx4: + case Iop_CmpLT64Fx2: + case Iop_CmpLT32Fx4: + { HReg res = newVRegV(env); HReg argL = iselV128Expr(env, e->Iex.Binop.arg1); HReg argR = iselV128Expr(env, e->Iex.Binop.arg2); + Bool sw = False; ARM64VecBinOp op = ARM64vecb_INVALID; switch (e->Iex.Binop.op) { - case Iop_AndV128: op = ARM64vecb_AND; break; - case Iop_OrV128: op = ARM64vecb_ORR; break; - case Iop_XorV128: op = ARM64vecb_XOR; break; - case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; - case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; - case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; - case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; - case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; - case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; - case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; - case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; - case Iop_Max8Sx16: 
op = ARM64vecb_SMAX8x16; break; - case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; - case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; - case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break; - case Iop_Add64x2: op = ARM64vecb_ADD64x2; break; - case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; - case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; - case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; - case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; - case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; - case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; - case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; - case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break; + case Iop_AndV128: op = ARM64vecb_AND; break; + case Iop_OrV128: op = ARM64vecb_ORR; break; + case Iop_XorV128: op = ARM64vecb_XOR; break; + case Iop_Max32Ux4: op = ARM64vecb_UMAX32x4; break; + case Iop_Max16Ux8: op = ARM64vecb_UMAX16x8; break; + case Iop_Max8Ux16: op = ARM64vecb_UMAX8x16; break; + case Iop_Min32Ux4: op = ARM64vecb_UMIN32x4; break; + case Iop_Min16Ux8: op = ARM64vecb_UMIN16x8; break; + case Iop_Min8Ux16: op = ARM64vecb_UMIN8x16; break; + case Iop_Max32Sx4: op = ARM64vecb_SMAX32x4; break; + case Iop_Max16Sx8: op = ARM64vecb_SMAX16x8; break; + case Iop_Max8Sx16: op = ARM64vecb_SMAX8x16; break; + case Iop_Min32Sx4: op = ARM64vecb_SMIN32x4; break; + case Iop_Min16Sx8: op = ARM64vecb_SMIN16x8; break; + case Iop_Min8Sx16: op = ARM64vecb_SMIN8x16; break; + case Iop_Add64x2: op = ARM64vecb_ADD64x2; break; + case Iop_Add32x4: op = ARM64vecb_ADD32x4; break; + case Iop_Add16x8: op = ARM64vecb_ADD16x8; break; + case Iop_Sub64x2: op = ARM64vecb_SUB64x2; break; + case Iop_Sub32x4: op = ARM64vecb_SUB32x4; break; + case Iop_Sub16x8: op = ARM64vecb_SUB16x8; break; + case Iop_Mul32x4: op = ARM64vecb_MUL32x4; break; + case Iop_Mul16x8: op = ARM64vecb_MUL16x8; break; + case Iop_CmpEQ64x2: op = ARM64vecb_CMEQ64x2; break; + case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break; + case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; 
break; + case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break; + case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break; + case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break; + case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break; default: vassert(0); } - addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); + if (sw) { + addInstr(env, ARM64Instr_VBinV(op, res, argR, argL)); + } else { + addInstr(env, ARM64Instr_VBinV(op, res, argL, argR)); + } return res; } //ZZ case Iop_Add32Fx4: {