From: Paul Floyd Date: Thu, 9 May 2024 19:01:52 +0000 (+0200) Subject: aarch64 frinta and frintn vector instructions X-Git-Tag: VALGRIND_3_24_0~153 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7b66a5b58219ac1a4865da8e371edbdb8d765f32;p=thirdparty%2Fvalgrind.git aarch64 frinta and frintn vector instructions The initial fix for Bug 484426 only corrected frinta and frintn scalar instructions. This adds support for the vector variants. --- diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c index c7e395b4b..27d945d63 100644 --- a/VEX/priv/guest_arm64_toIR.c +++ b/VEX/priv/guest_arm64_toIR.c @@ -13821,46 +13821,57 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn) /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */ /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */ /* rm plan: - FRINTN: tieeven -- !! FIXME KLUDGED !! + FRINTN: tieeven FRINTM: -inf FRINTP: +inf FRINTZ: zero - FRINTA: tieaway -- !! FIXME KLUDGED !! + FRINTA: tieaway FRINTX: per FPCR + "exact = TRUE" FRINTI: per FPCR */ Bool isD = (size & 1) == 1; if (bitQ == 0 && isD) return False; // implied 1d case - IRTemp irrmRM = mk_get_IR_rounding_mode(); - - UChar ch = '?'; - IRTemp irrm = newTemp(Ity_I32); + UChar ch = '?'; + IROp op = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt; + Bool isBinop = True; + IRExpr* irrmE = NULL; switch (ix) { - case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break; - case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break; - case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break; - case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break; + case 1: ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break; + case 2: ch = 'm'; irrmE = mkU32(Irrm_NegINF); break; + case 3: ch = 'p'; irrmE = mkU32(Irrm_PosINF); break; + case 4: ch = 'z'; irrmE = mkU32(Irrm_ZERO); break; // The following is a kludge. 
Should be: Irrm_NEAREST_TIE_AWAY_0 - case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break; + case 5: ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break; // I am unsure about the following, due to the "integral exact" // description in the manual. What does it mean? (frintx, that is) - case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break; - case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break; + case 6: ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break; + case 8: ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break; default: vassert(0); } - IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt; if (isD) { for (UInt i = 0; i < 2; i++) { - putQRegLane(dd, i, binop(opRND, mkexpr(irrm), - getQRegLane(nn, i, Ity_F64))); + if (isBinop) { + IRTemp irrm = newTemp(Ity_I32); + assign(irrm, irrmE); + putQRegLane(dd, i, binop(op, mkexpr(irrm), + getQRegLane(nn, i, Ity_F64))); + } else { + putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F64))); + } } } else { UInt n = bitQ==1 ? 
4 : 2; for (UInt i = 0; i < n; i++) { - putQRegLane(dd, i, binop(opRND, mkexpr(irrm), - getQRegLane(nn, i, Ity_F32))); + if (isBinop) { + IRTemp irrm = newTemp(Ity_I32); + assign(irrm, irrmE); + putQRegLane(dd, i, binop(op, mkexpr(irrm), + getQRegLane(nn, i, Ity_F32))); + } else { + putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F32))); + } } if (bitQ == 0) putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3 diff --git a/none/tests/arm64/frinta_frintn.cpp b/none/tests/arm64/frinta_frintn.cpp index 8e13761eb..c0803688f 100644 --- a/none/tests/arm64/frinta_frintn.cpp +++ b/none/tests/arm64/frinta_frintn.cpp @@ -36,6 +36,55 @@ void test_frinta(T input, T expected) } } +template +void test_frinta_fullvec(T* input, T* expected) +{ + T result[2*sizeof(double)/sizeof(T)]; + T* rp = result; + if constexpr (std::is_same_v == true) + { + __asm__ __volatile__( + "ldr q23, [%1];\n" + "frinta v22.2d, v23.2d;\n" + "str q22, [%0];\n" + : "+rm" (rp) + : "r" (input) + : "memory", "v22", "v23"); + assert(result[0] == expected[0]); + assert(result[1] == expected[1]); + } + else + { + __asm__ __volatile__( + "ldr q23, [%1];\n" + "frinta v22.4s, v23.4s;\n" + "str q22, [%0];\n" + : "+rm" (rp) + : "r" (input) + : "memory", "v22", "v23"); + assert(result[0] == expected[0]); + assert(result[1] == expected[1]); + assert(result[2] == expected[2]); + assert(result[3] == expected[3]); + } +} + +void test_frinta_halfvec(float* input, float* expected) +{ + float result[2]; + float* rp = result; + __asm__ __volatile__( + "ldr d23, [%1];\n" + "frinta v22.2s, v23.2s;\n" + "str d22, [%0];\n" + : "+rm" (rp) + : "r" (input) + : "memory", "v22", "v23"); + assert(result[0] == expected[0]); + assert(result[1] == expected[1]); +} + + template void test_frintn(T input, T expected) { @@ -66,6 +115,54 @@ void test_frintn(T input, T expected) } } +template +void test_frintn_fullvec(T* input, T* expected) +{ + T result[2*sizeof(double)/sizeof(T)]; + T* rp = result; + if constexpr (std::is_same_v == 
true) + { + __asm__ __volatile__( + "ldr q23, [%1];\n" + "frintn v22.2d, v23.2d;\n" + "str q22, [%0];\n" + : "+rm" (rp) + : "r" (input) + : "memory", "v22", "v23"); + assert(result[0] == expected[0]); + assert(result[1] == expected[1]); + } + else + { + __asm__ __volatile__( + "ldr q23, [%1];\n" + "frintn v22.4s, v23.4s;\n" + "str q22, [%0];\n" + : "+rm" (rp) + : "r" (input) + : "memory", "v22", "v23"); + assert(result[0] == expected[0]); + assert(result[1] == expected[1]); + assert(result[2] == expected[2]); + assert(result[3] == expected[3]); + } +} + +void test_frintn_halfvec(float* input, float* expected) +{ + float result[2]; + float* rp = result; + __asm__ __volatile__( + "ldr d23, [%1];\n" + "frintn v22.2s, v23.2s;\n" + "str d22, [%0];\n" + : "+rm" (rp) + : "r" (input) + : "memory", "v22", "v23"); + assert(result[0] == expected[0]); + assert(result[1] == expected[1]); +} + int main() { // round "away from zero" @@ -78,6 +175,36 @@ int main() test_frinta(-1.5F, -2.0F); test_frinta(-2.5F, -3.0F); + double in1[] = {1.5, 1.5}; + double out1[] = {2.0, 2,0}; + test_frinta_fullvec(in1, out1); + double in2[] = {2.5, 2.5}; + double out2[] = {3.0, 3,0}; + test_frinta_fullvec(in2, out2); + double in3[] = {-1.5, -1.5}; + double out3[] = {-2.0, -2,0}; + test_frinta_fullvec(in3, out3); + double in4[] = {-2.5, -2.5}; + double out4[] = {-3.0, -3,0}; + test_frinta_fullvec(in4, out4); + + float in1f[] = {1.5F, 1.5F, 1.5F, 1.5F}; + float out1f[] = {2.0F, 2.0F, 2.0F, 2.0F}; + test_frinta_fullvec(in1f, out1f); + test_frinta_halfvec(in1f, out1f); + float in2f[] = {2.5F, 2.5F, 2.5F, 2.5F}; + float out2f[] = {3.0F, 3.0F, 3.0F, 3.0F}; + test_frinta_fullvec(in2f, out2f); + test_frinta_halfvec(in2f, out2f); + float in3f[] = {-1.5F, -1.5F, -1.5F, -1.5F}; + float out3f[] = {-2.0F, -2.0F, -2.0F, -2.0F}; + test_frinta_fullvec(in3f, out3f); + test_frinta_halfvec(in3f, out3f); + float in4f[] = {-2.5F, -2.5F, -2.5F, -2.5F}; + float out4f[] = {-3.0F, -3.0F, -3.0F, -3.0F}; + 
test_frinta_fullvec(in4f, out4f); + test_frinta_halfvec(in4f, out4f); + // round "to even" test_frintn(1.5, 2.0); test_frintn(2.5, 2.0); @@ -87,5 +214,19 @@ int main() test_frintn(2.5F, 2.0F); test_frintn(-1.5F, -2.0F); test_frintn(-2.5F, -2.0F); + + test_frintn_fullvec(in1, out1); + test_frintn_fullvec(in2, out1); + test_frintn_fullvec(in3, out3); + test_frintn_fullvec(in4, out3); + + test_frintn_fullvec(in1f, out1f); + test_frintn_halfvec(in1f, out1f); + test_frintn_fullvec(in2f, out1f); + test_frintn_halfvec(in2f, out1f); + test_frintn_fullvec(in3f, out3f); + test_frintn_halfvec(in3f, out3f); + test_frintn_fullvec(in4f, out3f); + test_frintn_halfvec(in4f, out3f); }