From: Paul Floyd <pjfloyd@wanadoo.fr>
Date: Thu, 9 May 2024 19:01:52 +0000 (+0200)
Subject: aarch64 frinta and frintn vector instructions
X-Git-Tag: VALGRIND_3_24_0~153
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7b66a5b58219ac1a4865da8e371edbdb8d765f32;p=thirdparty%2Fvalgrind.git

aarch64 frinta and frintn vector instructions

The initial fix for Bug 484426 only corrected frinta and frintn
scalar instructions. This adds support for the vector variants.
---

diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index c7e395b4b..27d945d63 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -13821,46 +13821,57 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
       /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
       /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
       /* rm plan:
-         FRINTN: tieeven -- !! FIXME KLUDGED !!
+         FRINTN: tieeven
          FRINTM: -inf
          FRINTP: +inf
          FRINTZ: zero
-         FRINTA: tieaway -- !! FIXME KLUDGED !!
+         FRINTA: tieaway
          FRINTX: per FPCR + "exact = TRUE"
          FRINTI: per FPCR
       */
       Bool isD = (size & 1) == 1;
       if (bitQ == 0 && isD) return False; // implied 1d case
 
-      IRTemp irrmRM = mk_get_IR_rounding_mode();
-
-      UChar ch = '?';
-      IRTemp irrm = newTemp(Ity_I32);
+      UChar   ch = '?';
+      IROp    op = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
+      Bool    isBinop = True;
+      IRExpr* irrmE = NULL;
       switch (ix) {
-         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
-         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
-         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
-         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break; 
+         case 1: ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break;
+         case 2: ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
+         case 3: ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
+         case 4: ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
          // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
-         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
+         case 5: ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break;
          // I am unsure about the following, due to the "integral exact"
          // description in the manual.  What does it mean? (frintx, that is)
-         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
-         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break; 
+         case 6: ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
+         case 8: ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
          default: vassert(0);
       }
 
-      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
       if (isD) {
          for (UInt i = 0; i < 2; i++) {
-            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
-                                            getQRegLane(nn, i, Ity_F64)));
+            if (isBinop) {
+               IRTemp irrm = newTemp(Ity_I32);
+               assign(irrm, irrmE);
+               putQRegLane(dd, i, binop(op, mkexpr(irrm),
+                                               getQRegLane(nn, i, Ity_F64)));
+            } else {
+                putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F64)));
+            }
          }
       } else {
          UInt n = bitQ==1 ? 4 : 2;
          for (UInt i = 0; i < n; i++) {
-            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
-                                            getQRegLane(nn, i, Ity_F32)));
+            if (isBinop) {
+               IRTemp irrm = newTemp(Ity_I32);
+               assign(irrm, irrmE);
+               putQRegLane(dd, i, binop(op, mkexpr(irrm),
+                                               getQRegLane(nn, i, Ity_F32)));
+            } else {
+                putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F32)));
+            }
          }
          if (bitQ == 0)
             putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
diff --git a/none/tests/arm64/frinta_frintn.cpp b/none/tests/arm64/frinta_frintn.cpp
index 8e13761eb..c0803688f 100644
--- a/none/tests/arm64/frinta_frintn.cpp
+++ b/none/tests/arm64/frinta_frintn.cpp
@@ -36,6 +36,55 @@ void test_frinta(T input, T expected)
    }
 }
 
+// Run vector FRINTA (round to nearest, ties away from zero) on one 128-bit
+// register loaded from `input` and assert every lane equals `expected`.
+template<typename T>
+void test_frinta_fullvec(T* input, T* expected)
+{
+    constexpr unsigned lanes = 2*sizeof(double)/sizeof(T);
+    T result[lanes];
+    T* rp = result;
+    if constexpr (std::is_same_v<double, T> == true)
+    {
+        // rp is only a base address for "[%0]", so it must be a register input.
+        __asm__ __volatile__(
+            "ldr q23, [%1];\n"
+            "frinta v22.2d, v23.2d;\n"
+            "str q22, [%0];\n"
+            :
+            : "r" (rp), "r" (input)
+            : "memory", "v22", "v23");
+    }
+    else
+    {
+        __asm__ __volatile__(
+            "ldr q23, [%1];\n"
+            "frinta v22.4s, v23.4s;\n"
+            "str q22, [%0];\n"
+            :
+            : "r" (rp), "r" (input)
+            : "memory", "v22", "v23");
+    }
+    for (unsigned i = 0; i < lanes; ++i)
+        assert(result[i] == expected[i]);
+}
+
+// FRINTA on the low 64 bits (two float lanes); only expected[0..1] are checked.
+void test_frinta_halfvec(float* input, float* expected)
+{
+    float result[2];
+    float* rp = result;  // base address for the store; must be in a register
+    __asm__ __volatile__(
+        "ldr d23, [%1];\n"
+        "frinta v22.2s, v23.2s;\n"
+        "str d22, [%0];\n"
+        :
+        : "r" (rp), "r" (input)
+        : "memory", "v22", "v23");
+    for (int i = 0; i < 2; ++i) assert(result[i] == expected[i]);
+}
+
+
 template<typename T>
 void test_frintn(T input, T expected)
 {
@@ -66,6 +115,54 @@ void test_frintn(T input, T expected)
    }
 }
 
+// Run vector FRINTN (round to nearest, ties to even) on one 128-bit
+// register loaded from `input` and assert every lane equals `expected`.
+template<typename T>
+void test_frintn_fullvec(T* input, T* expected)
+{
+    constexpr unsigned lanes = 2*sizeof(double)/sizeof(T);
+    T result[lanes];
+    T* rp = result;
+    if constexpr (std::is_same_v<double, T> == true)
+    {
+        // rp is only a base address for "[%0]", so it must be a register input.
+        __asm__ __volatile__(
+            "ldr q23, [%1];\n"
+            "frintn v22.2d, v23.2d;\n"
+            "str q22, [%0];\n"
+            :
+            : "r" (rp), "r" (input)
+            : "memory", "v22", "v23");
+    }
+    else
+    {
+        __asm__ __volatile__(
+            "ldr q23, [%1];\n"
+            "frintn v22.4s, v23.4s;\n"
+            "str q22, [%0];\n"
+            :
+            : "r" (rp), "r" (input)
+            : "memory", "v22", "v23");
+    }
+    for (unsigned i = 0; i < lanes; ++i)
+        assert(result[i] == expected[i]);
+}
+
+// FRINTN on the low 64 bits (two float lanes); only expected[0..1] are checked.
+void test_frintn_halfvec(float* input, float* expected)
+{
+    float result[2];
+    float* rp = result;  // base address for the store; must be in a register
+    __asm__ __volatile__(
+        "ldr d23, [%1];\n"
+        "frintn v22.2s, v23.2s;\n"
+        "str d22, [%0];\n"
+        :
+        : "r" (rp), "r" (input)
+        : "memory", "v22", "v23");
+    for (int i = 0; i < 2; ++i) assert(result[i] == expected[i]);
+}
+
 int main()
 {
     // round "away from zero"
@@ -78,6 +175,36 @@ int main()
     test_frinta(-1.5F, -2.0F);
     test_frinta(-2.5F, -3.0F);
 
+    double in1[] = {1.5, 1.5};     // two double lanes fill one 128-bit vector
+    double out1[] = {2.0, 2.0};
+    test_frinta_fullvec(in1, out1);
+    double in2[] = {2.5, 2.5};
+    double out2[] = {3.0, 3.0};
+    test_frinta_fullvec(in2, out2);
+    double in3[] = {-1.5, -1.5};
+    double out3[] = {-2.0, -2.0};
+    test_frinta_fullvec(in3, out3);
+    double in4[] = {-2.5, -2.5};
+    double out4[] = {-3.0, -3.0};
+    test_frinta_fullvec(in4, out4);
+
+    float in1f[] = {1.5F, 1.5F, 1.5F, 1.5F};
+    float out1f[] = {2.0F, 2.0F, 2.0F, 2.0F};
+    test_frinta_fullvec(in1f, out1f);
+    test_frinta_halfvec(in1f, out1f);
+    float in2f[] = {2.5F, 2.5F, 2.5F, 2.5F};
+    float out2f[] = {3.0F, 3.0F, 3.0F, 3.0F};
+    test_frinta_fullvec(in2f, out2f);
+    test_frinta_halfvec(in2f, out2f);
+    float in3f[] = {-1.5F, -1.5F, -1.5F, -1.5F};
+    float out3f[] = {-2.0F, -2.0F, -2.0F, -2.0F};
+    test_frinta_fullvec(in3f, out3f);
+    test_frinta_halfvec(in3f, out3f);
+    float in4f[] = {-2.5F, -2.5F, -2.5F, -2.5F};
+    float out4f[] = {-3.0F, -3.0F, -3.0F, -3.0F};
+    test_frinta_fullvec(in4f, out4f);
+    test_frinta_halfvec(in4f, out4f);
+
     // round "to even"
     test_frintn(1.5, 2.0);
     test_frintn(2.5, 2.0);
@@ -87,5 +214,19 @@ int main()
     test_frintn(2.5F, 2.0F);
     test_frintn(-1.5F, -2.0F);
     test_frintn(-2.5F, -2.0F);
+
+    test_frintn_fullvec(in1, out1);
+    test_frintn_fullvec(in2, out1);
+    test_frintn_fullvec(in3, out3);
+    test_frintn_fullvec(in4, out3);
+
+    test_frintn_fullvec(in1f, out1f);
+    test_frintn_halfvec(in1f, out1f);
+    test_frintn_fullvec(in2f, out1f);
+    test_frintn_halfvec(in2f, out1f);
+    test_frintn_fullvec(in3f, out3f);
+    test_frintn_halfvec(in3f, out3f);
+    test_frintn_fullvec(in4f, out3f);
+    test_frintn_halfvec(in4f, out3f);
 }