aarch64 frinta and frintn vector instructions

author Paul Floyd <pjfloyd@wanadoo.fr>

Thu, 9 May 2024 19:01:52 +0000 (21:01 +0200)

committer Paul Floyd <pjfloyd@wanadoo.fr>

Thu, 9 May 2024 19:01:52 +0000 (21:01 +0200)
author Paul Floyd <pjfloyd@wanadoo.fr>
Thu, 9 May 2024 19:01:52 +0000 (21:01 +0200)
committer Paul Floyd <pjfloyd@wanadoo.fr>
Thu, 9 May 2024 19:01:52 +0000 (21:01 +0200)
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c

index c7e395b4b63d98fecc03169b91d92cc7bced38a9..27d945d6328de1286c5d8b3454e2259d16b32da2 100644 (file)
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -13821,46 +13821,57 @@ Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
        /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
        /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
        /* rm plan:
-         FRINTN: tieeven -- !! FIXME KLUDGED !!
+         FRINTN: tieeven
           FRINTM: -inf
           FRINTP: +inf
           FRINTZ: zero
-         FRINTA: tieaway -- !! FIXME KLUDGED !!
+         FRINTA: tieaway
           FRINTX: per FPCR + "exact = TRUE"
           FRINTI: per FPCR
        */
        Bool isD = (size & 1) == 1;
        if (bitQ == 0 && isD) return False; // implied 1d case
  
-      IRTemp irrmRM = mk_get_IR_rounding_mode();
-
-      UChar ch = '?';
-      IRTemp irrm = newTemp(Ity_I32);
+      UChar   ch = '?';
+      IROp    op = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
+      Bool    isBinop = True;
+      IRExpr* irrmE = NULL;
        switch (ix) {
-         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
-         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
-         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
-         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break; 
+         case 1: ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break;
+         case 2: ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
+         case 3: ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
+         case 4: ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
          // Ties-away is not an Irrm_* mode here: FRINTA uses the Iop_Round*toIntA0 unops.
-         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
+         case 5: ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break;
           // I am unsure about the following, due to the "integral exact"
           // description in the manual.  What does it mean? (frintx, that is)
-         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
-         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break; 
+         case 6: ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
+         case 8: ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
           default: vassert(0);
        }
  
-      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
        if (isD) {
           for (UInt i = 0; i < 2; i++) {
-            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
-                                            getQRegLane(nn, i, Ity_F64)));
+            if (isBinop) {
+               IRTemp irrm = newTemp(Ity_I32);
+               assign(irrm, irrmE);
+               putQRegLane(dd, i, binop(op, mkexpr(irrm),
+                                               getQRegLane(nn, i, Ity_F64)));
+            } else {
+                putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F64)));
+            }
           }
        } else {
           UInt n = bitQ==1 ? 4 : 2;
           for (UInt i = 0; i < n; i++) {
-            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
-                                            getQRegLane(nn, i, Ity_F32)));
+            if (isBinop) {
+               IRTemp irrm = newTemp(Ity_I32);
+               assign(irrm, irrmE);
+               putQRegLane(dd, i, binop(op, mkexpr(irrm),
+                                               getQRegLane(nn, i, Ity_F32)));
+            } else {
+                putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F32)));
+            }
           }
           if (bitQ == 0)
              putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
diff --git a/none/tests/arm64/frinta_frintn.cpp b/none/tests/arm64/frinta_frintn.cpp

index 8e13761eb96683ce96cdc23a05386ed28f6db9f2..c0803688f698a746aec96c20ab8872987e880d5c 100644 (file)
--- a/none/tests/arm64/frinta_frintn.cpp
+++ b/none/tests/arm64/frinta_frintn.cpp
@@ -36,6 +36,55 @@ void test_frinta(T input, T expected)
     }
  }
  
+// Runs the vector FRINTA instruction on one full 128-bit register loaded from
+// `input` (two doubles or four floats, selected by T) and asserts that every
+// lane of the result equals the matching element of `expected`.
+template<typename T>
+void test_frinta_fullvec(T* input, T* expected)
+{
+    constexpr unsigned lanes = 2*sizeof(double)/sizeof(T);
+    T result[lanes];
+    T* rp = result;
+    // rp is only used as an address, so it is a plain register input: an "m"
+    // alternative would make "[%0]" expand to an invalid nested address.  The
+    // store through rp is covered by the "memory" clobber.
+    if constexpr (std::is_same_v<double, T>)
+    {
+        __asm__ __volatile__(
+            "ldr q23, [%1];\n"
+            "frinta v22.2d, v23.2d;\n"
+            "str q22, [%0];\n"
+            :
+            : "r" (rp), "r" (input)
+            : "memory", "v22", "v23");
+    }
+    else
+    {
+        __asm__ __volatile__(
+            "ldr q23, [%1];\n"
+            "frinta v22.4s, v23.4s;\n"
+            "str q22, [%0];\n"
+            :
+            : "r" (rp), "r" (input)
+            : "memory", "v22", "v23");
+    }
+    for (unsigned i = 0; i < lanes; ++i)
+    {
+        assert(result[i] == expected[i]);
+    }
+}
+
+// Runs the vector FRINTA instruction on the low 64 bits (two floats) loaded
+// from `input` and asserts that both result lanes equal `expected[0..1]`.
+void test_frinta_halfvec(float* input, float* expected)
+{
+    float result[2];
+    float* rp = result;
+    // rp is only used as an address, so it is a plain register input: an "m"
+    // alternative would make "[%0]" expand to an invalid nested address.  The
+    // store through rp is covered by the "memory" clobber.
+    __asm__ __volatile__(
+        "ldr d23, [%1];\n"
+        "frinta v22.2s, v23.2s;\n"
+        "str d22, [%0];\n"
+        :
+        : "r" (rp), "r" (input)
+        : "memory", "v22", "v23");
+    assert(result[0] == expected[0]);
+    assert(result[1] == expected[1]);
+}
+
+
  template<typename T>
  void test_frintn(T input, T expected)
  {
@@ -66,6 +115,54 @@ void test_frintn(T input, T expected)
     }
  }
  
+// Runs the vector FRINTN instruction on one full 128-bit register loaded from
+// `input` (two doubles or four floats, selected by T) and asserts that every
+// lane of the result equals the matching element of `expected`.
+template<typename T>
+void test_frintn_fullvec(T* input, T* expected)
+{
+    constexpr unsigned lanes = 2*sizeof(double)/sizeof(T);
+    T result[lanes];
+    T* rp = result;
+    // rp is only used as an address, so it is a plain register input: an "m"
+    // alternative would make "[%0]" expand to an invalid nested address.  The
+    // store through rp is covered by the "memory" clobber.
+    if constexpr (std::is_same_v<double, T>)
+    {
+        __asm__ __volatile__(
+            "ldr q23, [%1];\n"
+            "frintn v22.2d, v23.2d;\n"
+            "str q22, [%0];\n"
+            :
+            : "r" (rp), "r" (input)
+            : "memory", "v22", "v23");
+    }
+    else
+    {
+        __asm__ __volatile__(
+            "ldr q23, [%1];\n"
+            "frintn v22.4s, v23.4s;\n"
+            "str q22, [%0];\n"
+            :
+            : "r" (rp), "r" (input)
+            : "memory", "v22", "v23");
+    }
+    for (unsigned i = 0; i < lanes; ++i)
+    {
+        assert(result[i] == expected[i]);
+    }
+}
+
+// Runs the vector FRINTN instruction on the low 64 bits (two floats) loaded
+// from `input` and asserts that both result lanes equal `expected[0..1]`.
+void test_frintn_halfvec(float* input, float* expected)
+{
+    float result[2];
+    float* rp = result;
+    // rp is only used as an address, so it is a plain register input: an "m"
+    // alternative would make "[%0]" expand to an invalid nested address.  The
+    // store through rp is covered by the "memory" clobber.
+    __asm__ __volatile__(
+        "ldr d23, [%1];\n"
+        "frintn v22.2s, v23.2s;\n"
+        "str d22, [%0];\n"
+        :
+        : "r" (rp), "r" (input)
+        : "memory", "v22", "v23");
+    assert(result[0] == expected[0]);
+    assert(result[1] == expected[1]);
+}
+
  int main()
  {
      // round "away from zero"
@@ -78,6 +175,36 @@ int main()
      test_frinta(-1.5F, -2.0F);
      test_frinta(-2.5F, -3.0F);
  
+    // Two-lane double vectors: each expected array must hold exactly two values.
+    double in1[] = {1.5, 1.5};
+    double out1[] = {2.0, 2.0};
+    test_frinta_fullvec(in1, out1);
+    double in2[] = {2.5, 2.5};
+    double out2[] = {3.0, 3.0};
+    test_frinta_fullvec(in2, out2);
+    double in3[] = {-1.5, -1.5};
+    double out3[] = {-2.0, -2.0};
+    test_frinta_fullvec(in3, out3);
+    double in4[] = {-2.5, -2.5};
+    double out4[] = {-3.0, -3.0};
+    test_frinta_fullvec(in4, out4);
+
+    float in1f[] = {1.5F, 1.5F, 1.5F, 1.5F};
+    float out1f[] = {2.0F, 2.0F, 2.0F, 2.0F};
+    test_frinta_fullvec(in1f, out1f);
+    test_frinta_halfvec(in1f, out1f);
+    float in2f[] = {2.5F, 2.5F, 2.5F, 2.5F};
+    float out2f[] = {3.0F, 3.0F, 3.0F, 3.0F};
+    test_frinta_fullvec(in2f, out2f);
+    test_frinta_halfvec(in2f, out2f);
+    float in3f[] = {-1.5F, -1.5F, -1.5F, -1.5F};
+    float out3f[] = {-2.0F, -2.0F, -2.0F, -2.0F};
+    test_frinta_fullvec(in3f, out3f);
+    test_frinta_halfvec(in3f, out3f);
+    float in4f[] = {-2.5F, -2.5F, -2.5F, -2.5F};
+    float out4f[] = {-3.0F, -3.0F, -3.0F, -3.0F};
+    test_frinta_fullvec(in4f, out4f);
+    test_frinta_halfvec(in4f, out4f);
+
      // round "to even"
      test_frintn(1.5, 2.0);
      test_frintn(2.5, 2.0);
@@ -87,5 +214,19 @@ int main()
      test_frintn(2.5F, 2.0F);
      test_frintn(-1.5F, -2.0F);
      test_frintn(-2.5F, -2.0F);
+
+    test_frintn_fullvec(in1, out1);
+    test_frintn_fullvec(in2, out1);
+    test_frintn_fullvec(in3, out3);
+    test_frintn_fullvec(in4, out3);
+
+    test_frintn_fullvec(in1f, out1f);
+    test_frintn_halfvec(in1f, out1f);
+    test_frintn_fullvec(in2f, out1f);
+    test_frintn_halfvec(in2f, out1f);
+    test_frintn_fullvec(in3f, out3f);
+    test_frintn_halfvec(in3f, out3f);
+    test_frintn_fullvec(in4f, out3f);
+    test_frintn_halfvec(in4f, out3f);
  }
author	Paul Floyd <pjfloyd@wanadoo.fr>
	Thu, 9 May 2024 19:01:52 +0000 (21:01 +0200)
committer	Paul Floyd <pjfloyd@wanadoo.fr>
	Thu, 9 May 2024 19:01:52 +0000 (21:01 +0200)
VEX/priv/guest_arm64_toIR.c		patch \| blob \| blame \| history
none/tests/arm64/frinta_frintn.cpp		patch \| blob \| blame \| history