From: Paul Floyd <pjfloyd@wanadoo.fr>
Date: Thu, 28 Mar 2024 19:55:38 +0000 (+0100)
Subject: Bug 484426 - aarch64: 0.5 gets rounded to 0
X-Git-Tag: VALGRIND_3_23_0~84
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=dc30fbf673953fefb115d2cf441119ee28039c9c;p=thirdparty%2Fvalgrind.git

Bug 484426 - aarch64: 0.5 gets rounded to 0
---

diff --git a/.gitignore b/.gitignore
index 982305c355..31a93cd29b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1781,6 +1781,7 @@
 /none/tests/arm64/Makefile.in
 /none/tests/arm64/allexec
 /none/tests/arm64/atomics_v81
+/none/tests/arm64/bug484426
 /none/tests/arm64/crc32
 /none/tests/arm64/cvtf_imm
 /none/tests/arm64/fmadd_sub
diff --git a/NEWS b/NEWS
index 7922646f31..3160bbe213 100644
--- a/NEWS
+++ b/NEWS
@@ -73,6 +73,7 @@ are not entered into bugzilla tend to get forgotten about or ignored.
 481131  [PATCH] x86 regtest: fix clobber lists in generated asm statements
 483786  Incorrect parameter indexing in FreeBSD clock_nanosleep syscall wrapper
 484002  Add suppression for invalid read in glibc's __wcpncpy_avx2() via wcsxfrm()
+484426  aarch64: 0.5 gets rounded to 0
 n-i-bz  Add redirect for memccpy
 
 To see details of a given bug, visit
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index 3b33c48c3e..c7e395b4b6 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -44,8 +44,7 @@
      least significant mantissa bit is incorrect.  Fix: use the IR
      multiply-add IROps instead.
 
-   * FRINTA, FRINTN are kludged .. they just round to nearest.  No special
-     handling for the "ties" case.  FRINTX might be dubious too.
+   * FRINTX might need updating to set the inexact computation FPSR flag
 
    * Ditto FCVTXN.  No idea what "round to odd" means.  This implementation
      just rounds to nearest.
@@ -15074,22 +15073,23 @@ Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
             001 +inf      (FRINTP)
             010 -inf      (FRINTM)
             011 zero      (FRINTZ)
-            000 tieeven   (FRINTN) -- !! FIXME KLUDGED !!
-            100 tieaway   (FRINTA) -- !! FIXME KLUDGED !!
+            000 tieeven   (FRINTN)
+            100 tieaway   (FRINTA)
             110 per FPCR + "exact = TRUE" (FRINTX)
             101 unallocated
       */
-      Bool    isD   = (ty & 1) == 1;
-      UInt    rm    = opcode & BITS6(0,0,0,1,1,1);
-      IRType  ity   = isD ? Ity_F64 : Ity_F32;
-      IRExpr* irrmE = NULL;
-      UChar   ch    = '?';
+      Bool    isD     = (ty & 1) == 1;
+      UInt    rm      = opcode & BITS6(0,0,0,1,1,1);
+      IRType  ity     = isD ? Ity_F64 : Ity_F32;
+      IRExpr* irrmE   = NULL;
+      UChar   ch      = '?';
+      IROp    op      = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
+      Bool    isBinop = True;
       switch (rm) {
          case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
          case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
          case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
-         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
-         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
+         case BITS3(1,0,0): ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break;
          // I am unsure about the following, due to the "integral exact"
          // description in the manual.  What does it mean? (frintx, that is)
          // PJF exact means that if the rounding can't be done without
@@ -15099,17 +15099,18 @@ Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
             ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
          case BITS3(1,1,1):
             ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
-         // The following is a kludge.  There's no Irrm_ value to represent
-         // this ("to nearest, with ties to even")
-         case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
+         case BITS3(0,0,0): ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break;
          default: break;
       }
-      if (irrmE) {
+      if (irrmE || !isBinop) {
          IRTemp src = newTemp(ity);
          IRTemp dst = newTemp(ity);
          assign(src, getQRegLO(nn, ity));
-         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
-                           irrmE, mkexpr(src)));
+         if (isBinop) {
+            assign(dst, binop(op, irrmE, mkexpr(src)));
+         } else {
+             assign(dst, unop(op, mkexpr(src)));
+         }
          putQReg128(dd, mkV128(0x0000));
          putQRegLO(dd, mkexpr(dst));
          DIP("frint%c %s, %s\n",
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index 39c6aaa46b..0b59c87cd3 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -572,11 +572,13 @@ static const HChar* showARM64FpTriOp ( ARM64FpTriOp op ) {
 
 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
    switch (op) {
-      case ARM64fpu_NEG:   return "neg  ";
-      case ARM64fpu_ABS:   return "abs  ";
-      case ARM64fpu_SQRT:  return "sqrt ";
-      case ARM64fpu_RINT:  return "rinti";
-      case ARM64fpu_RECPX: return "recpx";
+      case ARM64fpu_NEG:    return "neg  ";
+      case ARM64fpu_ABS:    return "abs  ";
+      case ARM64fpu_SQRT:   return "sqrt ";
+      case ARM64fpu_RINT:   return "rinti";
+      case ARM64fpu_RINTA0: return "rinta";
+      case ARM64fpu_RINTE:  return "rintn";
+      case ARM64fpu_RECPX:  return "recpx";
       default: vpanic("showARM64FpUnaryOp");
    }
 }
@@ -2968,6 +2970,7 @@ static inline UInt qregEnc ( HReg r )
 
 #define X00000   BITS8(0,0,0, 0,0,0,0,0)
 #define X00001   BITS8(0,0,0, 0,0,0,0,1)
+#define X00100   BITS8(0,0,0, 0,0,1,0,0)
 #define X00110   BITS8(0,0,0, 0,0,1,1,0)
 #define X00111   BITS8(0,0,0, 0,0,1,1,1)
 #define X01000   BITS8(0,0,0, 0,1,0,0,0)
@@ -4582,7 +4585,21 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
            *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
            goto done;
          }
-         /* 
+         /*
+            000, 11110 01 1,001 10,0 10000 n d  FRINTA Dd, Dm (round to nearest, ties away from zero)
+         */
+         if (i->ARM64in.VUnaryD.op == ARM64fpu_RINTA0) {
+           *p++ = X_3_8_5_6_5_5(X000, X11110011, X00110, X010000, dN, dD);
+           goto done;
+         }
+         /*
+            000, 11110 01 1,001 00,0 10000 n d  FRINTN Dd, Dm (round to nearest, ties to even)
+         */
+         if (i->ARM64in.VUnaryD.op == ARM64fpu_RINTE) {
+           *p++ = X_3_8_5_6_5_5(X000, X11110011, X00100, X010000, dN, dD);
+           goto done;
+         }
+         /*
             010, 11110 11 1,0000 1,1111 10 n d  FRECPX Dd, Dm
          */
          if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
@@ -4620,7 +4637,21 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
            *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
            goto done;
          }
-         /* 
+         /*
+            000, 11110 00 1,001 10,0 10000 n d  FRINTA Sd, Sm (round to nearest, ties away from zero)
+         */
+         if (i->ARM64in.VUnaryS.op == ARM64fpu_RINTA0) {
+           *p++ = X_3_8_5_6_5_5(X000, X11110001, X00110, X010000, sN, sD);
+           goto done;
+         }
+         /*
+            000, 11110 00 1,001 00,0 10000 n d  FRINTN Sd, Sm (round to nearest, ties to even)
+         */
+         if (i->ARM64in.VUnaryS.op == ARM64fpu_RINTE) {
+           *p++ = X_3_8_5_6_5_5(X000, X11110001, X00100, X010000, sN, sD);
+           goto done;
+         }
+         /*
             010, 11110 10 1,0000 1,1111 10 n d  FRECPX Sd, Sm
          */
          if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
index dc686dff7f..e78d8045ff 100644
--- a/VEX/priv/host_arm64_defs.h
+++ b/VEX/priv/host_arm64_defs.h
@@ -321,6 +321,8 @@ typedef
       ARM64fpu_ABS,
       ARM64fpu_SQRT,
       ARM64fpu_RINT,
+      ARM64fpu_RINTA0,
+      ARM64fpu_RINTE,
       ARM64fpu_RECPX,
       ARM64fpu_INVALID
    }
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 1fa61edb2d..645358586f 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -3438,6 +3438,18 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
             addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
             return dst;
          }
+         case Iop_RoundF64toIntA0: {
+            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINTA0, dst, src));
+            return dst;
+         }
+         case Iop_RoundF64toIntE: {
+            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINTE, dst, src));
+            return dst;
+         }
          default:
             break;
       }
@@ -3626,6 +3638,18 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
             addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
             return dst;
          }
+         case Iop_RoundF32toIntA0: {
+            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINTA0, dst, src));
+            return dst;
+         }
+         case Iop_RoundF32toIntE: {
+            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
+            HReg dst = newVRegD(env);
+            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINTE, dst, src));
+            return dst;
+         }
          default:
             break;
       }
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 0ef49eaa6a..93e9a98d50 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -440,7 +440,11 @@ void ppIROp ( IROp op )
 
       case Iop_RoundF128toInt: vex_printf("RoundF128toInt"); return;
       case Iop_RoundF64toInt: vex_printf("RoundF64toInt"); return;
+      case Iop_RoundF64toIntA0: vex_printf("RoundF64toIntA0"); return;
+      case Iop_RoundF64toIntE: vex_printf("RoundF64toIntE"); return;
       case Iop_RoundF32toInt: vex_printf("RoundF32toInt"); return;
+      case Iop_RoundF32toIntA0: vex_printf("RoundF32toIntA0"); return;
+      case Iop_RoundF32toIntE: vex_printf("RoundF32toIntE"); return;
       case Iop_RoundF64toF32: vex_printf("RoundF64toF32"); return;
 
       case Iop_ReinterpV128asI128: vex_printf("ReinterpV128asI128"); return;
@@ -1470,8 +1474,10 @@ Bool primopMightTrap ( IROp op )
    case Iop_Yl2xp1F64: case Iop_PRemF64: case Iop_PRemC3210F64:
    case Iop_PRem1F64: case Iop_PRem1C3210F64: case Iop_ScaleF64:
    case Iop_SinF64: case Iop_CosF64: case Iop_TanF64:
-   case Iop_2xm1F64: case Iop_RoundF128toInt: case Iop_RoundF64toInt:
-   case Iop_RoundF32toInt: case Iop_MAddF32: case Iop_MSubF32:
+   case Iop_2xm1F64: case Iop_RoundF128toInt:
+   case Iop_RoundF64toInt: case Iop_RoundF64toIntA0: case Iop_RoundF64toIntE:
+   case Iop_RoundF32toInt: case Iop_RoundF32toIntA0: case Iop_RoundF32toIntE:
+   case Iop_MAddF32: case Iop_MSubF32:
    case Iop_MAddF64: case Iop_MSubF64:
    case Iop_MAddF64r32: case Iop_MSubF64r32:
    case Iop_RSqrtEst5GoodF64: case Iop_RoundF64toF64_NEAREST:
@@ -3402,6 +3408,10 @@ void typeOfPrimop ( IROp op,
       case Iop_RecpExpF32:
          BINARY(ity_RMode,Ity_F32, Ity_F32);
 
+      case Iop_RoundF32toIntA0:
+      case Iop_RoundF32toIntE:
+         UNARY(Ity_F32, Ity_F32);
+
       case Iop_SqrtF16:
          BINARY(ity_RMode, Ity_F16, Ity_F16);
 
@@ -3479,6 +3489,9 @@ void typeOfPrimop ( IROp op,
       case Iop_2xm1F64:
       case Iop_RoundF64toInt: BINARY(ity_RMode,Ity_F64, Ity_F64);
 
+      case Iop_RoundF64toIntA0: case Iop_RoundF64toIntE:
+         UNARY(Ity_F64, Ity_F64);
+
       case Iop_MAddF64: case Iop_MSubF64:
       case Iop_MAddF64r32: case Iop_MSubF64r32:
          QUATERNARY(ity_RMode,Ity_F64,Ity_F64,Ity_F64, Ity_F64);
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 85805bb69b..1b4efdc90e 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -782,8 +782,13 @@ typedef
                              as F128) */
       Iop_RoundF64toInt, /* F64 value to nearest integral value (still
                             as F64) */
+      Iop_RoundF64toIntA0, /* As Iop_RoundF64toInt but ties away from zero */
+
+      Iop_RoundF64toIntE, /* As Iop_RoundF64toInt but ties to even */
       Iop_RoundF32toInt, /* F32 value to nearest integral value (still
                             as F32) */
+      Iop_RoundF32toIntA0, /* As Iop_RoundF32toInt but ties away from zero */
+      Iop_RoundF32toIntE, /* As Iop_RoundF32toInt but ties to even */
 
       /* --- guest s390 specifics, not mandated by 754. --- */
 
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index ec8ac53217..05e6d59afa 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -5288,6 +5288,8 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
       case Iop_RoundF64toF64_NegINF:
       case Iop_RoundF64toF64_PosINF:
       case Iop_RoundF64toF64_ZERO:
+      case Iop_RoundF64toIntA0:
+      case Iop_RoundF64toIntE:
       case Iop_D32toD64:
       case Iop_I32StoD64:
       case Iop_I32UtoD64:
@@ -5305,7 +5307,9 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
       case Iop_TruncF64asF32:
       case Iop_NegF32:
       case Iop_AbsF32:
-      case Iop_F16toF32: 
+      case Iop_F16toF32:
+      case Iop_RoundF32toIntA0:
+      case Iop_RoundF32toIntE:
          return mkPCastTo(mce, Ity_I32, vatom);
 
       case Iop_AbsF16:
diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c
index 24f258bf0c..1ab0ee2fc0 100644
--- a/memcheck/tests/vbit-test/irops.c
+++ b/memcheck/tests/vbit-test/irops.c
@@ -293,7 +293,11 @@ static irop_t irops[] = {
   { DEFOP(Iop_2xm1F64,       UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 },
   { DEFOP(Iop_RoundF128toInt, UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 0 },
   { DEFOP(Iop_RoundF64toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 },
+  { DEFOP(Iop_RoundF64toIntA0, UNDEF_ALL), .arm64 = 1 },
+  { DEFOP(Iop_RoundF64toIntE, UNDEF_ALL), .arm64 = 1  },
   { DEFOP(Iop_RoundF32toInt, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 1, .mips64 = 1 },
+  { DEFOP(Iop_RoundF32toIntA0, UNDEF_ALL), .arm64 = 1 },
+  { DEFOP(Iop_RoundF32toIntE, UNDEF_ALL), .arm64 = 1 },
   { DEFOP(Iop_MAddF32,       UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 },
   { DEFOP(Iop_MSubF32,       UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 = 0, .mips64 = 1 },
   { DEFOP(Iop_MAddF64,       UNDEF_ALL), .s390x = 1, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 = 0, .mips64 = 1 },
diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am
index cc0ed14811..e9570459de 100644
--- a/none/tests/arm64/Makefile.am
+++ b/none/tests/arm64/Makefile.am
@@ -4,6 +4,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
 dist_noinst_SCRIPTS = filter_stderr
 
 EXTRA_DIST = \
+	bug484426.stdout.exp bug484426.stderr.exp bug484426.vgtest \
         crc32.stdout.exp crc32.stderr.exp crc32.vgtest \
         cvtf_imm.stdout.exp cvtf_imm.stderr.exp cvtf_imm.vgtest \
 	fp_and_simd.stdout.exp fp_and_simd.stderr.exp fp_and_simd.vgtest \
@@ -22,6 +23,7 @@ EXTRA_DIST = \
 
 check_PROGRAMS = \
 	allexec \
+	bug484426 \
         cvtf_imm \
 	fp_and_simd \
 	integer \
@@ -51,6 +53,8 @@ AM_CCASFLAGS += @FLAG_M64@
 
 allexec_CFLAGS     = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@
 
+bug484426_SOURCES  = bug484426.cpp
+
 crc32_CFLAGS       = $(AM_CFLAGS) -march=armv8-a+crc
 atomics_v81_CFLAGS = $(AM_CFLAGS) -march=armv8.1-a
 simd_v81_CFLAGS    = $(AM_CFLAGS) -march=armv8.1-a+crypto
diff --git a/none/tests/arm64/bug484426.cpp b/none/tests/arm64/bug484426.cpp
new file mode 100644
index 0000000000..3b1657ffb8
--- /dev/null
+++ b/none/tests/arm64/bug484426.cpp
@@ -0,0 +1,8 @@
+#include <cmath>
+#include <iostream>
+
+int main(int argc, char** argv) // Regression test for bug 484426: parse argv[1] as a float and print its std::round.
+{
+   float value = std::stof(argv[1]); // argv[1] is not checked against argc; bug484426.vgtest always passes "0.5".
+   std::cout << std::round(value) << "\n"; // std::round rounds halfway cases away from zero, so 0.5 is expected to print 1.
+}
diff --git a/none/tests/arm64/bug484426.stderr.exp b/none/tests/arm64/bug484426.stderr.exp
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/none/tests/arm64/bug484426.stdout.exp b/none/tests/arm64/bug484426.stdout.exp
new file mode 100644
index 0000000000..d00491fd7e
--- /dev/null
+++ b/none/tests/arm64/bug484426.stdout.exp
@@ -0,0 +1 @@
+1
diff --git a/none/tests/arm64/bug484426.vgtest b/none/tests/arm64/bug484426.vgtest
new file mode 100644
index 0000000000..e4afbec40c
--- /dev/null
+++ b/none/tests/arm64/bug484426.vgtest
@@ -0,0 +1,3 @@
+prog: bug484426
+args: 0.5
+vgopts: -q