target/arm: Fix f16_dotadd vs nan selection

author Richard Henderson <richard.henderson@linaro.org>

Fri, 4 Jul 2025 14:19:31 +0000 (08:19 -0600)

committer Peter Maydell <peter.maydell@linaro.org>

Fri, 4 Jul 2025 14:52:21 +0000 (15:52 +0100)
author Richard Henderson <richard.henderson@linaro.org>
Fri, 4 Jul 2025 14:19:31 +0000 (08:19 -0600)
committer Peter Maydell <peter.maydell@linaro.org>
Fri, 4 Jul 2025 14:52:21 +0000 (15:52 +0100)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c

index de0c6e54d4bbefd779e89b94753d075181beae8b..8f33387e4bd3a78c542f2661a0bb9dcd2fb16d30 100644 (file)
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -1005,25 +1005,55 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
       *  - we have pre-set-up copy of s_std which is set to round-to-odd,
       *    for the multiply (see below)
       */
-    float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16);
-    float64 e1c = float16_to_float64(e1 >> 16, true, s_f16);
-    float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16);
-    float64 e2c = float16_to_float64(e2 >> 16, true, s_f16);
-    float64 t64;
+    float16 h1r = e1 & 0xffff;
+    float16 h1c = e1 >> 16;
+    float16 h2r = e2 & 0xffff;
+    float16 h2c = e2 >> 16;
      float32 t32;
  
-    /*
-     * The ARM pseudocode function FPDot performs both multiplies
-     * and the add with a single rounding operation.  Emulate this
-     * by performing the first multiply in round-to-odd, then doing
-     * the second multiply as fused multiply-add, and rounding to
-     * float32 all in one step.
-     */
-    t64 = float64_mul(e1r, e2r, s_odd);
-    t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
+    /* C.f. FPProcessNaNs4 */
+    if (float16_is_any_nan(h1r) || float16_is_any_nan(h1c) ||
+        float16_is_any_nan(h2r) || float16_is_any_nan(h2c)) {
+        float16 t16;
+
+        if (float16_is_signaling_nan(h1r, s_f16)) {
+            t16 = h1r;
+        } else if (float16_is_signaling_nan(h1c, s_f16)) {
+            t16 = h1c;
+        } else if (float16_is_signaling_nan(h2r, s_f16)) {
+            t16 = h2r;
+        } else if (float16_is_signaling_nan(h2c, s_f16)) {
+            t16 = h2c;
+        } else if (float16_is_any_nan(h1r)) {
+            t16 = h1r;
+        } else if (float16_is_any_nan(h1c)) {
+            t16 = h1c;
+        } else if (float16_is_any_nan(h2r)) {
+            t16 = h2r;
+        } else {
+            t16 = h2c;
+        }
+        t32 = float16_to_float32(t16, true, s_f16);
+    } else {
+        float64 e1r = float16_to_float64(h1r, true, s_f16);
+        float64 e1c = float16_to_float64(h1c, true, s_f16);
+        float64 e2r = float16_to_float64(h2r, true, s_f16);
+        float64 e2c = float16_to_float64(h2c, true, s_f16);
+        float64 t64;
  
-    /* This conversion is exact, because we've already rounded. */
-    t32 = float64_to_float32(t64, s_std);
+        /*
+         * The ARM pseudocode function FPDot performs both multiplies
+         * and the add with a single rounding operation.  Emulate this
+         * by performing the first multiply in round-to-odd, then doing
+         * the second multiply as fused multiply-add, and rounding to
+         * float32 all in one step.
+         */
+        t64 = float64_mul(e1r, e2r, s_odd);
+        t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
+
+        /* This conversion is exact, because we've already rounded. */
+        t32 = float64_to_float32(t64, s_std);
+    }
  
      /* The final accumulation step is not fused. */
      return float32_add(sum, t32, s_std);
author	Richard Henderson <richard.henderson@linaro.org>
	Fri, 4 Jul 2025 14:19:31 +0000 (08:19 -0600)
committer	Peter Maydell <peter.maydell@linaro.org>
	Fri, 4 Jul 2025 14:52:21 +0000 (15:52 +0100)