* - we have pre-set-up copy of s_std which is set to round-to-odd,
* for the multiply (see below)
*/
- float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16);
- float64 e1c = float16_to_float64(e1 >> 16, true, s_f16);
- float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16);
- float64 e2c = float16_to_float64(e2 >> 16, true, s_f16);
- float64 t64;
+ float16 h1r = e1 & 0xffff;
+ float16 h1c = e1 >> 16;
+ float16 h2r = e2 & 0xffff;
+ float16 h2c = e2 >> 16;
float32 t32;
- /*
- * The ARM pseudocode function FPDot performs both multiplies
- * and the add with a single rounding operation. Emulate this
- * by performing the first multiply in round-to-odd, then doing
- * the second multiply as fused multiply-add, and rounding to
- * float32 all in one step.
- */
- t64 = float64_mul(e1r, e2r, s_odd);
- t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
+ /* Cf. FPProcessNaNs4 */
+ if (float16_is_any_nan(h1r) || float16_is_any_nan(h1c) ||
+ float16_is_any_nan(h2r) || float16_is_any_nan(h2c)) {
+ float16 t16;
+
+ if (float16_is_signaling_nan(h1r, s_f16)) {
+ t16 = h1r;
+ } else if (float16_is_signaling_nan(h1c, s_f16)) {
+ t16 = h1c;
+ } else if (float16_is_signaling_nan(h2r, s_f16)) {
+ t16 = h2r;
+ } else if (float16_is_signaling_nan(h2c, s_f16)) {
+ t16 = h2c;
+ } else if (float16_is_any_nan(h1r)) {
+ t16 = h1r;
+ } else if (float16_is_any_nan(h1c)) {
+ t16 = h1c;
+ } else if (float16_is_any_nan(h2r)) {
+ t16 = h2r;
+ } else {
+ t16 = h2c;
+ }
+ t32 = float16_to_float32(t16, true, s_f16);
+ } else {
+ float64 e1r = float16_to_float64(h1r, true, s_f16);
+ float64 e1c = float16_to_float64(h1c, true, s_f16);
+ float64 e2r = float16_to_float64(h2r, true, s_f16);
+ float64 e2c = float16_to_float64(h2c, true, s_f16);
+ float64 t64;
- /* This conversion is exact, because we've already rounded. */
- t32 = float64_to_float32(t64, s_std);
+ /*
+ * The ARM pseudocode function FPDot performs both multiplies
+ * and the add with a single rounding operation. Emulate this
+ * by performing the first multiply in round-to-odd, then doing
+ * the second multiply as fused multiply-add, and rounding to
+ * float32 all in one step.
+ */
+ t64 = float64_mul(e1r, e2r, s_odd);
+ t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
+
+ /* This conversion is exact, because we've already rounded. */
+ t32 = float64_to_float32(t64, s_std);
+ }
/* The final accumulation step is not fused. */
return float32_add(sum, t32, s_std);