/* Compute x * y + z as ternary operation.
   Copyright (C) 2010-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#define NO_MATH_REDIRECT
#include <float.h>
#define f64xfmaf128 __hide_f64xfmaf128
#include <math.h>
#undef f64xfmaf128
#include <fenv.h>
#include <ieee754.h>
#include <math-barriers.h>
#include <math_private.h>
#include <libm-alias-ldouble.h>
#include <math-narrow-alias.h>
#include <tininess.h>
#include <math-use-builtins.h>

/* This implementation uses rounding to odd to avoid problems with
   double rounding.  See a paper by Boldo and Melquiond:
   http://www.lri.fr/~melquion/doc/08-tc.pdf  */
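/* Rounding to odd is realised below by computing the low part of the
   result with the rounding mode set to FE_TOWARDZERO and then ORing the
   FE_INEXACT flag into the least significant mantissa bit, so that the
   final round-to-nearest addition cannot double-round incorrectly.  */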

_Float128
__fmal (_Float128 x, _Float128 y, _Float128 z)
{
#if USE_FMAL_BUILTIN
  return __builtin_fmal (x, y, z);
#else
  union ieee854_long_double u, v, w;
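  /* ADJUST records how the operands are rescaled below: 1 means the
     computation proceeds on inputs scaled so that the intermediate
     result is 0x1p-113 times the true one and is scaled back up at the
     end; -1 means it is 0x1p228 times the true one and is scaled back
     down.  */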
  int adjust = 0;
  u.d = x;
  v.d = y;
  w.d = z;
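  /* The unlikely branch below handles the special cases (Inf/NaN, zeros,
     and operands so large or so small that the Dekker/Knuth steps could
     overflow or lose underflow information), rescaling the operands where
     necessary; everything else falls straight through with no
     rescaling.  */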
  if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
                        >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
                           - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
                           <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
    {
      /* If z is Inf, but x and y are finite, the result should be
         z rather than NaN.  */
      if (w.ieee.exponent == 0x7fff
          && u.ieee.exponent != 0x7fff
          && v.ieee.exponent != 0x7fff)
        return (z + x) + y;
      /* If z is zero and x and y are nonzero, compute the result
         as x * y to avoid the wrong sign of a zero result if x * y
         underflows to 0.  */
      if (z == 0 && x != 0 && y != 0)
        return x * y;
      /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
         x * y + z.  */
      if (u.ieee.exponent == 0x7fff
          || v.ieee.exponent == 0x7fff
          || w.ieee.exponent == 0x7fff
          || x == 0
          || y == 0)
        return x * y + z;
      /* If fma will certainly overflow, compute as x * y.  */
      if (u.ieee.exponent + v.ieee.exponent
          > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
        return x * y;
      /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
         result nor whether there is underflow depends on its exact
         value, only on its sign.  */
      if (u.ieee.exponent + v.ieee.exponent
          < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
        {
          int neg = u.ieee.negative ^ v.ieee.negative;
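          /* 0x1p-16494 is LDBL_TRUE_MIN, the least subnormal of the
             binary128 format; TINY carries the sign of x * y.  */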
          _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
          if (w.ieee.exponent >= 3)
            return tiny + z;
          /* Scaling up, adding TINY and scaling down produces the
             correct result, because in round-to-nearest mode adding
             TINY has no effect and in other modes double rounding is
             harmless.  But it may not produce required underflow
             exceptions.  */
          v.d = z * L(0x1p114) + tiny;
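          /* Underflow must be signalled if the result is tiny: on
             after-rounding systems, if the rounded scaled sum stays below
             0x1p114 * LDBL_MIN (biased exponent < 115); on before-rounding
             systems, if z is subnormal, or if z is exactly +/- LDBL_MIN
             with sign opposite to x * y so the exact sum dips below the
             normal range.  */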
          if (TININESS_AFTER_ROUNDING
              ? v.ieee.exponent < 115
              : (w.ieee.exponent == 0
                 || (w.ieee.exponent == 1
                     && w.ieee.negative != neg
                     && w.ieee.mantissa3 == 0
                     && w.ieee.mantissa2 == 0
                     && w.ieee.mantissa1 == 0
                     && w.ieee.mantissa0 == 0)))
            {
              _Float128 force_underflow = x * y;
              math_force_eval (force_underflow);
            }
          return v.d * L(0x1p-114);
        }
      if (u.ieee.exponent + v.ieee.exponent
          >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
        {
          /* Compute a 0x1p-113 times smaller result and multiply
             at the end.  */
          if (u.ieee.exponent > v.ieee.exponent)
            u.ieee.exponent -= LDBL_MANT_DIG;
          else
            v.ieee.exponent -= LDBL_MANT_DIG;
          /* If the x * y exponent is very large and the z exponent is
             very small, it does not matter if z is left unadjusted.  */
          if (w.ieee.exponent > LDBL_MANT_DIG)
            w.ieee.exponent -= LDBL_MANT_DIG;
          adjust = 1;
        }
      else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
        {
          /* Similarly.
             If the z exponent is very large and the x and y exponents are
             very small, adjust them up to avoid spurious underflows,
             rather than down.  */
          if (u.ieee.exponent + v.ieee.exponent
              <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
            {
              if (u.ieee.exponent > v.ieee.exponent)
                u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
              else
                v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
            }
          else if (u.ieee.exponent > v.ieee.exponent)
            {
              if (u.ieee.exponent > LDBL_MANT_DIG)
                u.ieee.exponent -= LDBL_MANT_DIG;
            }
          else if (v.ieee.exponent > LDBL_MANT_DIG)
            v.ieee.exponent -= LDBL_MANT_DIG;
          w.ieee.exponent -= LDBL_MANT_DIG;
          adjust = 1;
        }
      else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
        {
          u.ieee.exponent -= LDBL_MANT_DIG;
          if (v.ieee.exponent)
            v.ieee.exponent += LDBL_MANT_DIG;
          else
            v.d *= L(0x1p113);
        }
      else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
        {
          v.ieee.exponent -= LDBL_MANT_DIG;
          if (u.ieee.exponent)
            u.ieee.exponent += LDBL_MANT_DIG;
          else
            u.d *= L(0x1p113);
        }
      else /* if (u.ieee.exponent + v.ieee.exponent
                  <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
        {
          if (u.ieee.exponent > v.ieee.exponent)
            u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
          else
            v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
          if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
            {
              if (w.ieee.exponent)
                w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
              else
                w.d *= L(0x1p228);
              adjust = -1;
            }
          /* Otherwise x * y should just affect inexact
             and nothing else.  */
        }
      x = u.d;
      y = v.d;
      z = w.d;
    }

  /* Ensure correct sign of exact 0 + 0.  */
  if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
    {
      x = math_opt_barrier (x);
      return x * y + z;
    }

  fenv_t env;
  feholdexcept (&env);
  fesetround (FE_TONEAREST);
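  /* Both error-free transformations below (Dekker's product and Knuth's
     two-sum) rely on round-to-nearest arithmetic; the saved environment
     is restored with feupdateenv on every exit path.  */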

  /* Multiplication m1 + m2 = x * y using Dekker's algorithm.  */
#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
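  /* With LDBL_MANT_DIG == 113, C is 2^57 + 1 (Veltkamp's splitting
     constant): multiplying by C and cancelling splits each operand into
     high and low halves of roughly half the precision, which lets the
     error term m2 = x * y - m1 be computed exactly from the partial
     products below.  */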
  _Float128 x1 = x * C;
  _Float128 y1 = y * C;
  _Float128 m1 = x * y;
  x1 = (x - x1) + x1;
  y1 = (y - y1) + y1;
  _Float128 x2 = x - x1;
  _Float128 y2 = y - y1;
  _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;

  /* Addition a1 + a2 = z + m1 using Knuth's algorithm.  */
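  /* Knuth's two-sum makes a1 + a2 equal to z + m1 exactly, with
     a1 = fl(z + m1), without requiring any ordering of |z| and |m1|.  */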
  _Float128 a1 = z + m1;
  _Float128 t1 = a1 - z;
  _Float128 t2 = a1 - t1;
  t1 = m1 - t1;
  t2 = z - t2;
  _Float128 a2 = t1 + t2;
  /* Ensure the arithmetic is not scheduled after the feclearexcept
     call.  */
  math_force_eval (m2);
  math_force_eval (a2);
  feclearexcept (FE_INEXACT);

  /* If the result is an exact zero, ensure it has the correct sign.  */
  if (a1 == 0 && m2 == 0)
    {
      feupdateenv (&env);
      /* Ensure that the round-to-nearest value of z + m1 is not reused.  */
      z = math_opt_barrier (z);
      return z + m1;
    }

  fesetround (FE_TOWARDZERO);
  /* Perform the m2 + a2 addition with round to odd.  */
  u.d = a2 + m2;

  if (__glibc_likely (adjust == 0))
    {
      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
        u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d.  */
      return a1 + u.d;
    }
  else if (__glibc_likely (adjust > 0))
    {
      if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
        u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Result is a1 + u.d, scaled up.  */
      return (a1 + u.d) * L(0x1p113);
    }
  else
    {
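      /* adjust < 0: everything so far was computed on inputs scaled up by
         0x1p228 (228 == 2 * LDBL_MANT_DIG + 2), so the result must be
         scaled back down, and the exponent checks against 228 and 229
         below ask whether the scaled-down result is still normal.  */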
      if ((u.ieee.mantissa3 & 1) == 0)
        u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
      v.d = a1 + u.d;
      /* Ensure the addition is not scheduled after the fetestexcept
         call.  */
      math_force_eval (v.d);
      int j = fetestexcept (FE_INEXACT) != 0;
      feupdateenv (&env);
      /* Ensure the following computations are performed in default rounding
         mode instead of just reusing the round to zero computation.  */
      asm volatile ("" : "=m" (u) : "m" (u));
      /* If a1 + u.d is exact, the only rounding happens during
         scaling down.  */
      if (j == 0)
        return v.d * L(0x1p-228);
      /* If the result rounded to zero is not subnormal, no double
         rounding will occur.  */
      if (v.ieee.exponent > 228)
        return (a1 + u.d) * L(0x1p-228);
      /* If v.d * L(0x1p-228) with round to zero is a subnormal at or
         above LDBL_MIN / 2, then v.d * L(0x1p-228) shifts the mantissa
         down by just 1 bit, which means v.ieee.mantissa3 |= j would
         change the round bit, not the sticky or guard bit.
         v.d * L(0x1p-228) never normalizes by shifting up,
         so the round bit plus the sticky bit should already be enough
         for proper rounding.  */
      if (v.ieee.exponent == 228)
        {
          /* If the exponent would be in the normal range when
             rounding to normal precision with unbounded exponent
             range, the exact result is known and spurious underflows
             must be avoided on systems detecting tininess after
             rounding.  */
          if (TININESS_AFTER_ROUNDING)
            {
              w.d = a1 + u.d;
              if (w.ieee.exponent == 229)
                return w.d * L(0x1p-228);
            }
          /* v.ieee.mantissa3 & 2 is the LSB of the result before rounding,
             v.ieee.mantissa3 & 1 is the round bit and j is our sticky
             bit.  */
          w.d = 0;
          w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
          w.ieee.negative = v.ieee.negative;
          v.ieee.mantissa3 &= ~3U;
          v.d *= L(0x1p-228);
          w.d *= L(0x1p-2);
          return v.d + w.d;
        }
      v.ieee.mantissa3 |= j;
      return v.d * L(0x1p-228);
    }
#endif /* ! USE_FMAL_BUILTIN */
}
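/* These macros export the implementation under its public names: fmal,
   and, via the narrowing-alias machinery and the __hide_f64xfmaf128
   trick above, f64xfmaf128 where applicable.  */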
libm_alias_ldouble (__fma, fma)
libm_alias_ldouble_narrow (__fma, fma)