]>
Commit | Line | Data |
---|---|---|
3e692e05 | 1 | /* Compute x * y + z as ternary operation. |
dff8da6b | 2 | Copyright (C) 2010-2024 Free Software Foundation, Inc. |
3e692e05 | 3 | This file is part of the GNU C Library. |
3e692e05 JJ |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Lesser General Public | |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Lesser General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
3e692e05 | 18 | |
4b6574a6 | 19 | #define NO_MATH_REDIRECT |
3e692e05 | 20 | #include <float.h> |
b3f27d81 | 21 | #define f64xfmaf128 __hide_f64xfmaf128 |
3e692e05 | 22 | #include <math.h> |
b3f27d81 | 23 | #undef f64xfmaf128 |
3e692e05 JJ |
24 | #include <fenv.h> |
25 | #include <ieee754.h> | |
b4d5b8b0 | 26 | #include <math-barriers.h> |
4842e4fe | 27 | #include <math_private.h> |
fd3b4e7c | 28 | #include <libm-alias-ldouble.h> |
b3f27d81 | 29 | #include <math-narrow-alias.h> |
ef82f4da | 30 | #include <tininess.h> |
628d90c5 | 31 | #include <math-use-builtins.h> |
3e692e05 JJ |
32 | |
33 | /* This implementation uses rounding to odd to avoid problems with | |
34 | double rounding. See a paper by Boldo and Melquiond: | |
35 | http://www.lri.fr/~melquion/doc/08-tc.pdf */ | |
36 | ||
15089e04 PM |
37 | _Float128 |
38 | __fmal (_Float128 x, _Float128 y, _Float128 z) | |
3e692e05 | 39 | { |
628d90c5 VG |
40 | #if USE_FMAL_BUILTIN |
41 | return __builtin_fmal (x, y, z); | |
42 | #else | |
3e692e05 JJ |
43 | union ieee854_long_double u, v, w; |
44 | int adjust = 0; | |
45 | u.d = x; | |
46 | v.d = y; | |
47 | w.d = z; | |
48 | if (__builtin_expect (u.ieee.exponent + v.ieee.exponent | |
49 | >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS | |
50 | - LDBL_MANT_DIG, 0) | |
51 | || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | |
52 | || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | |
53 | || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0) | |
54 | || __builtin_expect (u.ieee.exponent + v.ieee.exponent | |
55 | <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0)) | |
56 | { | |
57 | /* If z is Inf, but x and y are finite, the result should be | |
58 | z rather than NaN. */ | |
59 | if (w.ieee.exponent == 0x7fff | |
60 | && u.ieee.exponent != 0x7fff | |
61 | && v.ieee.exponent != 0x7fff) | |
62 | return (z + x) + y; | |
bec749fd JM |
63 | /* If z is zero and x are y are nonzero, compute the result |
64 | as x * y to avoid the wrong sign of a zero result if x * y | |
65 | underflows to 0. */ | |
66 | if (z == 0 && x != 0 && y != 0) | |
67 | return x * y; | |
a0c2940d JM |
68 | /* If x or y or z is Inf/NaN, or if x * y is zero, compute as |
69 | x * y + z. */ | |
3e692e05 JJ |
70 | if (u.ieee.exponent == 0x7fff |
71 | || v.ieee.exponent == 0x7fff | |
72 | || w.ieee.exponent == 0x7fff | |
473611b2 JM |
73 | || x == 0 |
74 | || y == 0) | |
3e692e05 | 75 | return x * y + z; |
a0c2940d JM |
76 | /* If fma will certainly overflow, compute as x * y. */ |
77 | if (u.ieee.exponent + v.ieee.exponent | |
78 | > 0x7fff + IEEE854_LONG_DOUBLE_BIAS) | |
79 | return x * y; | |
1f4dafa3 | 80 | /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the |
473611b2 JM |
81 | result nor whether there is underflow depends on its exact |
82 | value, only on its sign. */ | |
83 | if (u.ieee.exponent + v.ieee.exponent | |
84 | < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2) | |
85 | { | |
86 | int neg = u.ieee.negative ^ v.ieee.negative; | |
02bbfb41 | 87 | _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494); |
473611b2 JM |
88 | if (w.ieee.exponent >= 3) |
89 | return tiny + z; | |
90 | /* Scaling up, adding TINY and scaling down produces the | |
91 | correct result, because in round-to-nearest mode adding | |
92 | TINY has no effect and in other modes double rounding is | |
93 | harmless. But it may not produce required underflow | |
94 | exceptions. */ | |
02bbfb41 | 95 | v.d = z * L(0x1p114) + tiny; |
473611b2 JM |
96 | if (TININESS_AFTER_ROUNDING |
97 | ? v.ieee.exponent < 115 | |
98 | : (w.ieee.exponent == 0 | |
99 | || (w.ieee.exponent == 1 | |
100 | && w.ieee.negative != neg | |
101 | && w.ieee.mantissa3 == 0 | |
102 | && w.ieee.mantissa2 == 0 | |
103 | && w.ieee.mantissa1 == 0 | |
104 | && w.ieee.mantissa0 == 0))) | |
105 | { | |
15089e04 | 106 | _Float128 force_underflow = x * y; |
d96164c3 | 107 | math_force_eval (force_underflow); |
473611b2 | 108 | } |
02bbfb41 | 109 | return v.d * L(0x1p-114); |
473611b2 | 110 | } |
3e692e05 JJ |
111 | if (u.ieee.exponent + v.ieee.exponent |
112 | >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG) | |
113 | { | |
114 | /* Compute 1p-113 times smaller result and multiply | |
115 | at the end. */ | |
116 | if (u.ieee.exponent > v.ieee.exponent) | |
117 | u.ieee.exponent -= LDBL_MANT_DIG; | |
118 | else | |
119 | v.ieee.exponent -= LDBL_MANT_DIG; | |
120 | /* If x + y exponent is very large and z exponent is very small, | |
121 | it doesn't matter if we don't adjust it. */ | |
122 | if (w.ieee.exponent > LDBL_MANT_DIG) | |
123 | w.ieee.exponent -= LDBL_MANT_DIG; | |
124 | adjust = 1; | |
125 | } | |
126 | else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | |
127 | { | |
128 | /* Similarly. | |
129 | If z exponent is very large and x and y exponents are | |
82477c28 JM |
130 | very small, adjust them up to avoid spurious underflows, |
131 | rather than down. */ | |
132 | if (u.ieee.exponent + v.ieee.exponent | |
739babd7 | 133 | <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG) |
82477c28 JM |
134 | { |
135 | if (u.ieee.exponent > v.ieee.exponent) | |
136 | u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | |
137 | else | |
138 | v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; | |
139 | } | |
140 | else if (u.ieee.exponent > v.ieee.exponent) | |
3e692e05 JJ |
141 | { |
142 | if (u.ieee.exponent > LDBL_MANT_DIG) | |
143 | u.ieee.exponent -= LDBL_MANT_DIG; | |
144 | } | |
145 | else if (v.ieee.exponent > LDBL_MANT_DIG) | |
146 | v.ieee.exponent -= LDBL_MANT_DIG; | |
147 | w.ieee.exponent -= LDBL_MANT_DIG; | |
148 | adjust = 1; | |
149 | } | |
150 | else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | |
151 | { | |
152 | u.ieee.exponent -= LDBL_MANT_DIG; | |
153 | if (v.ieee.exponent) | |
154 | v.ieee.exponent += LDBL_MANT_DIG; | |
155 | else | |
02bbfb41 | 156 | v.d *= L(0x1p113); |
3e692e05 JJ |
157 | } |
158 | else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG) | |
159 | { | |
160 | v.ieee.exponent -= LDBL_MANT_DIG; | |
161 | if (u.ieee.exponent) | |
162 | u.ieee.exponent += LDBL_MANT_DIG; | |
163 | else | |
02bbfb41 | 164 | u.d *= L(0x1p113); |
3e692e05 JJ |
165 | } |
166 | else /* if (u.ieee.exponent + v.ieee.exponent | |
167 | <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */ | |
168 | { | |
169 | if (u.ieee.exponent > v.ieee.exponent) | |
82477c28 | 170 | u.ieee.exponent += 2 * LDBL_MANT_DIG + 2; |
3e692e05 | 171 | else |
82477c28 JM |
172 | v.ieee.exponent += 2 * LDBL_MANT_DIG + 2; |
173 | if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6) | |
3e692e05 JJ |
174 | { |
175 | if (w.ieee.exponent) | |
82477c28 | 176 | w.ieee.exponent += 2 * LDBL_MANT_DIG + 2; |
3e692e05 | 177 | else |
02bbfb41 | 178 | w.d *= L(0x1p228); |
3e692e05 JJ |
179 | adjust = -1; |
180 | } | |
181 | /* Otherwise x * y should just affect inexact | |
182 | and nothing else. */ | |
183 | } | |
184 | x = u.d; | |
185 | y = v.d; | |
186 | z = w.d; | |
187 | } | |
8ec5b013 JM |
188 | |
189 | /* Ensure correct sign of exact 0 + 0. */ | |
a1ffb40e | 190 | if (__glibc_unlikely ((x == 0 || y == 0) && z == 0)) |
09245377 L |
191 | { |
192 | x = math_opt_barrier (x); | |
193 | return x * y + z; | |
194 | } | |
8ec5b013 | 195 | |
5b5b04d6 JM |
196 | fenv_t env; |
197 | feholdexcept (&env); | |
198 | fesetround (FE_TONEAREST); | |
199 | ||
3e692e05 JJ |
200 | /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ |
201 | #define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1) | |
15089e04 PM |
202 | _Float128 x1 = x * C; |
203 | _Float128 y1 = y * C; | |
204 | _Float128 m1 = x * y; | |
3e692e05 JJ |
205 | x1 = (x - x1) + x1; |
206 | y1 = (y - y1) + y1; | |
15089e04 PM |
207 | _Float128 x2 = x - x1; |
208 | _Float128 y2 = y - y1; | |
209 | _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2; | |
3e692e05 JJ |
210 | |
211 | /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */ | |
15089e04 PM |
212 | _Float128 a1 = z + m1; |
213 | _Float128 t1 = a1 - z; | |
214 | _Float128 t2 = a1 - t1; | |
3e692e05 JJ |
215 | t1 = m1 - t1; |
216 | t2 = z - t2; | |
15089e04 | 217 | _Float128 a2 = t1 + t2; |
4896f049 RH |
218 | /* Ensure the arithmetic is not scheduled after feclearexcept call. */ |
219 | math_force_eval (m2); | |
220 | math_force_eval (a2); | |
5b5b04d6 JM |
221 | feclearexcept (FE_INEXACT); |
222 | ||
4896f049 | 223 | /* If the result is an exact zero, ensure it has the correct sign. */ |
5b5b04d6 JM |
224 | if (a1 == 0 && m2 == 0) |
225 | { | |
226 | feupdateenv (&env); | |
4896f049 RH |
227 | /* Ensure that round-to-nearest value of z + m1 is not reused. */ |
228 | z = math_opt_barrier (z); | |
5b5b04d6 JM |
229 | return z + m1; |
230 | } | |
3e692e05 | 231 | |
3e692e05 JJ |
232 | fesetround (FE_TOWARDZERO); |
233 | /* Perform m2 + a2 addition with round to odd. */ | |
234 | u.d = a2 + m2; | |
235 | ||
a1ffb40e | 236 | if (__glibc_likely (adjust == 0)) |
3e692e05 JJ |
237 | { |
238 | if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff) | |
239 | u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; | |
240 | feupdateenv (&env); | |
241 | /* Result is a1 + u.d. */ | |
242 | return a1 + u.d; | |
243 | } | |
a1ffb40e | 244 | else if (__glibc_likely (adjust > 0)) |
3e692e05 JJ |
245 | { |
246 | if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff) | |
247 | u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; | |
248 | feupdateenv (&env); | |
249 | /* Result is a1 + u.d, scaled up. */ | |
02bbfb41 | 250 | return (a1 + u.d) * L(0x1p113); |
3e692e05 JJ |
251 | } |
252 | else | |
253 | { | |
254 | if ((u.ieee.mantissa3 & 1) == 0) | |
255 | u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0; | |
256 | v.d = a1 + u.d; | |
7c08a05c | 257 | /* Ensure the addition is not scheduled after fetestexcept call. */ |
4842e4fe | 258 | math_force_eval (v.d); |
3e692e05 JJ |
259 | int j = fetestexcept (FE_INEXACT) != 0; |
260 | feupdateenv (&env); | |
261 | /* Ensure the following computations are performed in default rounding | |
262 | mode instead of just reusing the round to zero computation. */ | |
263 | asm volatile ("" : "=m" (u) : "m" (u)); | |
264 | /* If a1 + u.d is exact, the only rounding happens during | |
265 | scaling down. */ | |
266 | if (j == 0) | |
02bbfb41 | 267 | return v.d * L(0x1p-228); |
3e692e05 JJ |
268 | /* If result rounded to zero is not subnormal, no double |
269 | rounding will occur. */ | |
82477c28 | 270 | if (v.ieee.exponent > 228) |
02bbfb41 | 271 | return (a1 + u.d) * L(0x1p-228); |
82477c28 JM |
272 | /* If v.d * 0x1p-228L with round to zero is a subnormal above |
273 | or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa | |
3e692e05 JJ |
274 | down just by 1 bit, which means v.ieee.mantissa3 |= j would |
275 | change the round bit, not sticky or guard bit. | |
82477c28 | 276 | v.d * 0x1p-228L never normalizes by shifting up, |
3e692e05 JJ |
277 | so round bit plus sticky bit should be already enough |
278 | for proper rounding. */ | |
82477c28 | 279 | if (v.ieee.exponent == 228) |
3e692e05 | 280 | { |
ef82f4da JM |
281 | /* If the exponent would be in the normal range when |
282 | rounding to normal precision with unbounded exponent | |
283 | range, the exact result is known and spurious underflows | |
284 | must be avoided on systems detecting tininess after | |
285 | rounding. */ | |
286 | if (TININESS_AFTER_ROUNDING) | |
287 | { | |
288 | w.d = a1 + u.d; | |
82477c28 | 289 | if (w.ieee.exponent == 229) |
02bbfb41 | 290 | return w.d * L(0x1p-228); |
ef82f4da | 291 | } |
3e692e05 JJ |
292 | /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding, |
293 | v.ieee.mantissa3 & 1 is the round bit and j is our sticky | |
8627a232 | 294 | bit. */ |
02bbfb41 | 295 | w.d = 0; |
8627a232 JM |
296 | w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j; |
297 | w.ieee.negative = v.ieee.negative; | |
298 | v.ieee.mantissa3 &= ~3U; | |
02bbfb41 PM |
299 | v.d *= L(0x1p-228); |
300 | w.d *= L(0x1p-2); | |
8627a232 | 301 | return v.d + w.d; |
3e692e05 JJ |
302 | } |
303 | v.ieee.mantissa3 |= j; | |
02bbfb41 | 304 | return v.d * L(0x1p-228); |
3e692e05 | 305 | } |
628d90c5 | 306 | #endif /* ! USE_FMAL_BUILTIN */ |
3e692e05 | 307 | } |
fd3b4e7c | 308 | libm_alias_ldouble (__fma, fma) |
b3f27d81 | 309 | libm_alias_ldouble_narrow (__fma, fma) |