]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ieee754/ldbl-128/s_fmal.c
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128 / s_fmal.c
CommitLineData
3e692e05 1/* Compute x * y + z as ternary operation.
2b778ceb 2 Copyright (C) 2010-2021 Free Software Foundation, Inc.
3e692e05
JJ
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6 17 License along with the GNU C Library; if not, see
5a82c748 18 <https://www.gnu.org/licenses/>. */
3e692e05
JJ
19
20#include <float.h>
21#include <math.h>
22#include <fenv.h>
23#include <ieee754.h>
b4d5b8b0 24#include <math-barriers.h>
4842e4fe 25#include <math_private.h>
fd3b4e7c 26#include <libm-alias-ldouble.h>
ef82f4da 27#include <tininess.h>
628d90c5 28#include <math-use-builtins.h>
3e692e05
JJ
29
30/* This implementation uses rounding to odd to avoid problems with
31 double rounding. See a paper by Boldo and Melquiond:
32 http://www.lri.fr/~melquion/doc/08-tc.pdf */
33
15089e04
PM
34_Float128
35__fmal (_Float128 x, _Float128 y, _Float128 z)
3e692e05 36{
628d90c5
VG
37#if USE_FMAL_BUILTIN
38 return __builtin_fmal (x, y, z);
39#else
3e692e05
JJ
40 union ieee854_long_double u, v, w;
41 int adjust = 0;
42 u.d = x;
43 v.d = y;
44 w.d = z;
45 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
46 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
47 - LDBL_MANT_DIG, 0)
48 || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
49 || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
50 || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
51 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
52 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
53 {
54 /* If z is Inf, but x and y are finite, the result should be
55 z rather than NaN. */
56 if (w.ieee.exponent == 0x7fff
57 && u.ieee.exponent != 0x7fff
58 && v.ieee.exponent != 0x7fff)
59 return (z + x) + y;
bec749fd
JM
60 /* If z is zero and x are y are nonzero, compute the result
61 as x * y to avoid the wrong sign of a zero result if x * y
62 underflows to 0. */
63 if (z == 0 && x != 0 && y != 0)
64 return x * y;
a0c2940d
JM
65 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
66 x * y + z. */
3e692e05
JJ
67 if (u.ieee.exponent == 0x7fff
68 || v.ieee.exponent == 0x7fff
69 || w.ieee.exponent == 0x7fff
473611b2
JM
70 || x == 0
71 || y == 0)
3e692e05 72 return x * y + z;
a0c2940d
JM
73 /* If fma will certainly overflow, compute as x * y. */
74 if (u.ieee.exponent + v.ieee.exponent
75 > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
76 return x * y;
1f4dafa3 77 /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
473611b2
JM
78 result nor whether there is underflow depends on its exact
79 value, only on its sign. */
80 if (u.ieee.exponent + v.ieee.exponent
81 < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
82 {
83 int neg = u.ieee.negative ^ v.ieee.negative;
02bbfb41 84 _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
473611b2
JM
85 if (w.ieee.exponent >= 3)
86 return tiny + z;
87 /* Scaling up, adding TINY and scaling down produces the
88 correct result, because in round-to-nearest mode adding
89 TINY has no effect and in other modes double rounding is
90 harmless. But it may not produce required underflow
91 exceptions. */
02bbfb41 92 v.d = z * L(0x1p114) + tiny;
473611b2
JM
93 if (TININESS_AFTER_ROUNDING
94 ? v.ieee.exponent < 115
95 : (w.ieee.exponent == 0
96 || (w.ieee.exponent == 1
97 && w.ieee.negative != neg
98 && w.ieee.mantissa3 == 0
99 && w.ieee.mantissa2 == 0
100 && w.ieee.mantissa1 == 0
101 && w.ieee.mantissa0 == 0)))
102 {
15089e04 103 _Float128 force_underflow = x * y;
d96164c3 104 math_force_eval (force_underflow);
473611b2 105 }
02bbfb41 106 return v.d * L(0x1p-114);
473611b2 107 }
3e692e05
JJ
108 if (u.ieee.exponent + v.ieee.exponent
109 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
110 {
111 /* Compute 1p-113 times smaller result and multiply
112 at the end. */
113 if (u.ieee.exponent > v.ieee.exponent)
114 u.ieee.exponent -= LDBL_MANT_DIG;
115 else
116 v.ieee.exponent -= LDBL_MANT_DIG;
117 /* If x + y exponent is very large and z exponent is very small,
118 it doesn't matter if we don't adjust it. */
119 if (w.ieee.exponent > LDBL_MANT_DIG)
120 w.ieee.exponent -= LDBL_MANT_DIG;
121 adjust = 1;
122 }
123 else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
124 {
125 /* Similarly.
126 If z exponent is very large and x and y exponents are
82477c28
JM
127 very small, adjust them up to avoid spurious underflows,
128 rather than down. */
129 if (u.ieee.exponent + v.ieee.exponent
739babd7 130 <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
82477c28
JM
131 {
132 if (u.ieee.exponent > v.ieee.exponent)
133 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
134 else
135 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
136 }
137 else if (u.ieee.exponent > v.ieee.exponent)
3e692e05
JJ
138 {
139 if (u.ieee.exponent > LDBL_MANT_DIG)
140 u.ieee.exponent -= LDBL_MANT_DIG;
141 }
142 else if (v.ieee.exponent > LDBL_MANT_DIG)
143 v.ieee.exponent -= LDBL_MANT_DIG;
144 w.ieee.exponent -= LDBL_MANT_DIG;
145 adjust = 1;
146 }
147 else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
148 {
149 u.ieee.exponent -= LDBL_MANT_DIG;
150 if (v.ieee.exponent)
151 v.ieee.exponent += LDBL_MANT_DIG;
152 else
02bbfb41 153 v.d *= L(0x1p113);
3e692e05
JJ
154 }
155 else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
156 {
157 v.ieee.exponent -= LDBL_MANT_DIG;
158 if (u.ieee.exponent)
159 u.ieee.exponent += LDBL_MANT_DIG;
160 else
02bbfb41 161 u.d *= L(0x1p113);
3e692e05
JJ
162 }
163 else /* if (u.ieee.exponent + v.ieee.exponent
164 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
165 {
166 if (u.ieee.exponent > v.ieee.exponent)
82477c28 167 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
3e692e05 168 else
82477c28
JM
169 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
170 if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
3e692e05
JJ
171 {
172 if (w.ieee.exponent)
82477c28 173 w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
3e692e05 174 else
02bbfb41 175 w.d *= L(0x1p228);
3e692e05
JJ
176 adjust = -1;
177 }
178 /* Otherwise x * y should just affect inexact
179 and nothing else. */
180 }
181 x = u.d;
182 y = v.d;
183 z = w.d;
184 }
8ec5b013
JM
185
186 /* Ensure correct sign of exact 0 + 0. */
a1ffb40e 187 if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
09245377
L
188 {
189 x = math_opt_barrier (x);
190 return x * y + z;
191 }
8ec5b013 192
5b5b04d6
JM
193 fenv_t env;
194 feholdexcept (&env);
195 fesetround (FE_TONEAREST);
196
3e692e05
JJ
197 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
198#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
15089e04
PM
199 _Float128 x1 = x * C;
200 _Float128 y1 = y * C;
201 _Float128 m1 = x * y;
3e692e05
JJ
202 x1 = (x - x1) + x1;
203 y1 = (y - y1) + y1;
15089e04
PM
204 _Float128 x2 = x - x1;
205 _Float128 y2 = y - y1;
206 _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
3e692e05
JJ
207
208 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
15089e04
PM
209 _Float128 a1 = z + m1;
210 _Float128 t1 = a1 - z;
211 _Float128 t2 = a1 - t1;
3e692e05
JJ
212 t1 = m1 - t1;
213 t2 = z - t2;
15089e04 214 _Float128 a2 = t1 + t2;
4896f049
RH
215 /* Ensure the arithmetic is not scheduled after feclearexcept call. */
216 math_force_eval (m2);
217 math_force_eval (a2);
5b5b04d6
JM
218 feclearexcept (FE_INEXACT);
219
4896f049 220 /* If the result is an exact zero, ensure it has the correct sign. */
5b5b04d6
JM
221 if (a1 == 0 && m2 == 0)
222 {
223 feupdateenv (&env);
4896f049
RH
224 /* Ensure that round-to-nearest value of z + m1 is not reused. */
225 z = math_opt_barrier (z);
5b5b04d6
JM
226 return z + m1;
227 }
3e692e05 228
3e692e05
JJ
229 fesetround (FE_TOWARDZERO);
230 /* Perform m2 + a2 addition with round to odd. */
231 u.d = a2 + m2;
232
a1ffb40e 233 if (__glibc_likely (adjust == 0))
3e692e05
JJ
234 {
235 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
236 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
237 feupdateenv (&env);
238 /* Result is a1 + u.d. */
239 return a1 + u.d;
240 }
a1ffb40e 241 else if (__glibc_likely (adjust > 0))
3e692e05
JJ
242 {
243 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
244 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
245 feupdateenv (&env);
246 /* Result is a1 + u.d, scaled up. */
02bbfb41 247 return (a1 + u.d) * L(0x1p113);
3e692e05
JJ
248 }
249 else
250 {
251 if ((u.ieee.mantissa3 & 1) == 0)
252 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
253 v.d = a1 + u.d;
7c08a05c 254 /* Ensure the addition is not scheduled after fetestexcept call. */
4842e4fe 255 math_force_eval (v.d);
3e692e05
JJ
256 int j = fetestexcept (FE_INEXACT) != 0;
257 feupdateenv (&env);
258 /* Ensure the following computations are performed in default rounding
259 mode instead of just reusing the round to zero computation. */
260 asm volatile ("" : "=m" (u) : "m" (u));
261 /* If a1 + u.d is exact, the only rounding happens during
262 scaling down. */
263 if (j == 0)
02bbfb41 264 return v.d * L(0x1p-228);
3e692e05
JJ
265 /* If result rounded to zero is not subnormal, no double
266 rounding will occur. */
82477c28 267 if (v.ieee.exponent > 228)
02bbfb41 268 return (a1 + u.d) * L(0x1p-228);
82477c28
JM
269 /* If v.d * 0x1p-228L with round to zero is a subnormal above
270 or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
3e692e05
JJ
271 down just by 1 bit, which means v.ieee.mantissa3 |= j would
272 change the round bit, not sticky or guard bit.
82477c28 273 v.d * 0x1p-228L never normalizes by shifting up,
3e692e05
JJ
274 so round bit plus sticky bit should be already enough
275 for proper rounding. */
82477c28 276 if (v.ieee.exponent == 228)
3e692e05 277 {
ef82f4da
JM
278 /* If the exponent would be in the normal range when
279 rounding to normal precision with unbounded exponent
280 range, the exact result is known and spurious underflows
281 must be avoided on systems detecting tininess after
282 rounding. */
283 if (TININESS_AFTER_ROUNDING)
284 {
285 w.d = a1 + u.d;
82477c28 286 if (w.ieee.exponent == 229)
02bbfb41 287 return w.d * L(0x1p-228);
ef82f4da 288 }
3e692e05
JJ
289 /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
290 v.ieee.mantissa3 & 1 is the round bit and j is our sticky
8627a232 291 bit. */
02bbfb41 292 w.d = 0;
8627a232
JM
293 w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
294 w.ieee.negative = v.ieee.negative;
295 v.ieee.mantissa3 &= ~3U;
02bbfb41
PM
296 v.d *= L(0x1p-228);
297 w.d *= L(0x1p-2);
8627a232 298 return v.d + w.d;
3e692e05
JJ
299 }
300 v.ieee.mantissa3 |= j;
02bbfb41 301 return v.d * L(0x1p-228);
3e692e05 302 }
628d90c5 303#endif /* ! USE_FMAL_BUILTIN */
3e692e05 304}
fd3b4e7c 305libm_alias_ldouble (__fma, fma)