]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ieee754/ldbl-128/s_fmal.c
Move fenv.h soft-float inlines from fenv_private.h to include/fenv.h.
[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-128 / s_fmal.c
CommitLineData
3e692e05 1/* Compute x * y + z as ternary operation.
688903eb 2 Copyright (C) 2010-2018 Free Software Foundation, Inc.
3e692e05
JJ
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
3e692e05
JJ
19
20#include <float.h>
21#include <math.h>
22#include <fenv.h>
23#include <ieee754.h>
b4d5b8b0 24#include <math-barriers.h>
4842e4fe 25#include <math_private.h>
fd3b4e7c 26#include <libm-alias-ldouble.h>
ef82f4da 27#include <tininess.h>
3e692e05
JJ
28
29/* This implementation uses rounding to odd to avoid problems with
30 double rounding. See a paper by Boldo and Melquiond:
31 http://www.lri.fr/~melquion/doc/08-tc.pdf */
32
15089e04
PM
33_Float128
34__fmal (_Float128 x, _Float128 y, _Float128 z)
3e692e05
JJ
35{
36 union ieee854_long_double u, v, w;
37 int adjust = 0;
38 u.d = x;
39 v.d = y;
40 w.d = z;
41 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
42 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
43 - LDBL_MANT_DIG, 0)
44 || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
45 || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
46 || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
47 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
48 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
49 {
50 /* If z is Inf, but x and y are finite, the result should be
51 z rather than NaN. */
52 if (w.ieee.exponent == 0x7fff
53 && u.ieee.exponent != 0x7fff
54 && v.ieee.exponent != 0x7fff)
55 return (z + x) + y;
bec749fd
JM
56 /* If z is zero and x are y are nonzero, compute the result
57 as x * y to avoid the wrong sign of a zero result if x * y
58 underflows to 0. */
59 if (z == 0 && x != 0 && y != 0)
60 return x * y;
a0c2940d
JM
61 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
62 x * y + z. */
3e692e05
JJ
63 if (u.ieee.exponent == 0x7fff
64 || v.ieee.exponent == 0x7fff
65 || w.ieee.exponent == 0x7fff
473611b2
JM
66 || x == 0
67 || y == 0)
3e692e05 68 return x * y + z;
a0c2940d
JM
69 /* If fma will certainly overflow, compute as x * y. */
70 if (u.ieee.exponent + v.ieee.exponent
71 > 0x7fff + IEEE854_LONG_DOUBLE_BIAS)
72 return x * y;
1f4dafa3 73 /* If x * y is less than 1/4 of LDBL_TRUE_MIN, neither the
473611b2
JM
74 result nor whether there is underflow depends on its exact
75 value, only on its sign. */
76 if (u.ieee.exponent + v.ieee.exponent
77 < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
78 {
79 int neg = u.ieee.negative ^ v.ieee.negative;
02bbfb41 80 _Float128 tiny = neg ? L(-0x1p-16494) : L(0x1p-16494);
473611b2
JM
81 if (w.ieee.exponent >= 3)
82 return tiny + z;
83 /* Scaling up, adding TINY and scaling down produces the
84 correct result, because in round-to-nearest mode adding
85 TINY has no effect and in other modes double rounding is
86 harmless. But it may not produce required underflow
87 exceptions. */
02bbfb41 88 v.d = z * L(0x1p114) + tiny;
473611b2
JM
89 if (TININESS_AFTER_ROUNDING
90 ? v.ieee.exponent < 115
91 : (w.ieee.exponent == 0
92 || (w.ieee.exponent == 1
93 && w.ieee.negative != neg
94 && w.ieee.mantissa3 == 0
95 && w.ieee.mantissa2 == 0
96 && w.ieee.mantissa1 == 0
97 && w.ieee.mantissa0 == 0)))
98 {
15089e04 99 _Float128 force_underflow = x * y;
d96164c3 100 math_force_eval (force_underflow);
473611b2 101 }
02bbfb41 102 return v.d * L(0x1p-114);
473611b2 103 }
3e692e05
JJ
104 if (u.ieee.exponent + v.ieee.exponent
105 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
106 {
107 /* Compute 1p-113 times smaller result and multiply
108 at the end. */
109 if (u.ieee.exponent > v.ieee.exponent)
110 u.ieee.exponent -= LDBL_MANT_DIG;
111 else
112 v.ieee.exponent -= LDBL_MANT_DIG;
113 /* If x + y exponent is very large and z exponent is very small,
114 it doesn't matter if we don't adjust it. */
115 if (w.ieee.exponent > LDBL_MANT_DIG)
116 w.ieee.exponent -= LDBL_MANT_DIG;
117 adjust = 1;
118 }
119 else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
120 {
121 /* Similarly.
122 If z exponent is very large and x and y exponents are
82477c28
JM
123 very small, adjust them up to avoid spurious underflows,
124 rather than down. */
125 if (u.ieee.exponent + v.ieee.exponent
739babd7 126 <= IEEE854_LONG_DOUBLE_BIAS + 2 * LDBL_MANT_DIG)
82477c28
JM
127 {
128 if (u.ieee.exponent > v.ieee.exponent)
129 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
130 else
131 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
132 }
133 else if (u.ieee.exponent > v.ieee.exponent)
3e692e05
JJ
134 {
135 if (u.ieee.exponent > LDBL_MANT_DIG)
136 u.ieee.exponent -= LDBL_MANT_DIG;
137 }
138 else if (v.ieee.exponent > LDBL_MANT_DIG)
139 v.ieee.exponent -= LDBL_MANT_DIG;
140 w.ieee.exponent -= LDBL_MANT_DIG;
141 adjust = 1;
142 }
143 else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
144 {
145 u.ieee.exponent -= LDBL_MANT_DIG;
146 if (v.ieee.exponent)
147 v.ieee.exponent += LDBL_MANT_DIG;
148 else
02bbfb41 149 v.d *= L(0x1p113);
3e692e05
JJ
150 }
151 else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
152 {
153 v.ieee.exponent -= LDBL_MANT_DIG;
154 if (u.ieee.exponent)
155 u.ieee.exponent += LDBL_MANT_DIG;
156 else
02bbfb41 157 u.d *= L(0x1p113);
3e692e05
JJ
158 }
159 else /* if (u.ieee.exponent + v.ieee.exponent
160 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
161 {
162 if (u.ieee.exponent > v.ieee.exponent)
82477c28 163 u.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
3e692e05 164 else
82477c28
JM
165 v.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
166 if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 6)
3e692e05
JJ
167 {
168 if (w.ieee.exponent)
82477c28 169 w.ieee.exponent += 2 * LDBL_MANT_DIG + 2;
3e692e05 170 else
02bbfb41 171 w.d *= L(0x1p228);
3e692e05
JJ
172 adjust = -1;
173 }
174 /* Otherwise x * y should just affect inexact
175 and nothing else. */
176 }
177 x = u.d;
178 y = v.d;
179 z = w.d;
180 }
8ec5b013
JM
181
182 /* Ensure correct sign of exact 0 + 0. */
a1ffb40e 183 if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
09245377
L
184 {
185 x = math_opt_barrier (x);
186 return x * y + z;
187 }
8ec5b013 188
5b5b04d6
JM
189 fenv_t env;
190 feholdexcept (&env);
191 fesetround (FE_TONEAREST);
192
3e692e05
JJ
193 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
194#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
15089e04
PM
195 _Float128 x1 = x * C;
196 _Float128 y1 = y * C;
197 _Float128 m1 = x * y;
3e692e05
JJ
198 x1 = (x - x1) + x1;
199 y1 = (y - y1) + y1;
15089e04
PM
200 _Float128 x2 = x - x1;
201 _Float128 y2 = y - y1;
202 _Float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
3e692e05
JJ
203
204 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
15089e04
PM
205 _Float128 a1 = z + m1;
206 _Float128 t1 = a1 - z;
207 _Float128 t2 = a1 - t1;
3e692e05
JJ
208 t1 = m1 - t1;
209 t2 = z - t2;
15089e04 210 _Float128 a2 = t1 + t2;
4896f049
RH
211 /* Ensure the arithmetic is not scheduled after feclearexcept call. */
212 math_force_eval (m2);
213 math_force_eval (a2);
5b5b04d6
JM
214 feclearexcept (FE_INEXACT);
215
4896f049 216 /* If the result is an exact zero, ensure it has the correct sign. */
5b5b04d6
JM
217 if (a1 == 0 && m2 == 0)
218 {
219 feupdateenv (&env);
4896f049
RH
220 /* Ensure that round-to-nearest value of z + m1 is not reused. */
221 z = math_opt_barrier (z);
5b5b04d6
JM
222 return z + m1;
223 }
3e692e05 224
3e692e05
JJ
225 fesetround (FE_TOWARDZERO);
226 /* Perform m2 + a2 addition with round to odd. */
227 u.d = a2 + m2;
228
a1ffb40e 229 if (__glibc_likely (adjust == 0))
3e692e05
JJ
230 {
231 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
232 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
233 feupdateenv (&env);
234 /* Result is a1 + u.d. */
235 return a1 + u.d;
236 }
a1ffb40e 237 else if (__glibc_likely (adjust > 0))
3e692e05
JJ
238 {
239 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
240 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
241 feupdateenv (&env);
242 /* Result is a1 + u.d, scaled up. */
02bbfb41 243 return (a1 + u.d) * L(0x1p113);
3e692e05
JJ
244 }
245 else
246 {
247 if ((u.ieee.mantissa3 & 1) == 0)
248 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
249 v.d = a1 + u.d;
7c08a05c 250 /* Ensure the addition is not scheduled after fetestexcept call. */
4842e4fe 251 math_force_eval (v.d);
3e692e05
JJ
252 int j = fetestexcept (FE_INEXACT) != 0;
253 feupdateenv (&env);
254 /* Ensure the following computations are performed in default rounding
255 mode instead of just reusing the round to zero computation. */
256 asm volatile ("" : "=m" (u) : "m" (u));
257 /* If a1 + u.d is exact, the only rounding happens during
258 scaling down. */
259 if (j == 0)
02bbfb41 260 return v.d * L(0x1p-228);
3e692e05
JJ
261 /* If result rounded to zero is not subnormal, no double
262 rounding will occur. */
82477c28 263 if (v.ieee.exponent > 228)
02bbfb41 264 return (a1 + u.d) * L(0x1p-228);
82477c28
JM
265 /* If v.d * 0x1p-228L with round to zero is a subnormal above
266 or equal to LDBL_MIN / 2, then v.d * 0x1p-228L shifts mantissa
3e692e05
JJ
267 down just by 1 bit, which means v.ieee.mantissa3 |= j would
268 change the round bit, not sticky or guard bit.
82477c28 269 v.d * 0x1p-228L never normalizes by shifting up,
3e692e05
JJ
270 so round bit plus sticky bit should be already enough
271 for proper rounding. */
82477c28 272 if (v.ieee.exponent == 228)
3e692e05 273 {
ef82f4da
JM
274 /* If the exponent would be in the normal range when
275 rounding to normal precision with unbounded exponent
276 range, the exact result is known and spurious underflows
277 must be avoided on systems detecting tininess after
278 rounding. */
279 if (TININESS_AFTER_ROUNDING)
280 {
281 w.d = a1 + u.d;
82477c28 282 if (w.ieee.exponent == 229)
02bbfb41 283 return w.d * L(0x1p-228);
ef82f4da 284 }
3e692e05
JJ
285 /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
286 v.ieee.mantissa3 & 1 is the round bit and j is our sticky
8627a232 287 bit. */
02bbfb41 288 w.d = 0;
8627a232
JM
289 w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
290 w.ieee.negative = v.ieee.negative;
291 v.ieee.mantissa3 &= ~3U;
02bbfb41
PM
292 v.d *= L(0x1p-228);
293 w.d *= L(0x1p-2);
8627a232 294 return v.d + w.d;
3e692e05
JJ
295 }
296 v.ieee.mantissa3 |= j;
02bbfb41 297 return v.d * L(0x1p-228);
3e692e05
JJ
298 }
299}
fd3b4e7c 300libm_alias_ldouble (__fma, fma)