]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ieee754/ldbl-96/s_fmal.c
Fix fma missing underflows and bad results for some subnormal results (bugs 14152...
[thirdparty/glibc.git] / sysdeps / ieee754 / ldbl-96 / s_fmal.c
CommitLineData
3e692e05 1/* Compute x * y + z as ternary operation.
4842e4fe 2 Copyright (C) 2010-2012 Free Software Foundation, Inc.
3e692e05
JJ
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
3e692e05
JJ
19
20#include <float.h>
21#include <math.h>
22#include <fenv.h>
23#include <ieee754.h>
4842e4fe 24#include <math_private.h>
3e692e05
JJ
25
26/* This implementation uses rounding to odd to avoid problems with
27 double rounding. See a paper by Boldo and Melquiond:
28 http://www.lri.fr/~melquion/doc/08-tc.pdf */
29
30long double
31__fmal (long double x, long double y, long double z)
32{
33 union ieee854_long_double u, v, w;
34 int adjust = 0;
35 u.d = x;
36 v.d = y;
37 w.d = z;
38 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
39 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS
40 - LDBL_MANT_DIG, 0)
41 || __builtin_expect (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
42 || __builtin_expect (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
43 || __builtin_expect (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG, 0)
44 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
45 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG, 0))
46 {
47 /* If z is Inf, but x and y are finite, the result should be
48 z rather than NaN. */
49 if (w.ieee.exponent == 0x7fff
50 && u.ieee.exponent != 0x7fff
51 && v.ieee.exponent != 0x7fff)
52 return (z + x) + y;
bec749fd
JM
53 /* If z is zero and x are y are nonzero, compute the result
54 as x * y to avoid the wrong sign of a zero result if x * y
55 underflows to 0. */
56 if (z == 0 && x != 0 && y != 0)
57 return x * y;
3e692e05
JJ
58 /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
59 or if x * y is less than half of LDBL_DENORM_MIN,
60 compute as x * y + z. */
61 if (u.ieee.exponent == 0x7fff
62 || v.ieee.exponent == 0x7fff
63 || w.ieee.exponent == 0x7fff
64 || u.ieee.exponent + v.ieee.exponent
65 > 0x7fff + IEEE854_LONG_DOUBLE_BIAS
66 || u.ieee.exponent + v.ieee.exponent
67 < IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG - 2)
68 return x * y + z;
69 if (u.ieee.exponent + v.ieee.exponent
70 >= 0x7fff + IEEE854_LONG_DOUBLE_BIAS - LDBL_MANT_DIG)
71 {
72 /* Compute 1p-64 times smaller result and multiply
73 at the end. */
74 if (u.ieee.exponent > v.ieee.exponent)
75 u.ieee.exponent -= LDBL_MANT_DIG;
76 else
77 v.ieee.exponent -= LDBL_MANT_DIG;
78 /* If x + y exponent is very large and z exponent is very small,
79 it doesn't matter if we don't adjust it. */
80 if (w.ieee.exponent > LDBL_MANT_DIG)
81 w.ieee.exponent -= LDBL_MANT_DIG;
82 adjust = 1;
83 }
84 else if (w.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
85 {
86 /* Similarly.
87 If z exponent is very large and x and y exponents are
88 very small, it doesn't matter if we don't adjust it. */
89 if (u.ieee.exponent > v.ieee.exponent)
90 {
91 if (u.ieee.exponent > LDBL_MANT_DIG)
92 u.ieee.exponent -= LDBL_MANT_DIG;
93 }
94 else if (v.ieee.exponent > LDBL_MANT_DIG)
95 v.ieee.exponent -= LDBL_MANT_DIG;
96 w.ieee.exponent -= LDBL_MANT_DIG;
97 adjust = 1;
98 }
99 else if (u.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
100 {
101 u.ieee.exponent -= LDBL_MANT_DIG;
102 if (v.ieee.exponent)
103 v.ieee.exponent += LDBL_MANT_DIG;
104 else
105 v.d *= 0x1p64L;
106 }
107 else if (v.ieee.exponent >= 0x7fff - LDBL_MANT_DIG)
108 {
109 v.ieee.exponent -= LDBL_MANT_DIG;
110 if (u.ieee.exponent)
111 u.ieee.exponent += LDBL_MANT_DIG;
112 else
113 u.d *= 0x1p64L;
114 }
115 else /* if (u.ieee.exponent + v.ieee.exponent
116 <= IEEE854_LONG_DOUBLE_BIAS + LDBL_MANT_DIG) */
117 {
118 if (u.ieee.exponent > v.ieee.exponent)
119 u.ieee.exponent += 2 * LDBL_MANT_DIG;
120 else
121 v.ieee.exponent += 2 * LDBL_MANT_DIG;
122 if (w.ieee.exponent <= 4 * LDBL_MANT_DIG + 4)
123 {
124 if (w.ieee.exponent)
125 w.ieee.exponent += 2 * LDBL_MANT_DIG;
126 else
127 w.d *= 0x1p128L;
128 adjust = -1;
129 }
130 /* Otherwise x * y should just affect inexact
131 and nothing else. */
132 }
133 x = u.d;
134 y = v.d;
135 z = w.d;
136 }
8ec5b013
JM
137
138 /* Ensure correct sign of exact 0 + 0. */
139 if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0))
140 return x * y + z;
141
3e692e05
JJ
142 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
143#define C ((1LL << (LDBL_MANT_DIG + 1) / 2) + 1)
144 long double x1 = x * C;
145 long double y1 = y * C;
146 long double m1 = x * y;
147 x1 = (x - x1) + x1;
148 y1 = (y - y1) + y1;
149 long double x2 = x - x1;
150 long double y2 = y - y1;
151 long double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
152
153 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
154 long double a1 = z + m1;
155 long double t1 = a1 - z;
156 long double t2 = a1 - t1;
157 t1 = m1 - t1;
158 t2 = z - t2;
159 long double a2 = t1 + t2;
160
161 fenv_t env;
162 feholdexcept (&env);
163 fesetround (FE_TOWARDZERO);
164 /* Perform m2 + a2 addition with round to odd. */
165 u.d = a2 + m2;
166
167 if (__builtin_expect (adjust == 0, 1))
168 {
169 if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
170 u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
171 feupdateenv (&env);
172 /* Result is a1 + u.d. */
173 return a1 + u.d;
174 }
175 else if (__builtin_expect (adjust > 0, 1))
176 {
177 if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7fff)
178 u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
179 feupdateenv (&env);
180 /* Result is a1 + u.d, scaled up. */
181 return (a1 + u.d) * 0x1p64L;
182 }
183 else
184 {
185 if ((u.ieee.mantissa1 & 1) == 0)
186 u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
187 v.d = a1 + u.d;
4842e4fe
JM
188 /* Ensure the addition is not scheduled after fetestexcept call. */
189 math_force_eval (v.d);
3e692e05
JJ
190 int j = fetestexcept (FE_INEXACT) != 0;
191 feupdateenv (&env);
192 /* Ensure the following computations are performed in default rounding
193 mode instead of just reusing the round to zero computation. */
194 asm volatile ("" : "=m" (u) : "m" (u));
195 /* If a1 + u.d is exact, the only rounding happens during
196 scaling down. */
197 if (j == 0)
198 return v.d * 0x1p-128L;
199 /* If result rounded to zero is not subnormal, no double
200 rounding will occur. */
201 if (v.ieee.exponent > 128)
202 return (a1 + u.d) * 0x1p-128L;
203 /* If v.d * 0x1p-128L with round to zero is a subnormal above
204 or equal to LDBL_MIN / 2, then v.d * 0x1p-128L shifts mantissa
205 down just by 1 bit, which means v.ieee.mantissa1 |= j would
206 change the round bit, not sticky or guard bit.
207 v.d * 0x1p-128L never normalizes by shifting up,
208 so round bit plus sticky bit should be already enough
209 for proper rounding. */
210 if (v.ieee.exponent == 128)
211 {
212 /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
213 v.ieee.mantissa1 & 1 is the round bit and j is our sticky
8627a232
JM
214 bit. */
215 w.d = 0.0L;
216 w.ieee.mantissa1 = ((v.ieee.mantissa1 & 3) << 1) | j;
217 w.ieee.negative = v.ieee.negative;
218 v.ieee.mantissa1 &= ~3U;
219 v.d *= 0x1p-128L;
220 w.d *= 0x1p-2L;
221 return v.d + w.d;
3e692e05
JJ
222 }
223 v.ieee.mantissa1 |= j;
224 return v.d * 0x1p-128L;
225 }
226}
227weak_alias (__fmal, fmal)