]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ieee754/dbl-64/s_fma.c
Fix sign of exact zero return from fma (bug 14638).
[thirdparty/glibc.git] / sysdeps / ieee754 / dbl-64 / s_fma.c
CommitLineData
5e908464 1/* Compute x * y + z as ternary operation.
efb73488 2 Copyright (C) 2010-2012 Free Software Foundation, Inc.
5e908464
JJ
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
5e908464
JJ
19
20#include <float.h>
21#include <math.h>
22#include <fenv.h>
23#include <ieee754.h>
d9a8d0ab 24#include <math_private.h>
5e908464
JJ
25
26/* This implementation uses rounding to odd to avoid problems with
27 double rounding. See a paper by Boldo and Melquiond:
28 http://www.lri.fr/~melquion/doc/08-tc.pdf */
29
30double
31__fma (double x, double y, double z)
32{
33 union ieee754_double u, v, w;
34 int adjust = 0;
35 u.d = x;
36 v.d = y;
37 w.d = z;
38 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
39 >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
40 || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
41 || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
f3f7372d
JJ
42 || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
43 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
44 <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
5e908464 45 {
3e692e05
JJ
46 /* If z is Inf, but x and y are finite, the result should be
47 z rather than NaN. */
48 if (w.ieee.exponent == 0x7ff
49 && u.ieee.exponent != 0x7ff
d9a8d0ab 50 && v.ieee.exponent != 0x7ff)
3e692e05 51 return (z + x) + y;
f3f7372d
JJ
52 /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
53 or if x * y is less than half of DBL_DENORM_MIN,
5e908464
JJ
54 compute as x * y + z. */
55 if (u.ieee.exponent == 0x7ff
56 || v.ieee.exponent == 0x7ff
57 || w.ieee.exponent == 0x7ff
58 || u.ieee.exponent + v.ieee.exponent
f3f7372d
JJ
59 > 0x7ff + IEEE754_DOUBLE_BIAS
60 || u.ieee.exponent + v.ieee.exponent
61 < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
5e908464
JJ
62 return x * y + z;
63 if (u.ieee.exponent + v.ieee.exponent
64 >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
65 {
66 /* Compute 1p-53 times smaller result and multiply
67 at the end. */
68 if (u.ieee.exponent > v.ieee.exponent)
69 u.ieee.exponent -= DBL_MANT_DIG;
70 else
71 v.ieee.exponent -= DBL_MANT_DIG;
72 /* If x + y exponent is very large and z exponent is very small,
73 it doesn't matter if we don't adjust it. */
74 if (w.ieee.exponent > DBL_MANT_DIG)
75 w.ieee.exponent -= DBL_MANT_DIG;
76 adjust = 1;
77 }
78 else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
79 {
80 /* Similarly.
81 If z exponent is very large and x and y exponents are
82 very small, it doesn't matter if we don't adjust it. */
83 if (u.ieee.exponent > v.ieee.exponent)
84 {
85 if (u.ieee.exponent > DBL_MANT_DIG)
86 u.ieee.exponent -= DBL_MANT_DIG;
87 }
88 else if (v.ieee.exponent > DBL_MANT_DIG)
89 v.ieee.exponent -= DBL_MANT_DIG;
90 w.ieee.exponent -= DBL_MANT_DIG;
91 adjust = 1;
92 }
93 else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
94 {
95 u.ieee.exponent -= DBL_MANT_DIG;
96 if (v.ieee.exponent)
97 v.ieee.exponent += DBL_MANT_DIG;
98 else
99 v.d *= 0x1p53;
100 }
f3f7372d 101 else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
5e908464
JJ
102 {
103 v.ieee.exponent -= DBL_MANT_DIG;
104 if (u.ieee.exponent)
105 u.ieee.exponent += DBL_MANT_DIG;
106 else
107 u.d *= 0x1p53;
108 }
f3f7372d
JJ
109 else /* if (u.ieee.exponent + v.ieee.exponent
110 <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
111 {
112 if (u.ieee.exponent > v.ieee.exponent)
113 u.ieee.exponent += 2 * DBL_MANT_DIG;
114 else
115 v.ieee.exponent += 2 * DBL_MANT_DIG;
116 if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
117 {
118 if (w.ieee.exponent)
119 w.ieee.exponent += 2 * DBL_MANT_DIG;
120 else
121 w.d *= 0x1p106;
122 adjust = -1;
123 }
124 /* Otherwise x * y should just affect inexact
125 and nothing else. */
126 }
5e908464
JJ
127 x = u.d;
128 y = v.d;
129 z = w.d;
130 }
8ec5b013
JM
131
132 /* Ensure correct sign of exact 0 + 0. */
133 if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0))
134 return x * y + z;
135
5e908464
JJ
136 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
137#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
138 double x1 = x * C;
139 double y1 = y * C;
140 double m1 = x * y;
141 x1 = (x - x1) + x1;
142 y1 = (y - y1) + y1;
143 double x2 = x - x1;
144 double y2 = y - y1;
145 double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
146
147 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
148 double a1 = z + m1;
149 double t1 = a1 - z;
150 double t2 = a1 - t1;
151 t1 = m1 - t1;
152 t2 = z - t2;
153 double a2 = t1 + t2;
154
155 fenv_t env;
d9a8d0ab 156 libc_feholdexcept_setround (&env, FE_TOWARDZERO);
0fe0f1f8 157
5e908464
JJ
158 /* Perform m2 + a2 addition with round to odd. */
159 u.d = a2 + m2;
5e908464 160
0fe0f1f8
RH
161 if (__builtin_expect (adjust < 0, 0))
162 {
163 if ((u.ieee.mantissa1 & 1) == 0)
164 u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
165 v.d = a1 + u.d;
efb73488
AJ
166 /* Ensure the addition is not scheduled after fetestexcept call. */
167 math_force_eval (v.d);
0fe0f1f8
RH
168 }
169
170 /* Reset rounding mode and test for inexact simultaneously. */
171 int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
172
f3f7372d
JJ
173 if (__builtin_expect (adjust == 0, 1))
174 {
175 if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
0fe0f1f8 176 u.ieee.mantissa1 |= j;
f3f7372d
JJ
177 /* Result is a1 + u.d. */
178 return a1 + u.d;
179 }
180 else if (__builtin_expect (adjust > 0, 1))
181 {
182 if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
0fe0f1f8 183 u.ieee.mantissa1 |= j;
f3f7372d
JJ
184 /* Result is a1 + u.d, scaled up. */
185 return (a1 + u.d) * 0x1p53;
186 }
187 else
188 {
f3f7372d
JJ
189 /* If a1 + u.d is exact, the only rounding happens during
190 scaling down. */
191 if (j == 0)
192 return v.d * 0x1p-106;
193 /* If result rounded to zero is not subnormal, no double
194 rounding will occur. */
195 if (v.ieee.exponent > 106)
196 return (a1 + u.d) * 0x1p-106;
197 /* If v.d * 0x1p-106 with round to zero is a subnormal above
198 or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
199 down just by 1 bit, which means v.ieee.mantissa1 |= j would
200 change the round bit, not sticky or guard bit.
201 v.d * 0x1p-106 never normalizes by shifting up,
202 so round bit plus sticky bit should be already enough
203 for proper rounding. */
204 if (v.ieee.exponent == 106)
205 {
206 /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
207 v.ieee.mantissa1 & 1 is the round bit and j is our sticky
208 bit. In round-to-nearest 001 rounds down like 00,
209 011 rounds up, even though 01 rounds down (thus we need
210 to adjust), 101 rounds down like 10 and 111 rounds up
211 like 11. */
212 if ((v.ieee.mantissa1 & 3) == 1)
213 {
214 v.d *= 0x1p-106;
215 if (v.ieee.negative)
216 return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
217 else
218 return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
219 }
220 else
221 return v.d * 0x1p-106;
222 }
223 v.ieee.mantissa1 |= j;
224 return v.d * 0x1p-106;
225 }
5e908464
JJ
226}
227#ifndef __fma
228weak_alias (__fma, fma)
229#endif
230
231#ifdef NO_LONG_DOUBLE
232strong_alias (__fma, __fmal)
233weak_alias (__fmal, fmal)
234#endif