]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/ieee754/dbl-64/s_fma.c
Use i386 bits/wchar.h for i386 and x86-64
[thirdparty/glibc.git] / sysdeps / ieee754 / dbl-64 / s_fma.c
CommitLineData
5e908464 1/* Compute x * y + z as ternary operation.
d9a8d0ab 2 Copyright (C) 2010, 2011 Free Software Foundation, Inc.
5e908464
JJ
3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
5e908464
JJ
19
20#include <float.h>
21#include <math.h>
22#include <fenv.h>
23#include <ieee754.h>
d9a8d0ab 24#include <math_private.h>
5e908464
JJ
25
26/* This implementation uses rounding to odd to avoid problems with
27 double rounding. See a paper by Boldo and Melquiond:
28 http://www.lri.fr/~melquion/doc/08-tc.pdf */
29
30double
31__fma (double x, double y, double z)
32{
33 union ieee754_double u, v, w;
34 int adjust = 0;
35 u.d = x;
36 v.d = y;
37 w.d = z;
38 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
39 >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
40 || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
41 || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
f3f7372d
JJ
42 || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
43 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
44 <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
5e908464 45 {
3e692e05
JJ
46 /* If z is Inf, but x and y are finite, the result should be
47 z rather than NaN. */
48 if (w.ieee.exponent == 0x7ff
49 && u.ieee.exponent != 0x7ff
d9a8d0ab 50 && v.ieee.exponent != 0x7ff)
3e692e05 51 return (z + x) + y;
f3f7372d
JJ
52 /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
53 or if x * y is less than half of DBL_DENORM_MIN,
5e908464
JJ
54 compute as x * y + z. */
55 if (u.ieee.exponent == 0x7ff
56 || v.ieee.exponent == 0x7ff
57 || w.ieee.exponent == 0x7ff
58 || u.ieee.exponent + v.ieee.exponent
f3f7372d
JJ
59 > 0x7ff + IEEE754_DOUBLE_BIAS
60 || u.ieee.exponent + v.ieee.exponent
61 < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
5e908464
JJ
62 return x * y + z;
63 if (u.ieee.exponent + v.ieee.exponent
64 >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
65 {
66 /* Compute 1p-53 times smaller result and multiply
67 at the end. */
68 if (u.ieee.exponent > v.ieee.exponent)
69 u.ieee.exponent -= DBL_MANT_DIG;
70 else
71 v.ieee.exponent -= DBL_MANT_DIG;
72 /* If x + y exponent is very large and z exponent is very small,
73 it doesn't matter if we don't adjust it. */
74 if (w.ieee.exponent > DBL_MANT_DIG)
75 w.ieee.exponent -= DBL_MANT_DIG;
76 adjust = 1;
77 }
78 else if (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
79 {
80 /* Similarly.
81 If z exponent is very large and x and y exponents are
82 very small, it doesn't matter if we don't adjust it. */
83 if (u.ieee.exponent > v.ieee.exponent)
84 {
85 if (u.ieee.exponent > DBL_MANT_DIG)
86 u.ieee.exponent -= DBL_MANT_DIG;
87 }
88 else if (v.ieee.exponent > DBL_MANT_DIG)
89 v.ieee.exponent -= DBL_MANT_DIG;
90 w.ieee.exponent -= DBL_MANT_DIG;
91 adjust = 1;
92 }
93 else if (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
94 {
95 u.ieee.exponent -= DBL_MANT_DIG;
96 if (v.ieee.exponent)
97 v.ieee.exponent += DBL_MANT_DIG;
98 else
99 v.d *= 0x1p53;
100 }
f3f7372d 101 else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
5e908464
JJ
102 {
103 v.ieee.exponent -= DBL_MANT_DIG;
104 if (u.ieee.exponent)
105 u.ieee.exponent += DBL_MANT_DIG;
106 else
107 u.d *= 0x1p53;
108 }
f3f7372d
JJ
109 else /* if (u.ieee.exponent + v.ieee.exponent
110 <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
111 {
112 if (u.ieee.exponent > v.ieee.exponent)
113 u.ieee.exponent += 2 * DBL_MANT_DIG;
114 else
115 v.ieee.exponent += 2 * DBL_MANT_DIG;
116 if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
117 {
118 if (w.ieee.exponent)
119 w.ieee.exponent += 2 * DBL_MANT_DIG;
120 else
121 w.d *= 0x1p106;
122 adjust = -1;
123 }
124 /* Otherwise x * y should just affect inexact
125 and nothing else. */
126 }
5e908464
JJ
127 x = u.d;
128 y = v.d;
129 z = w.d;
130 }
131 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
132#define C ((1 << (DBL_MANT_DIG + 1) / 2) + 1)
133 double x1 = x * C;
134 double y1 = y * C;
135 double m1 = x * y;
136 x1 = (x - x1) + x1;
137 y1 = (y - y1) + y1;
138 double x2 = x - x1;
139 double y2 = y - y1;
140 double m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
141
142 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
143 double a1 = z + m1;
144 double t1 = a1 - z;
145 double t2 = a1 - t1;
146 t1 = m1 - t1;
147 t2 = z - t2;
148 double a2 = t1 + t2;
149
150 fenv_t env;
d9a8d0ab 151 libc_feholdexcept_setround (&env, FE_TOWARDZERO);
0fe0f1f8 152
5e908464
JJ
153 /* Perform m2 + a2 addition with round to odd. */
154 u.d = a2 + m2;
5e908464 155
0fe0f1f8
RH
156 if (__builtin_expect (adjust < 0, 0))
157 {
158 if ((u.ieee.mantissa1 & 1) == 0)
159 u.ieee.mantissa1 |= libc_fetestexcept (FE_INEXACT) != 0;
160 v.d = a1 + u.d;
161 }
162
163 /* Reset rounding mode and test for inexact simultaneously. */
164 int j = libc_feupdateenv_test (&env, FE_INEXACT) != 0;
165
f3f7372d
JJ
166 if (__builtin_expect (adjust == 0, 1))
167 {
168 if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
0fe0f1f8 169 u.ieee.mantissa1 |= j;
f3f7372d
JJ
170 /* Result is a1 + u.d. */
171 return a1 + u.d;
172 }
173 else if (__builtin_expect (adjust > 0, 1))
174 {
175 if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
0fe0f1f8 176 u.ieee.mantissa1 |= j;
f3f7372d
JJ
177 /* Result is a1 + u.d, scaled up. */
178 return (a1 + u.d) * 0x1p53;
179 }
180 else
181 {
f3f7372d
JJ
182 /* If a1 + u.d is exact, the only rounding happens during
183 scaling down. */
184 if (j == 0)
185 return v.d * 0x1p-106;
186 /* If result rounded to zero is not subnormal, no double
187 rounding will occur. */
188 if (v.ieee.exponent > 106)
189 return (a1 + u.d) * 0x1p-106;
190 /* If v.d * 0x1p-106 with round to zero is a subnormal above
191 or equal to DBL_MIN / 2, then v.d * 0x1p-106 shifts mantissa
192 down just by 1 bit, which means v.ieee.mantissa1 |= j would
193 change the round bit, not sticky or guard bit.
194 v.d * 0x1p-106 never normalizes by shifting up,
195 so round bit plus sticky bit should be already enough
196 for proper rounding. */
197 if (v.ieee.exponent == 106)
198 {
199 /* v.ieee.mantissa1 & 2 is LSB bit of the result before rounding,
200 v.ieee.mantissa1 & 1 is the round bit and j is our sticky
201 bit. In round-to-nearest 001 rounds down like 00,
202 011 rounds up, even though 01 rounds down (thus we need
203 to adjust), 101 rounds down like 10 and 111 rounds up
204 like 11. */
205 if ((v.ieee.mantissa1 & 3) == 1)
206 {
207 v.d *= 0x1p-106;
208 if (v.ieee.negative)
209 return v.d - 0x1p-1074 /* __DBL_DENORM_MIN__ */;
210 else
211 return v.d + 0x1p-1074 /* __DBL_DENORM_MIN__ */;
212 }
213 else
214 return v.d * 0x1p-106;
215 }
216 v.ieee.mantissa1 |= j;
217 return v.d * 0x1p-106;
218 }
5e908464
JJ
219}
220#ifndef __fma
221weak_alias (__fma, fma)
222#endif
223
224#ifdef NO_LONG_DOUBLE
225strong_alias (__fma, __fmal)
226weak_alias (__fmal, fmal)
227#endif