]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/fpu/fenv_private.h
Make fma use of Dekker and Knuth algorithms use round-to-nearest (bug 14796).
[thirdparty/glibc.git] / sysdeps / i386 / fpu / fenv_private.h
1 #ifndef FENV_PRIVATE_H
2 #define FENV_PRIVATE_H 1
3
4 #include <fenv.h>
5 #include <fpu_control.h>
6
/* Optimization barriers for floating-point values.

   math_opt_barrier (x) passes X through an empty asm statement whose
   output is tied to its input and yields the result, so the compiler
   has to treat the value as unknown from that point on.
   math_force_eval (x) feeds X to an empty volatile asm so that the
   expression is evaluated even though its value is otherwise unused.

   With SSE2 math, operands no wider than double use an SSE register
   ("x" constraint) and wider ones use the x87 stack ("t"/"f").
   Without SSE2 math, math_opt_barrier always uses the x87 top of stack,
   and math_force_eval places operands no wider than double in memory
   ("m" constraint) -- presumably to drop excess x87 precision; the
   code itself does not say.  */
#ifdef __SSE2_MATH__
# define math_opt_barrier(x) \
  ({ __typeof (x) __x; \
     if (sizeof (x) <= sizeof (double)) \
       __asm ("" : "=x" (__x) : "0" (x)); \
     else \
       __asm ("" : "=t" (__x) : "0" (x)); \
     __x; })
# define math_force_eval(x) \
  do { \
    if (sizeof (x) <= sizeof (double)) \
      __asm __volatile ("" : : "x" (x)); \
    else \
      __asm __volatile ("" : : "f" (x)); \
  } while (0)
#else
# define math_opt_barrier(x) \
  ({ __typeof (x) __x; \
     __asm ("" : "=t" (__x) : "0" (x)); \
     __x; })
# define math_force_eval(x) \
  do { \
    __typeof (x) __x = (x); \
    if (sizeof (x) <= sizeof (double)) \
      __asm __volatile ("" : : "m" (__x)); \
    else \
      __asm __volatile ("" : : "f" (__x)); \
  } while (0)
#endif
36
/* This file is used by both the 32- and 64-bit ports.  The 64-bit port
   has a field in the fenv_t for the mxcsr; the 32-bit port does not.
   Instead, we (ab)use the only 32-bit field extant in the struct.
   The alias is temporary: it is removed again with #undef at the end
   of this header.  */
#ifndef __x86_64__
# define __mxcsr __eip
#endif
43
44
/* All of these functions are private to libm, and are all used in pairs
   to save+change the fp state and restore the original state.  Thus we
   need not care for both the 387 and the sse unit, only the one we're
   actually using.  */

/* Mnemonics used to store and load the MXCSR register.  The "v"
   prefixed forms are selected when __AVX__ or SSE2AVX is defined.  The
   strings are pasted in front of " %0" in the asm statements below.  */
#if defined __AVX__ || defined SSE2AVX
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
#else
# define STMXCSR "stmxcsr"
# define LDMXCSR "ldmxcsr"
#endif
57
58 static __always_inline void
59 libc_feholdexcept_sse (fenv_t *e)
60 {
61 unsigned int mxcsr;
62 asm (STMXCSR " %0" : "=m" (*&mxcsr));
63 e->__mxcsr = mxcsr;
64 mxcsr = (mxcsr | 0x1f80) & ~0x3f;
65 asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
66 }
67
/* Save the x87 environment in *E with fnstenv and then clear the x87
   exception flags with fnclex.  */
static __always_inline void
libc_feholdexcept_387 (fenv_t *e)
{
  /* Recall that fnstenv has a side-effect of masking exceptions.
     Clobber all of the fp registers so that the TOS field is 0.  */
  __asm__ __volatile__ ("fnstenv %0; fnclex"
                        : "=m" (*e)
                        : : "st", "st(1)", "st(2)", "st(3)",
                            "st(4)", "st(5)", "st(6)", "st(7)");
}
78
79 static __always_inline void
80 libc_fesetround_sse (int r)
81 {
82 unsigned int mxcsr;
83 asm (STMXCSR " %0" : "=m" (*&mxcsr));
84 mxcsr = (mxcsr & ~0x6000) | (r << 3);
85 asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
86 }
87
/* Set the x87 rounding mode to R.  Only the rounding-control field of
   the control word (mask 0xc00) is replaced; every other bit is
   written back unchanged.  */
static __always_inline void
libc_fesetround_387 (int r)
{
  fpu_control_t cw;
  _FPU_GETCW (cw);
  cw = (cw & ~0xc00) | r;
  _FPU_SETCW (cw);
}
96
97 static __always_inline void
98 libc_feholdexcept_setround_sse (fenv_t *e, int r)
99 {
100 unsigned int mxcsr;
101 asm (STMXCSR " %0" : "=m" (*&mxcsr));
102 e->__mxcsr = mxcsr;
103 mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
104 asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
105 }
106
/* Set both rounding mode and precision.  A convenience function for use
   by libc_feholdexcept_setround and libc_feholdexcept_setround_53bit.

   E receives the x87 environment as saved by libc_feholdexcept_387.  R
   must already contain both the rounding-control and the
   precision-control bits to install.  */
static __always_inline void
libc_feholdexcept_setround_387_prec (fenv_t *e, int r)
{
  libc_feholdexcept_387 (e);

  /* Start from the control word that was just saved, drop its rounding
     and precision fields, then install R and set the low six bits
     (0x3f), which are the exception masks.  */
  fpu_control_t cw = e->__control_word;
  cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
  cw |= r | 0x3f;
  _FPU_SETCW (cw);
}
119
/* Hold x87 exceptions, saving the environment in E, and select rounding
   mode R together with the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387 (fenv_t *e, int r)
{
  libc_feholdexcept_setround_387_prec (e, r | _FPU_EXTENDED);
}
125
/* Hold x87 exceptions, saving the environment in E, and select rounding
   mode R together with the _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit (fenv_t *e, int r)
{
  libc_feholdexcept_setround_387_prec (e, r | _FPU_DOUBLE);
}
131
132 static __always_inline int
133 libc_fetestexcept_sse (int e)
134 {
135 unsigned int mxcsr;
136 asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
137 return mxcsr & e & FE_ALL_EXCEPT;
138 }
139
/* Return the subset of the exceptions in EX (restricted to
   FE_ALL_EXCEPT) whose flags are currently set in the x87 status word.
   The status word is read with fnstsw into the accumulator ("a"
   constraint).  */
static __always_inline int
libc_fetestexcept_387 (int ex)
{
  fexcept_t temp;
  __asm__ __volatile__ ("fnstsw %0" : "=a" (temp));
  return temp & ex & FE_ALL_EXCEPT;
}
147
148 static __always_inline void
149 libc_fesetenv_sse (fenv_t *e)
150 {
151 asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr));
152 }
153
/* Load the x87 environment from *E with fldenv.  */
static __always_inline void
libc_fesetenv_387 (fenv_t *e)
{
  /* Clobber all fp registers so that the TOS value we saved earlier is
     compatible with the current state of the compiler.  */
  __asm__ __volatile__ ("fldenv %0"
                        : : "m" (*e)
                        : "st", "st(1)", "st(2)", "st(3)",
                          "st(4)", "st(5)", "st(6)", "st(7)");
}
164
165 static __always_inline int
166 libc_feupdateenv_test_sse (fenv_t *e, int ex)
167 {
168 unsigned int mxcsr, old_mxcsr, cur_ex;
169 asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
170 cur_ex = mxcsr & FE_ALL_EXCEPT;
171
172 /* Merge current exceptions with the old environment. */
173 old_mxcsr = e->__mxcsr;
174 mxcsr = old_mxcsr | cur_ex;
175 asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
176
177 /* Raise SIGFPE for any new exceptions since the hold. Expect that
178 the normal environment has all exceptions masked. */
179 if (__builtin_expect ((old_mxcsr >> 7) & cur_ex, 0))
180 __feraiseexcept (cur_ex);
181
182 /* Test for exceptions raised since the hold. */
183 return cur_ex & ex;
184 }
185
/* Restore the x87 environment saved in *E, re-raise the exceptions that
   were flagged in the status word before the restore, and report which
   of the exceptions in EX are among them.

   Returns the previously flagged exceptions (within FE_ALL_EXCEPT)
   masked by EX.  */
static __always_inline int
libc_feupdateenv_test_387 (fenv_t *e, int ex)
{
  fexcept_t cur_ex;

  /* Save current exceptions.  */
  __asm__ __volatile__ ("fnstsw %0" : "=a" (cur_ex));
  cur_ex &= FE_ALL_EXCEPT;

  /* Reload original environment.  */
  libc_fesetenv_387 (e);

  /* Merge current exceptions.  */
  __feraiseexcept (cur_ex);

  /* Test for exceptions raised since the hold.  */
  return cur_ex & ex;
}
204
/* Restore the SSE environment saved in E, merging in the exceptions
   raised since the hold.  Same as libc_feupdateenv_test_sse with an
   empty test set and the result discarded.  */
static __always_inline void
libc_feupdateenv_sse (fenv_t *e)
{
  libc_feupdateenv_test_sse (e, 0);
}
210
/* Restore the x87 environment saved in E, merging in the exceptions
   raised since the hold.  Same as libc_feupdateenv_test_387 with an
   empty test set and the result discarded.  */
static __always_inline void
libc_feupdateenv_387 (fenv_t *e)
{
  libc_feupdateenv_test_387 (e, 0);
}
216
217 static __always_inline void
218 libc_feholdsetround_sse (fenv_t *e, int r)
219 {
220 unsigned int mxcsr;
221 asm (STMXCSR " %0" : "=m" (*&mxcsr));
222 e->__mxcsr = mxcsr;
223 mxcsr = (mxcsr & ~0x6000) | (r << 3);
224 asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
225 }
226
/* Save the current x87 control word in E->__control_word, then replace
   its rounding-control and precision-control fields with R.  R must
   contain both the rounding and the precision bits to install.  No
   other member of *E is written, and the exception masks and flags are
   not changed.  */
static __always_inline void
libc_feholdsetround_387_prec (fenv_t *e, int r)
{
  fpu_control_t cw;

  _FPU_GETCW (cw);
  e->__control_word = cw;
  cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
  cw |= r;
  _FPU_SETCW (cw);
}
238
/* Save the x87 control word in E and select rounding mode R together
   with the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdsetround_387 (fenv_t *e, int r)
{
  libc_feholdsetround_387_prec (e, r | _FPU_EXTENDED);
}
244
/* Save the x87 control word in E and select rounding mode R together
   with the _FPU_DOUBLE precision setting.  */
static __always_inline void
libc_feholdsetround_387_53bit (fenv_t *e, int r)
{
  libc_feholdsetround_387_prec (e, r | _FPU_DOUBLE);
}
250
251 static __always_inline void
252 libc_feresetround_sse (fenv_t *e)
253 {
254 unsigned int mxcsr;
255 asm (STMXCSR " %0" : "=m" (*&mxcsr));
256 mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
257 asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
258 }
259
260 static __always_inline void
261 libc_feresetround_387 (fenv_t *e)
262 {
263 _FPU_SETCW (e->__control_word);
264 }
265
/* Entry points with the "f" suffix: they map to the SSE
   implementations when __SSE_MATH__ is defined and to the 387
   implementations otherwise.  */
#ifdef __SSE_MATH__
# define libc_feholdexceptf             libc_feholdexcept_sse
# define libc_fesetroundf               libc_fesetround_sse
# define libc_feholdexcept_setroundf    libc_feholdexcept_setround_sse
# define libc_fetestexceptf             libc_fetestexcept_sse
# define libc_fesetenvf                 libc_fesetenv_sse
# define libc_feupdateenv_testf         libc_feupdateenv_test_sse
# define libc_feupdateenvf              libc_feupdateenv_sse
# define libc_feholdsetroundf           libc_feholdsetround_sse
# define libc_feresetroundf             libc_feresetround_sse
#else
# define libc_feholdexceptf             libc_feholdexcept_387
# define libc_fesetroundf               libc_fesetround_387
# define libc_feholdexcept_setroundf    libc_feholdexcept_setround_387
# define libc_fetestexceptf             libc_fetestexcept_387
# define libc_fesetenvf                 libc_fesetenv_387
# define libc_feupdateenv_testf         libc_feupdateenv_test_387
# define libc_feupdateenvf              libc_feupdateenv_387
# define libc_feholdsetroundf           libc_feholdsetround_387
# define libc_feresetroundf             libc_feresetround_387
#endif /* __SSE_MATH__ */
287
/* Entry points without a suffix: they map to the SSE implementations
   when __SSE2_MATH__ is defined and to the 387 implementations
   otherwise.  */
#ifdef __SSE2_MATH__
# define libc_feholdexcept              libc_feholdexcept_sse
# define libc_fesetround                libc_fesetround_sse
# define libc_feholdexcept_setround     libc_feholdexcept_setround_sse
# define libc_fetestexcept              libc_fetestexcept_sse
# define libc_fesetenv                  libc_fesetenv_sse
# define libc_feupdateenv_test          libc_feupdateenv_test_sse
# define libc_feupdateenv               libc_feupdateenv_sse
# define libc_feholdsetround            libc_feholdsetround_sse
# define libc_feresetround              libc_feresetround_sse
#else
# define libc_feholdexcept              libc_feholdexcept_387
# define libc_fesetround                libc_fesetround_387
# define libc_feholdexcept_setround     libc_feholdexcept_setround_387
# define libc_fetestexcept              libc_fetestexcept_387
# define libc_fesetenv                  libc_fesetenv_387
# define libc_feupdateenv_test          libc_feupdateenv_test_387
# define libc_feupdateenv               libc_feupdateenv_387
# define libc_feholdsetround            libc_feholdsetround_387
# define libc_feresetround              libc_feresetround_387
#endif /* __SSE2_MATH__ */
309
/* Entry points with the "l" suffix: they always map to the 387
   implementations, whatever the SSE math settings are.  */
#define libc_feholdexceptl              libc_feholdexcept_387
#define libc_fesetroundl                libc_fesetround_387
#define libc_feholdexcept_setroundl     libc_feholdexcept_setround_387
#define libc_fetestexceptl              libc_fetestexcept_387
#define libc_fesetenvl                  libc_fesetenv_387
#define libc_feupdateenv_testl          libc_feupdateenv_test_387
#define libc_feupdateenvl               libc_feupdateenv_387
#define libc_feholdsetroundl            libc_feholdsetround_387
#define libc_feresetroundl              libc_feresetround_387
319
/* The 53-bit precision variants are only provided when the unsuffixed
   entry points use the 387 unit, i.e. when __SSE2_MATH__ is not
   defined.  */
#ifndef __SSE2_MATH__
# define libc_feholdexcept_setround_53bit libc_feholdexcept_setround_387_53bit
# define libc_feholdsetround_53bit libc_feholdsetround_387_53bit
#endif

/* Drop the 32-bit-only alias of __mxcsr to __eip so that it does not
   leak into files including this header.  Harmless when the alias was
   never defined.  */
#undef __mxcsr
326
327 #endif /* FENV_PRIVATE_H */