/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
944b8b35 | 25 | |
711df88d | 26 | #ifndef __SSE_MATH__ |
c664bb1b UB |
27 | #include "cpuid.h" |
28 | #endif | |
944b8b35 FXC |
29 | |
/* Return nonzero if the processor supports SSE instructions.  When the
   file is compiled with SSE math enabled (__SSE_MATH__ defined), SSE
   support is implied by the compilation mode; otherwise probe the CPU
   at runtime with CPUID.  */
static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1 returns the processor feature flags; __get_cpuid
     returns 0 if that leaf is not available.  */
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  /* SSE availability is reported by the bit_SSE flag (from cpuid.h)
     in EDX.  */
  return edx & bit_SSE;
#else
  return 1;
#endif
}
44 | ||
/* i387 exception bits -- see linux <fpu_control.h> header file for
   details.  The same bit positions serve both as exception-mask bits
   in the control word and as exception-flag bits in the status
   word.  */
#define _FPU_MASK_IM 0x01	/* Invalid operation.  */
#define _FPU_MASK_DM 0x02	/* Denormalized operand.  */
#define _FPU_MASK_ZM 0x04	/* Zero divide.  */
#define _FPU_MASK_OM 0x08	/* Overflow.  */
#define _FPU_MASK_UM 0x10	/* Underflow.  */
#define _FPU_MASK_PM 0x20	/* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f	/* All six exception bits.  */

/* All exception flag bits, valid for both the x87 status word and
   MXCSR.  */
#define _FPU_EX_ALL 0x3f

/* i387 rounding modes (values of the two round-control bits).  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN 0x1
#define _FPU_RC_UP 0x2
#define _FPU_RC_ZERO 0x3

#define _FPU_RC_MASK 0x3

/* MXCSR bit that enables flush to zero (abrupt underflow) mode.  */

#define MXCSR_FTZ (1 << 15)
68 | ||
69 | ||
8b198102 FXC |
/* This structure corresponds to the layout of the block
   written by FSTENV, with the SSE MXCSR register value appended at
   the end (filled in by get_fpu_state when SSE is available).
   gcc_struct keeps the traditional GCC layout regardless of the
   target's default struct-layout ABI.  */
struct fenv
{
  unsigned short int __control_word;
  unsigned short int __unused1;
  unsigned short int __status_word;
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned int __opcode:11;
  unsigned int __unused4:5;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;
} __attribute__ ((gcc_struct));

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
93 | ||
/* Evaluate X / Y and force the division (and any floating-point
   exception it raises) to really happen, preventing the compiler from
   folding or dead-code-eliminating it.  The empty asm with a "+x"
   (SSE register) or "+t" (x87 stack top) constraint makes X opaque to
   the optimizer, and the volatile asm consumes the quotient.  */
#ifdef __SSE_MATH__
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y)); \
  } while (0)
#else
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y)); \
  } while (0)
#endif
a709346f | 105 | |
8b198102 FXC |
/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  */

static void
local_feraiseexcept (int excepts)
{
  struct fenv temp;

  if (excepts & _FPU_MASK_IM)
    {
      /* 0 / 0 raises an invalid-operation exception.  */
      float f = 0.0f;
      __math_force_eval_div (f, f);
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* Denormal: set the flag directly in the saved x87 environment,
	 reload it, and let FWAIT deliver the exception.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* 1 / 0 raises a division-by-zero exception.  */
      float f = 1.0f, g = 0.0f;
      __math_force_eval_div (f, g);
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Overflow: raised via the status word, as for denormal.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Underflow: raised via the status word, as for denormal.  */
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* 1 / 3 is inexact, raising the precision exception.  */
      float f = 1.0f, g = 3.0f;
      __math_force_eval_div (f, g);
    }
}
152 | ||
82a4f54c TB |
153 | |
/* Enable trapping for the exceptions listed in TRAP and disable
   trapping for those listed in NOTRAP, in both the x87 control word
   and (when available) the SSE MXCSR register.  TRAP and NOTRAP are
   bitmasks of GFC_FPE_* values.  */
void
set_fpu_trap_exceptions (int trap, int notrap)
{
  int exc_set = 0, exc_clr = 0;
  unsigned short cw;

  /* Translate GFC_FPE_* bits into x87 exception-mask bits.  */
  if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
  if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
  if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
  if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
  if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
  if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;

  if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
  if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
  if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
  if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
  if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
  if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));

  /* In the control word a set bit MASKS (disables trapping of) the
     exception, so set bits to disable and clear bits to enable.  */
  cw |= exc_clr;
  cw &= ~exc_set;

  /* Clear pending x87 exceptions (fnclex) before loading the new
     control word, so a stale flag cannot trap immediately.  */
  __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      cw_sse |= (exc_clr << 7);
      cw_sse &= ~(exc_set << 7);

      /* Clear stalled exception flags.  */
      cw_sse &= ~_FPU_EX_ALL;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}
fa86f4f9 | 197 | |
8b198102 FXC |
/* Apply the startup FPU configuration: enable trapping for every
   exception recorded in options.fpe, disabling none.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
203 | ||
/* Return the set of currently trapping exceptions as a bitmask of
   GFC_FPE_* values.  An exception traps only if it is unmasked in the
   x87 control word and, when SSE is available, in MXCSR as well.  */
int
get_fpu_trap_exceptions (void)
{
  unsigned short cw;
  int mask;
  int res = 0;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
  mask = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      mask |= (cw_sse >> 7);
    }

  /* A clear mask bit means the exception traps; invert and keep only
     the six exception bits.  */
  mask = ~mask & _FPU_MASK_ALL;

  if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}
235 | ||
/* Trapping can be enabled for every supported exception on this
   target, so report support regardless of FLAG.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
241 | ||
fa86f4f9 TB |
/* Return the currently raised exception flags as a bitmask of
   GFC_FPE_* values, merging the x87 status word with the MXCSR flags
   when SSE is available (both use the same low six bit positions).  */
int
get_fpu_except_flags (void)
{
  unsigned short cw;
  int excepts;
  int res = 0;

  /* "a" lets the compiler use the AX-register form of FNSTSW.  */
  __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
  excepts = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      excepts |= cw_sse;
    }

  excepts &= _FPU_EX_ALL;

  if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}
82a4f54c | 271 | |
8b198102 FXC |
/* Raise the exceptions listed in SET and clear the exception flags
   listed in CLEAR.  Both arguments are bitmasks of GFC_FPE_* values;
   clearing happens before raising.  */
void
set_fpu_except_flags (int set, int clear)
{
  struct fenv temp;
  int exc_set = 0, exc_clr = 0;

  /* Translate from GFC_FPE_* values to _FPU_MASK_* values.  */
  if (set & GFC_FPE_INVALID)
    exc_set |= _FPU_MASK_IM;
  if (clear & GFC_FPE_INVALID)
    exc_clr |= _FPU_MASK_IM;

  if (set & GFC_FPE_DENORMAL)
    exc_set |= _FPU_MASK_DM;
  if (clear & GFC_FPE_DENORMAL)
    exc_clr |= _FPU_MASK_DM;

  if (set & GFC_FPE_ZERO)
    exc_set |= _FPU_MASK_ZM;
  if (clear & GFC_FPE_ZERO)
    exc_clr |= _FPU_MASK_ZM;

  if (set & GFC_FPE_OVERFLOW)
    exc_set |= _FPU_MASK_OM;
  if (clear & GFC_FPE_OVERFLOW)
    exc_clr |= _FPU_MASK_OM;

  if (set & GFC_FPE_UNDERFLOW)
    exc_set |= _FPU_MASK_UM;
  if (clear & GFC_FPE_UNDERFLOW)
    exc_clr |= _FPU_MASK_UM;

  if (set & GFC_FPE_INEXACT)
    exc_set |= _FPU_MASK_PM;
  if (clear & GFC_FPE_INEXACT)
    exc_clr |= _FPU_MASK_PM;


  /* Change the flags. This is tricky on 387 (unlike SSE), because we have
     FNSTSW but no FLDSW instruction: the whole environment must be
     stored, edited and reloaded.  */
  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
  temp.__status_word &= ~exc_clr;
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));

  /* Change the flags on SSE.  */

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      cw_sse &= ~exc_clr;
      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }

  /* Raise the requested exceptions.  */
  local_feraiseexcept (exc_set);
}
329 | ||
/* Every exception status flag can be queried and modified on this
   target, so report support regardless of FLAG.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
335 | ||
82a4f54c TB |
/* Set the rounding mode in both the x87 control word and, when
   available, the SSE MXCSR register.  ROUND is a GFC_FPE_* rounding
   value; unrecognized values are silently ignored.  */
void
set_fpu_rounding_mode (int round)
{
  int round_mode;
  unsigned short cw;

  /* Translate the GFC_FPE_* rounding mode into the i387 encoding.  */
  switch (round)
    {
    case GFC_FPE_TONEAREST:
      round_mode = _FPU_RC_NEAREST;
      break;
    case GFC_FPE_UPWARD:
      round_mode = _FPU_RC_UP;
      break;
    case GFC_FPE_DOWNWARD:
      round_mode = _FPU_RC_DOWN;
      break;
    case GFC_FPE_TOWARDZERO:
      round_mode = _FPU_RC_ZERO;
      break;
    default:
      return; /* Should be unreachable.  */
    }

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  cw &= ~(_FPU_RC_MASK << 10);
  cw |= round_mode << 10;

  __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE round control bits are shifted by 13 bits.  */
      cw_sse &= ~(_FPU_RC_MASK << 13);
      cw_sse |= round_mode << 13;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}
381 | ||
/* Return the current rounding mode as a GFC_FPE_* value.  When
   compiled with SSE math, read it from MXCSR; otherwise from the x87
   control word (set_fpu_rounding_mode keeps the two in sync).  */
int
get_fpu_rounding_mode (void)
{
  int round_mode;

#ifdef __SSE_MATH__
  unsigned int cw;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));

  /* The SSE round control bits are shifted by 13 bits.  */
  round_mode = cw >> 13;
#else
  unsigned short cw;

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  round_mode = cw >> 10;
#endif

  round_mode &= _FPU_RC_MASK;

  /* Translate the i387 encoding back into a GFC_FPE_* value.  */
  switch (round_mode)
    {
    case _FPU_RC_NEAREST:
      return GFC_FPE_TONEAREST;
    case _FPU_RC_UP:
      return GFC_FPE_UPWARD;
    case _FPU_RC_DOWN:
      return GFC_FPE_DOWNWARD;
    case _FPU_RC_ZERO:
      return GFC_FPE_TOWARDZERO;
    default:
      return 0; /* Should be unreachable.  */
    }
}
8b198102 FXC |
419 | |
420 | int | |
4637a1d2 | 421 | support_fpu_rounding_mode (int mode) |
8b198102 | 422 | { |
4637a1d2 FXC |
423 | if (mode == GFC_FPE_AWAY) |
424 | return 0; | |
425 | else | |
426 | return 1; | |
8b198102 FXC |
427 | } |
428 | ||
/* Save the FPU state (x87 environment plus MXCSR when SSE is
   available) into STATE, a buffer of at least
   GFC_FPE_STATE_BUFFER_SIZE bytes laid out as struct fenv.  */
void
get_fpu_state (void *state)
{
  struct fenv *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
443 | ||
/* Restore an FPU state previously saved by get_fpu_state.  STATE must
   point to a buffer filled in by that function.  */
void
set_fpu_state (void *state)
{
  struct fenv *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
456 | ||
f5168e47 FXC |
457 | |
/* Underflow control (flush-to-zero) is only available through the SSE
   MXCSR register, and only for the real kinds handled in SSE
   registers (kinds 4 and 8).  */
int
support_fpu_underflow_control (int kind)
{
  return has_sse () && (kind == 4 || kind == 8);
}
466 | ||
467 | ||
/* Return the current underflow mode: 1 for gradual underflow, 0 for
   abrupt underflow (flush to zero).  Without SSE there is no
   flush-to-zero control, so gradual underflow is reported.  */
int
get_fpu_underflow_mode (void)
{
  unsigned int cw_sse;

  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
  return (cw_sse & MXCSR_FTZ) ? 0 : 1;
}
481 | ||
482 | ||
/* Select gradual underflow (GRADUAL nonzero) or abrupt flush-to-zero
   underflow (GRADUAL zero) via the MXCSR FTZ bit.  A no-op when SSE
   is not available.  */
void
set_fpu_underflow_mode (int gradual)
{
  unsigned int cw_sse;

  if (!has_sse())
    return;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  if (gradual)
    cw_sse &= ~MXCSR_FTZ;
  else
    cw_sse |= MXCSR_FTZ;

  __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
}
500 |