/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __SSE_MATH__
#include "cpuid.h"
#endif

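/* Return nonzero if the processor supports SSE.  When the library is
   compiled with SSE math (__SSE_MATH__ defined), SSE is assumed to be
   available and the CPUID check is skipped.  */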
static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  return edx & bit_SSE;
#else
  return 1;
#endif
}

/* i387 exceptions -- see linux <fpu_control.h> header file for details.  */
#define _FPU_MASK_IM  0x01
#define _FPU_MASK_DM  0x02
#define _FPU_MASK_ZM  0x04
#define _FPU_MASK_OM  0x08
#define _FPU_MASK_UM  0x10
#define _FPU_MASK_PM  0x20
#define _FPU_MASK_ALL 0x3f

#define _FPU_EX_ALL   0x3f

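/* The same six bits serve as sticky exception flags in the x87 status
   word and as exception masks in the x87 control word; in the SSE MXCSR
   register the flags occupy bits 0-5 and the masks bits 7-12, which is
   why the code below shifts the masks by 7 when touching MXCSR.  */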
/* i387 rounding modes.  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

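/* These two-bit values are stored in bits 10-11 of the x87 control word
   and in bits 13-14 of MXCSR, hence the shifts by 10 and 13 below.  */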
/* Enable flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)


/* This structure corresponds to the layout of the block
   written by FSTENV.  */
struct fenv
{
  unsigned short int __control_word;
  unsigned short int __unused1;
  unsigned short int __status_word;
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned int __opcode:11;
  unsigned int __unused4:5;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;
} __attribute__ ((gcc_struct));

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(struct fenv) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
                "GFC_FPE_STATE_BUFFER_SIZE is too small");

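/* Force the division X / Y to be performed at run time so that it raises
   the intended exception: the first asm keeps X live in an SSE register
   ("+x") or on the x87 stack ("+t"), and the second consumes the quotient,
   preventing the compiler from folding or discarding the division.  */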
#ifdef __SSE_MATH__
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+x" (x)); __asm__ __volatile__ ("" : : "x" (x / y)); \
  } while (0)
#else
# define __math_force_eval_div(x, y) \
  do { \
    __asm__ ("" : "+t" (x)); __asm__ __volatile__ ("" : : "f" (x / y)); \
  } while (0)
#endif

/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  */

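/* Invalid, divide-by-zero and inexact are raised by performing an actual
   division (0/0, 1/0 and 1/3); denormal, overflow and underflow are raised
   by setting the corresponding bit in the saved status word, reloading it
   with fldenv and letting the following fwait deliver the exception.  */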
static void
local_feraiseexcept (int excepts)
{
  struct fenv temp;

  if (excepts & _FPU_MASK_IM)
    {
      float f = 0.0f;
      __math_force_eval_div (f, f);
    }
  if (excepts & _FPU_MASK_DM)
    {
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      float f = 1.0f, g = 0.0f;
      __math_force_eval_div (f, g);
    }
  if (excepts & _FPU_MASK_OM)
    {
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      float f = 1.0f, g = 3.0f;
      __math_force_eval_div (f, g);
    }
}


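/* Enable traps for the exceptions in TRAP and disable them for those in
   NOTRAP.  A trap is enabled by clearing the corresponding mask bit in
   the x87 control word and, when SSE is present, in MXCSR.  */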
void
set_fpu_trap_exceptions (int trap, int notrap)
{
  int exc_set = 0, exc_clr = 0;
  unsigned short cw;

  if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
  if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
  if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
  if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
  if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
  if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;

  if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
  if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
  if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
  if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
  if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
  if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));

  cw |= exc_clr;
  cw &= ~exc_set;

  __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      cw_sse |= (exc_clr << 7);
      cw_sse &= ~(exc_set << 7);

      /* Clear stalled exception flags.  */
      cw_sse &= ~_FPU_EX_ALL;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}

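/* Set the initial trapping state from options.fpe, the set of exceptions
   the main program requested (typically via -ffpe-trap).  */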
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}

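/* Return the GFC_FPE_* exceptions that currently trap, i.e. those that
   are unmasked in both the x87 control word and, when SSE is present,
   MXCSR.  */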
int
get_fpu_trap_exceptions (void)
{
  unsigned short cw;
  int mask;
  int res = 0;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
  mask = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      mask |= (cw_sse >> 7);
    }

  mask = ~mask & _FPU_MASK_ALL;

  if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}

int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}

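/* Return the GFC_FPE_* exception flags currently raised, gathered from
   the x87 status word and, when SSE is present, from MXCSR.  */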
int
get_fpu_except_flags (void)
{
  unsigned short cw;
  int excepts;
  int res = 0;

  __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
  excepts = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      excepts |= cw_sse;
    }

  excepts &= _FPU_EX_ALL;

  if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}

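/* Set the exception flags in SET and clear those in CLEAR.  Clearing is
   done directly in the x87 status word and MXCSR; setting is delegated
   to local_feraiseexcept above, so that enabled traps actually fire.  */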
void
set_fpu_except_flags (int set, int clear)
{
  struct fenv temp;
  int exc_set = 0, exc_clr = 0;

  /* Translate from GFC_FPE_* values to _FPU_MASK_* values.  */
  if (set & GFC_FPE_INVALID)
    exc_set |= _FPU_MASK_IM;
  if (clear & GFC_FPE_INVALID)
    exc_clr |= _FPU_MASK_IM;

  if (set & GFC_FPE_DENORMAL)
    exc_set |= _FPU_MASK_DM;
  if (clear & GFC_FPE_DENORMAL)
    exc_clr |= _FPU_MASK_DM;

  if (set & GFC_FPE_ZERO)
    exc_set |= _FPU_MASK_ZM;
  if (clear & GFC_FPE_ZERO)
    exc_clr |= _FPU_MASK_ZM;

  if (set & GFC_FPE_OVERFLOW)
    exc_set |= _FPU_MASK_OM;
  if (clear & GFC_FPE_OVERFLOW)
    exc_clr |= _FPU_MASK_OM;

  if (set & GFC_FPE_UNDERFLOW)
    exc_set |= _FPU_MASK_UM;
  if (clear & GFC_FPE_UNDERFLOW)
    exc_clr |= _FPU_MASK_UM;

  if (set & GFC_FPE_INEXACT)
    exc_set |= _FPU_MASK_PM;
  if (clear & GFC_FPE_INEXACT)
    exc_clr |= _FPU_MASK_PM;


  /* Change the flags.  This is tricky on 387 (unlike SSE), because we have
     FNSTSW but no FLDSW instruction.  */
  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
  temp.__status_word &= ~exc_clr;
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));

  /* Change the flags on SSE.  */

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      cw_sse &= ~exc_clr;
      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }

  local_feraiseexcept (exc_set);
}

int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}

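/* Set the rounding mode for both units: bits 10-11 of the x87 control
   word and bits 13-14 of MXCSR use the same _FPU_RC_* encoding.  */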
void
set_fpu_rounding_mode (int round)
{
  int round_mode;
  unsigned short cw;

  switch (round)
    {
    case GFC_FPE_TONEAREST:
      round_mode = _FPU_RC_NEAREST;
      break;
    case GFC_FPE_UPWARD:
      round_mode = _FPU_RC_UP;
      break;
    case GFC_FPE_DOWNWARD:
      round_mode = _FPU_RC_DOWN;
      break;
    case GFC_FPE_TOWARDZERO:
      round_mode = _FPU_RC_ZERO;
      break;
    default:
      return; /* Should be unreachable.  */
    }

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  cw &= ~(_FPU_RC_MASK << 10);
  cw |= round_mode << 10;

  __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE round control bits are shifted by 13 bits.  */
      cw_sse &= ~(_FPU_RC_MASK << 13);
      cw_sse |= round_mode << 13;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}

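/* Return the current rounding mode: read from MXCSR when the library is
   compiled for SSE math, otherwise from the x87 control word.  Both units
   are kept in sync by set_fpu_rounding_mode above.  */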
int
get_fpu_rounding_mode (void)
{
  int round_mode;

#ifdef __SSE_MATH__
  unsigned int cw;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));

  /* The SSE round control bits are shifted by 13 bits.  */
  round_mode = cw >> 13;
#else
  unsigned short cw;

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  round_mode = cw >> 10;
#endif

  round_mode &= _FPU_RC_MASK;

  switch (round_mode)
    {
    case _FPU_RC_NEAREST:
      return GFC_FPE_TONEAREST;
    case _FPU_RC_UP:
      return GFC_FPE_UPWARD;
    case _FPU_RC_DOWN:
      return GFC_FPE_DOWNWARD;
    case _FPU_RC_ZERO:
      return GFC_FPE_TOWARDZERO;
    default:
      return 0; /* Should be unreachable.  */
    }
}

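/* x87 and SSE provide the four IEEE binary rounding modes but have no
   round-to-nearest-away mode, so GFC_FPE_AWAY (IEEE_AWAY, new in
   Fortran 2018) is reported as unsupported.  */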
int
support_fpu_rounding_mode (int mode)
{
  if (mode == GFC_FPE_AWAY)
    return 0;
  else
    return 1;
}

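/* Save and restore the complete floating-point state: the x87 environment
   written by fnstenv plus, when SSE is present, the MXCSR register.  */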
void
get_fpu_state (void *state)
{
  struct fenv *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}

void
set_fpu_state (void *state)
{
  struct fenv *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}


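/* Underflow control is implemented through the MXCSR flush-to-zero bit,
   so it is only available when SSE is in use and only for the real kinds
   computed in SSE registers (kind 4 and 8); x87 extended precision always
   underflows gradually.  */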
int
support_fpu_underflow_control (int kind)
{
  if (!has_sse())
    return 0;

  return (kind == 4 || kind == 8) ? 1 : 0;
}


int
get_fpu_underflow_mode (void)
{
  unsigned int cw_sse;

  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
  return (cw_sse & MXCSR_FTZ) ? 0 : 1;
}


void
set_fpu_underflow_mode (int gradual)
{
  unsigned int cw_sse;

  if (!has_sse())
    return;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  if (gradual)
    cw_sse &= ~MXCSR_FTZ;
  else
    cw_sse |= MXCSR_FTZ;

  __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
}