]>
Commit | Line | Data |
---|---|---|
1f205a47 | 1 | /* Internal libc stuff for floating point environment routines. |
04277e02 | 2 | Copyright (C) 1997-2019 Free Software Foundation, Inc. |
1f205a47 UD |
3 | This file is part of the GNU C Library. |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
1f205a47 UD |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 13 | Lesser General Public License for more details. |
1f205a47 | 14 | |
41bdb6e2 | 15 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 16 | License along with the GNU C Library; if not, see |
5a82c748 | 17 | <https://www.gnu.org/licenses/>. */ |
1f205a47 UD |
18 | |
19 | #ifndef _FENV_LIBC_H | |
20 | #define _FENV_LIBC_H 1 | |
21 | ||
22 | #include <fenv.h> | |
edba7a54 UD |
23 | #include <ldsodefs.h> |
24 | #include <sysdep.h> | |
1f205a47 | 25 | |
bd12ab55 | 26 | extern const fenv_t *__fe_nomask_env_priv (void); |
7a2ad8cf | 27 | |
41e8926a | 28 | extern const fenv_t *__fe_mask_env (void) attribute_hidden; |
246ec411 | 29 | |
/* Enter "non-stop" mode when appropriate: if OLD had at least one exception
   enable bit set and NEW has none, mask SIGFPE in the MSR FE0/FE1 bits.
   This may allow the FPU to run faster because it always takes the default
   action and can not generate SIGFPE.
   NEW is only evaluated when OLD has an enable bit set.  */
#define __TEST_AND_ENTER_NON_STOP(old, new) \
  do { \
    if (!(((old) & FPSCR_ENABLES_MASK) == 0 \
          || ((new) & FPSCR_ENABLES_MASK) != 0)) \
      (void) __fe_mask_env (); \
  } while (0)
39 | ||
/* Leave "non-stop" mode when appropriate: if OLD had no exception enable
   bits set and NEW has at least one, unmask SIGFPE in the MSR FE0/FE1 bits.
   This will put the hardware into "precise mode" and may cause the FPU to
   run slower on some hardware.
   NEW is only evaluated when OLD has no enable bit set.  */
#define __TEST_AND_EXIT_NON_STOP(old, new) \
  do { \
    if (!(((old) & FPSCR_ENABLES_MASK) != 0 \
          || ((new) & FPSCR_ENABLES_MASK) == 0)) \
      (void) __fe_nomask_env_priv (); \
  } while (0)
49 | ||
/* The sticky bits in the FPSCR indicating exceptions have occurred: every
   bit of FE_ALL_EXCEPT and FE_ALL_INVALID, except FE_INVALID itself.  */
#define FPSCR_STICKY_BITS \
  (~FE_INVALID & (FE_ALL_EXCEPT | FE_ALL_INVALID))
52 | ||
/* Like fegetenv, except that the environment is the value of the
   expression (the result of __builtin_mffs) rather than being stored
   through a pointer argument.  */
#define fegetenv_register() __builtin_mffs()
1f205a47 | 56 | |
/* Variant of fegetenv_register that only returns the bits for status,
   exception enables, and mode.
   It is emitted as 'mffsl', assembled for "power9".  Conveniently, 'mffsl'
   decodes to 'mffs' on architectures older than "power9", because the extra
   bits set for 'mffsl' are "don't care" for 'mffs'; 'mffs' is a superset of
   'mffsl'.  */
#define fegetenv_control() \
  ({ \
    register double __fpscr_val; \
    __asm__ __volatile__ ( \
      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
      : "=f" (__fpscr_val)); \
    __fpscr_val; \
  })
70 | ||
/* Issue 'mffscrn' (or its immediate form 'mffscrni' when RN is a
   compile-time constant), assembled for "power9", and yield the fenv_t the
   instruction wrote to its output register.  For a non-constant RN the
   value is first placed in the integer view of a fenv_union_t so it can be
   handed to the instruction in a floating-point register.  */
#define __fe_mffscrn(rn) \
  ({ \
    register fenv_union_t __mffscrn_env; \
    if (__builtin_constant_p (rn)) \
      __asm__ __volatile__ ( \
        ".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
        : "=f" (__mffscrn_env.fenv) \
        : "i" (rn)); \
    else \
      { \
        __mffscrn_env.l = (rn); \
        __asm__ __volatile__ ( \
          ".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
          : "=f" (__mffscrn_env.fenv) \
          : "f" (__mffscrn_env.fenv)); \
      } \
    __mffscrn_env.fenv; \
  })
86 | ||
/* Like fegetenv_control, but also sets the rounding mode to RN.  */
#ifdef _ARCH_PWR9
# define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
#else
/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00.  That read is still
   needed but is not sufficient, because 'mffs' does not set the rounding
   mode.  So when the hardware capabilities do not report ARCH_3_00, set the
   rounding mode explicitly afterwards.  Note RN is expanded more than
   once.  */
# define fegetenv_and_set_rn(rn) \
  ({ \
    register fenv_union_t __prev_env; \
    __prev_env.fenv = __fe_mffscrn (rn); \
    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
      __fesetround_inline (rn); \
    __prev_env.fenv; \
  })
#endif
102 | ||
/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.
   ENV is evaluated once.  When the hardware capabilities report DFP support
   the four-operand 'mtfsf' form is used (assembled for "power6"); otherwise
   the plain __builtin_mtfsf with field mask 0xff is used.  */
#define fesetenv_register(env) \
  do { \
    double __new_fpscr = (env); \
    if ((GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) == 0) \
      __builtin_mtfsf (0xff, __new_fpscr); \
    else \
      asm volatile (".machine push; " \
                    ".machine \"power6\"; " \
                    "mtfsf 0xff,%0,1,0; " \
                    ".machine pop" : : "f" (__new_fpscr)); \
  } while (0)
1f205a47 | 115 | |
/* Set the last 2 nibbles of the FPSCR, which contain the
   exception enables and the rounding mode (field mask 0b00000011 selects
   the two least-significant 4-bit fields).
   'fegetenv_control' retrieves these bits by reading the FPSCR.
   The expansion deliberately carries no trailing semicolon so that the
   macro behaves as a single expression statement, e.g. in an unbraced
   if/else.  */
#define fesetenv_control(env) __builtin_mtfsf (0b00000011, (env))
3c1766ea | 120 | |
/* This very handy macro writes zero to FPSCR field 7 and thereby:
   - Sets the rounding mode to 'round to nearest';
   - Sets the processor into IEEE mode; and
   - Prevents exceptions from being raised for inexact results.
   These things happen to be exactly what you need for typical elementary
   functions.
   When the hardware capabilities report DFP support, the three-operand
   'mtfsfi 7,0,1' (assembled for "power6") is issued first; the plain
   'mtfsfi 7,0' is issued in every case.  */
#define relax_fenv_state() \
  do { \
    if ((GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) != 0) \
      asm volatile (".machine push; .machine \"power6\"; " \
                    "mtfsfi 7,0,1; .machine pop"); \
    asm volatile ("mtfsfi 7,0"); \
  } while (0)
1f205a47 | 134 | |
/* Set (mtfsb1) or clear (mtfsb0) one FPSCR bit.  X must be a compile-time
   constant bit number, since it is passed as an immediate operand.
   For instance,
     reset_fpscr_bit (FPSCR_VE);
   prevents INVALID exceptions from being raised.  */
#define set_fpscr_bit(x) \
  asm volatile ("mtfsb1 %0" : : "i" (x))
#define reset_fpscr_bit(x) \
  asm volatile ("mtfsb0 %0" : : "i" (x))
140 | ||
/* Overlay giving access to a floating-point environment either as the
   fenv_t that the register-level macros produce and consume, or as an
   integer on which the FPSCR_*_MASK bit masks can be applied.  */
typedef union
{
  fenv_t fenv;           /* Environment as an fenv_t.  */
  unsigned long long l;  /* The same storage viewed as an integer.  */
} fenv_union_t;
146 | ||
5c68d401 | 147 | |
5c68d401 | 148 | static inline int |
01238691 | 149 | __fesetround_inline (int round) |
5c68d401 | 150 | { |
e68b1151 PC |
151 | #ifdef _ARCH_PWR9 |
152 | __fe_mffscrn (round); | |
153 | #else | |
154 | if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)) | |
155 | __fe_mffscrn (round); | |
156 | else if ((unsigned int) round < 2) | |
5c68d401 RM |
157 | { |
158 | asm volatile ("mtfsb0 30"); | |
159 | if ((unsigned int) round == 0) | |
160 | asm volatile ("mtfsb0 31"); | |
161 | else | |
162 | asm volatile ("mtfsb1 31"); | |
163 | } | |
164 | else | |
165 | { | |
166 | asm volatile ("mtfsb1 30"); | |
167 | if ((unsigned int) round == 2) | |
168 | asm volatile ("mtfsb0 31"); | |
169 | else | |
170 | asm volatile ("mtfsb1 31"); | |
171 | } | |
e68b1151 | 172 | #endif |
5c68d401 RM |
173 | return 0; |
174 | } | |
5c68d401 | 175 | |
6cac323c AZ |
176 | /* Same as __fesetround_inline, however without runtime check to use DFP |
177 | mtfsfi syntax (as relax_fenv_state) or if round value is valid. */ | |
178 | static inline void | |
179 | __fesetround_inline_nocheck (const int round) | |
180 | { | |
e68b1151 PC |
181 | #ifdef _ARCH_PWR9 |
182 | __fe_mffscrn (round); | |
183 | #else | |
184 | if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00)) | |
185 | __fe_mffscrn (round); | |
186 | else | |
187 | asm volatile ("mtfsfi 7,%0" : : "i" (round)); | |
188 | #endif | |
6cac323c AZ |
189 | } |
190 | ||
/* Single-bit mask for FPSCR bit number BIT, where bit 0 is the
   most-significant and bit 31 the least-significant bit of a 32-bit
   word.  The result has type int.  */
#define FPSCR_MASK(bit) (1 << (31 - (bit)))
192 | ||
/* Definitions of all the FPSCR bit numbers.  Each enumerator FPSCR_<name>
   is a bit number (0 first, 31 last) and is immediately followed by
   FPSCR_<name>_MASK, the corresponding single-bit mask built with
   FPSCR_MASK.  Bit 20 is reserved and has no mask.  */
enum {
  FPSCR_FX = 0,    /* exception summary */
#define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX))
  FPSCR_FEX,       /* enabled exception summary */
#define FPSCR_FEX_MASK (FPSCR_MASK (FPSCR_FEX))
  FPSCR_VX,        /* invalid operation summary */
#define FPSCR_VX_MASK (FPSCR_MASK (FPSCR_VX))
  FPSCR_OX,        /* overflow */
#define FPSCR_OX_MASK (FPSCR_MASK (FPSCR_OX))
  FPSCR_UX,        /* underflow */
#define FPSCR_UX_MASK (FPSCR_MASK (FPSCR_UX))
  FPSCR_ZX,        /* zero divide */
#define FPSCR_ZX_MASK (FPSCR_MASK (FPSCR_ZX))
  FPSCR_XX,        /* inexact */
#define FPSCR_XX_MASK (FPSCR_MASK (FPSCR_XX))
  FPSCR_VXSNAN,    /* invalid operation for sNaN */
#define FPSCR_VXSNAN_MASK (FPSCR_MASK (FPSCR_VXSNAN))
  FPSCR_VXISI,     /* invalid operation for Inf-Inf */
#define FPSCR_VXISI_MASK (FPSCR_MASK (FPSCR_VXISI))
  FPSCR_VXIDI,     /* invalid operation for Inf/Inf */
#define FPSCR_VXIDI_MASK (FPSCR_MASK (FPSCR_VXIDI))
  FPSCR_VXZDZ,     /* invalid operation for 0/0 */
#define FPSCR_VXZDZ_MASK (FPSCR_MASK (FPSCR_VXZDZ))
  FPSCR_VXIMZ,     /* invalid operation for Inf*0 */
#define FPSCR_VXIMZ_MASK (FPSCR_MASK (FPSCR_VXIMZ))
  FPSCR_VXVC,      /* invalid operation for invalid compare */
#define FPSCR_VXVC_MASK (FPSCR_MASK (FPSCR_VXVC))
  FPSCR_FR,        /* fraction rounded [fraction was incremented by round] */
#define FPSCR_FR_MASK (FPSCR_MASK (FPSCR_FR))
  FPSCR_FI,        /* fraction inexact */
#define FPSCR_FI_MASK (FPSCR_MASK (FPSCR_FI))
  FPSCR_FPRF_C,    /* result class descriptor */
#define FPSCR_FPRF_C_MASK (FPSCR_MASK (FPSCR_FPRF_C))
  FPSCR_FPRF_FL,   /* result less than (usually, less than 0) */
#define FPSCR_FPRF_FL_MASK (FPSCR_MASK (FPSCR_FPRF_FL))
  FPSCR_FPRF_FG,   /* result greater than */
#define FPSCR_FPRF_FG_MASK (FPSCR_MASK (FPSCR_FPRF_FG))
  FPSCR_FPRF_FE,   /* result equal to */
#define FPSCR_FPRF_FE_MASK (FPSCR_MASK (FPSCR_FPRF_FE))
  FPSCR_FPRF_FU,   /* result unordered */
#define FPSCR_FPRF_FU_MASK (FPSCR_MASK (FPSCR_FPRF_FU))
  FPSCR_20,        /* reserved */
  FPSCR_VXSOFT,    /* invalid operation set by software */
#define FPSCR_VXSOFT_MASK (FPSCR_MASK (FPSCR_VXSOFT))
  FPSCR_VXSQRT,    /* invalid operation for square root */
#define FPSCR_VXSQRT_MASK (FPSCR_MASK (FPSCR_VXSQRT))
  FPSCR_VXCVI,     /* invalid operation for invalid integer convert */
#define FPSCR_VXCVI_MASK (FPSCR_MASK (FPSCR_VXCVI))
  FPSCR_VE,        /* invalid operation exception enable */
#define FPSCR_VE_MASK (FPSCR_MASK (FPSCR_VE))
  FPSCR_OE,        /* overflow exception enable */
#define FPSCR_OE_MASK (FPSCR_MASK (FPSCR_OE))
  FPSCR_UE,        /* underflow exception enable */
#define FPSCR_UE_MASK (FPSCR_MASK (FPSCR_UE))
  FPSCR_ZE,        /* zero divide exception enable */
#define FPSCR_ZE_MASK (FPSCR_MASK (FPSCR_ZE))
  FPSCR_XE,        /* inexact exception enable */
#define FPSCR_XE_MASK (FPSCR_MASK (FPSCR_XE))
#ifdef _ARCH_PWR6
  FPSCR_29,        /* Reserved in ISA 2.05 */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_29))
#else
  FPSCR_NI,        /* non-IEEE mode (typically, no denormalised numbers) */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_NI))
#endif /* _ARCH_PWR6 */
  /* the remaining two least-significant bits keep the rounding mode */
  FPSCR_RN_hi,
#define FPSCR_RN_hi_MASK (FPSCR_MASK (FPSCR_RN_hi))
  FPSCR_RN_lo
#define FPSCR_RN_lo_MASK (FPSCR_MASK (FPSCR_RN_lo))
};
265 | ||
/* Multi-bit masks assembled from the single-bit FPSCR_*_MASK values.  */

/* Both rounding-mode bits.  */
#define FPSCR_RN_MASK \
  (FPSCR_RN_hi_MASK | FPSCR_RN_lo_MASK)

/* The five exception enable bits.  */
#define FPSCR_ENABLES_MASK \
  (FPSCR_VE_MASK | FPSCR_OE_MASK | FPSCR_UE_MASK | FPSCR_ZE_MASK \
   | FPSCR_XE_MASK)

/* The five summary/basic exception indicator bits.  */
#define FPSCR_BASIC_EXCEPTIONS_MASK \
  (FPSCR_VX_MASK | FPSCR_OX_MASK | FPSCR_UX_MASK | FPSCR_ZX_MASK \
   | FPSCR_XX_MASK)

/* The basic exception bits plus every individual invalid-operation bit.  */
#define FPSCR_EXCEPTIONS_MASK \
  (FPSCR_BASIC_EXCEPTIONS_MASK \
   | FPSCR_VXSNAN_MASK | FPSCR_VXISI_MASK | FPSCR_VXIDI_MASK \
   | FPSCR_VXZDZ_MASK | FPSCR_VXIMZ_MASK | FPSCR_VXVC_MASK \
   | FPSCR_VXSOFT_MASK | FPSCR_VXSQRT_MASK | FPSCR_VXCVI_MASK)

/* The five result-flag (FPRF) bits.  */
#define FPSCR_FPRF_MASK \
  (FPSCR_FPRF_C_MASK | FPSCR_FPRF_FL_MASK | FPSCR_FPRF_FG_MASK \
   | FPSCR_FPRF_FE_MASK | FPSCR_FPRF_FU_MASK)

/* Control bits: exception enables, NI and the rounding mode.  */
#define FPSCR_CONTROL_MASK \
  (FPSCR_ENABLES_MASK | FPSCR_NI_MASK | FPSCR_RN_MASK)

/* Status bits: FR, FI and the result flags.  */
#define FPSCR_STATUS_MASK \
  (FPSCR_FR_MASK | FPSCR_FI_MASK | FPSCR_FPRF_MASK)
cd7ce12a PC |
280 | |
/* The exception bits of the FENV(1) ABI map one-to-one onto bits in the
   FPSCR, at shifted but corresponding positions.  Likewise each exception
   indicator bit in the FPSCR has a matching exception enable bit.  A plain
   mask-and-shift therefore converts FENV(1) exceptions to FPSCR enables and
   back; this is the shift distance between the two.  */
#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22
288 | ||
18f2945a AZ |
289 | static inline int |
290 | fenv_reg_to_exceptions (unsigned long long l) | |
291 | { | |
cd7ce12a PC |
292 | return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT; |
293 | } | |
294 | ||
295 | static inline unsigned long long | |
296 | fenv_exceptions_to_reg (int excepts) | |
297 | { | |
298 | return (unsigned long long) | |
299 | (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT; | |
18f2945a AZ |
300 | } |
301 | ||
#ifdef _ARCH_PWR6
/* Not supported in ISA 2.05.  The name is provided for source compatibility
   only, and has the number of the reserved bit FPSCR_29.  */
# define FPSCR_NI 29
#endif /* _ARCH_PWR6 */
306 | ||
/* "Wash" a value by multiplying it by 1.0 in an asm the compiler cannot
   remove.  This operation (i) sets the appropriate FPSCR bits for its
   parameter, (ii) converts sNaN to the corresponding qNaN, and (iii)
   otherwise passes its parameter through unchanged (in particular, -0
   and +0 stay as they were).  The `obvious' way to do this is optimised
   out by gcc.  f_wash yields a double (fmul); f_washf yields a float
   (fmuls).  */
#define f_wash(x) \
  ({ \
    double __washed; \
    asm volatile ("fmul %0,%1,%2" \
                  : "=f" (__washed) \
                  : "f" (x), "f" ((float) 1.0)); \
    __washed; \
  })
#define f_washf(x) \
  ({ \
    float __washedf; \
    asm volatile ("fmuls %0,%1,%2" \
                  : "=f" (__washedf) \
                  : "f" (x), "f" ((float) 1.0)); \
    __washedf; \
  })
246ec411 | 320 | |
1f205a47 | 321 | #endif /* fenv_libc.h */ |