]>
Commit | Line | Data |
---|---|---|
ddb28975 | 1 | /* Inline math functions for i387 and SSE. |
688903eb | 2 | Copyright (C) 1995-2018 Free Software Foundation, Inc. |
f8b87ef0 | 3 | This file is part of the GNU C Library. |
b20e47cb | 4 | |
f8b87ef0 | 5 | The GNU C Library is free software; you can redistribute it and/or |
41bdb6e2 AJ |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either | |
8 | version 2.1 of the License, or (at your option) any later version. | |
b20e47cb | 9 | |
f8b87ef0 UD |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 13 | Lesser General Public License for more details. |
b20e47cb | 14 | |
41bdb6e2 | 15 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 PE |
16 | License along with the GNU C Library; if not, see |
17 | <http://www.gnu.org/licenses/>. */ | |
b20e47cb | 18 | |
61eb22d3 UD |
19 | #ifndef _MATH_H |
20 | # error "Never use <bits/mathinline.h> directly; include <math.h> instead." | |
21 | #endif | |
f8b87ef0 | 22 | |
/* Pick the inline specifier used by every function in this header:
   __extern_always_inline when it is available, plain __inline
   otherwise.  */
#ifdef __extern_always_inline
# define __MATH_INLINE __extern_always_inline
#else
# define __MATH_INLINE __inline
#endif
28 | ||
f41c8091 UD |
/* GCC version 2.7 and earlier has problems with all this inlining
   code, so disable it for those versions of the compiler.  */
4360eafd | 31 | #if __GNUC_PREREQ (2, 8) |
ddb28975 L |
32 | # if !__GNUC_PREREQ (3, 4) && !defined __NO_MATH_INLINES \ |
33 | && defined __OPTIMIZE__ | |
34 | /* GCC 3.4 introduced builtins for all functions below, so | |
35 | there's no need to define any of these inline functions. */ | |
36 | ||
37 | # ifdef __USE_ISOC99 | |
ddb28975 L |
38 | |
39 | /* Round to nearest integer. */ | |
40 | # ifdef __SSE_MATH__ | |
41 | __MATH_INLINE long int | |
42 | __NTH (lrintf (float __x)) | |
43 | { | |
44 | long int __res; | |
45 | /* Mark as volatile since the result is dependent on the state of | |
46 | the SSE control register (the rounding mode). Otherwise GCC might | |
47 | remove these assembler instructions since it does not know about | |
48 | the rounding mode change and cannot currently be told. */ | |
49 | __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x)); | |
50 | return __res; | |
51 | } | |
52 | # endif | |
53 | # ifdef __SSE2_MATH__ | |
54 | __MATH_INLINE long int | |
55 | __NTH (lrint (double __x)) | |
56 | { | |
57 | long int __res; | |
58 | /* Mark as volatile since the result is dependent on the state of | |
59 | the SSE control register (the rounding mode). Otherwise GCC might | |
60 | remove these assembler instructions since it does not know about | |
61 | the rounding mode change and cannot currently be told. */ | |
62 | __asm __volatile__ ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x)); | |
63 | return __res; | |
64 | } | |
65 | # endif | |
66 | # ifdef __x86_64__ | |
828beb13 | 67 | __extension__ |
ddb28975 L |
68 | __MATH_INLINE long long int |
69 | __NTH (llrintf (float __x)) | |
70 | { | |
71 | long long int __res; | |
72 | /* Mark as volatile since the result is dependent on the state of | |
73 | the SSE control register (the rounding mode). Otherwise GCC might | |
74 | remove these assembler instructions since it does not know about | |
75 | the rounding mode change and cannot currently be told. */ | |
76 | __asm __volatile__ ("cvtss2si %1, %0" : "=r" (__res) : "xm" (__x)); | |
77 | return __res; | |
78 | } | |
828beb13 | 79 | __extension__ |
ddb28975 L |
80 | __MATH_INLINE long long int |
81 | __NTH (llrint (double __x)) | |
82 | { | |
83 | long long int __res; | |
84 | /* Mark as volatile since the result is dependent on the state of | |
85 | the SSE control register (the rounding mode). Otherwise GCC might | |
86 | remove these assembler instructions since it does not know about | |
87 | the rounding mode change and cannot currently be told. */ | |
88 | __asm __volatile__ ("cvtsd2si %1, %0" : "=r" (__res) : "xm" (__x)); | |
89 | return __res; | |
90 | } | |
91 | # endif | |
92 | ||
93 | # if defined __FINITE_MATH_ONLY__ && __FINITE_MATH_ONLY__ > 0 \ | |
94 | && defined __SSE2_MATH__ | |
95 | /* Determine maximum of two values. */ | |
96 | __MATH_INLINE float | |
97 | __NTH (fmaxf (float __x, float __y)) | |
98 | { | |
99 | # ifdef __AVX__ | |
100 | float __res; | |
101 | __asm ("vmaxss %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); | |
102 | return __res; | |
103 | # else | |
104 | __asm ("maxss %1, %0" : "+x" (__x) : "xm" (__y)); | |
105 | return __x; | |
106 | # endif | |
107 | } | |
108 | __MATH_INLINE double | |
109 | __NTH (fmax (double __x, double __y)) | |
110 | { | |
111 | # ifdef __AVX__ | |
112 | float __res; | |
113 | __asm ("vmaxsd %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); | |
114 | return __res; | |
115 | # else | |
116 | __asm ("maxsd %1, %0" : "+x" (__x) : "xm" (__y)); | |
117 | return __x; | |
118 | # endif | |
119 | } | |
120 | ||
121 | /* Determine minimum of two values. */ | |
122 | __MATH_INLINE float | |
123 | __NTH (fminf (float __x, float __y)) | |
124 | { | |
125 | # ifdef __AVX__ | |
126 | float __res; | |
127 | __asm ("vminss %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); | |
128 | return __res; | |
129 | # else | |
130 | __asm ("minss %1, %0" : "+x" (__x) : "xm" (__y)); | |
131 | return __x; | |
132 | # endif | |
133 | } | |
134 | __MATH_INLINE double | |
135 | __NTH (fmin (double __x, double __y)) | |
136 | { | |
137 | # ifdef __AVX__ | |
138 | float __res; | |
139 | __asm ("vminsd %2, %1, %0" : "=x" (__res) : "x" (x), "xm" (__y)); | |
140 | return __res; | |
141 | # else | |
142 | __asm ("minsd %1, %0" : "+x" (__x) : "xm" (__y)); | |
143 | return __x; | |
144 | # endif | |
145 | } | |
146 | # endif | |
147 | ||
ddb28975 L |
148 | # endif |
149 | ||
150 | # if defined __SSE4_1__ && defined __SSE2_MATH__ | |
acd7f096 | 151 | # if defined __USE_XOPEN_EXTENDED || defined __USE_ISOC99 |
ddb28975 L |
152 | |
153 | /* Round to nearest integer. */ | |
154 | __MATH_INLINE double | |
155 | __NTH (rint (double __x)) | |
156 | { | |
157 | double __res; | |
158 | /* Mark as volatile since the result is dependent on the state of | |
159 | the SSE control register (the rounding mode). Otherwise GCC might | |
160 | remove these assembler instructions since it does not know about | |
161 | the rounding mode change and cannot currently be told. */ | |
162 | __asm __volatile__ ("roundsd $4, %1, %0" : "=x" (__res) : "xm" (__x)); | |
163 | return __res; | |
164 | } | |
165 | __MATH_INLINE float | |
166 | __NTH (rintf (float __x)) | |
167 | { | |
168 | float __res; | |
169 | /* Mark as volatile since the result is dependent on the state of | |
170 | the SSE control register (the rounding mode). Otherwise GCC might | |
171 | remove these assembler instructions since it does not know about | |
172 | the rounding mode change and cannot currently be told. */ | |
173 | __asm __volatile__ ("roundss $4, %1, %0" : "=x" (__res) : "xm" (__x)); | |
174 | return __res; | |
175 | } | |
176 | ||
177 | # ifdef __USE_ISOC99 | |
178 | /* Round to nearest integer without raising inexact exception. */ | |
179 | __MATH_INLINE double | |
180 | __NTH (nearbyint (double __x)) | |
181 | { | |
182 | double __res; | |
183 | /* Mark as volatile since the result is dependent on the state of | |
184 | the SSE control register (the rounding mode). Otherwise GCC might | |
185 | remove these assembler instructions since it does not know about | |
186 | the rounding mode change and cannot currently be told. */ | |
187 | __asm __volatile__ ("roundsd $0xc, %1, %0" : "=x" (__res) : "xm" (__x)); | |
188 | return __res; | |
189 | } | |
190 | __MATH_INLINE float | |
191 | __NTH (nearbyintf (float __x)) | |
192 | { | |
193 | float __res; | |
194 | /* Mark as volatile since the result is dependent on the state of | |
195 | the SSE control register (the rounding mode). Otherwise GCC might | |
196 | remove these assembler instructions since it does not know about | |
197 | the rounding mode change and cannot currently be told. */ | |
198 | __asm __volatile__ ("roundss $0xc, %1, %0" : "=x" (__res) : "xm" (__x)); | |
199 | return __res; | |
200 | } | |
201 | # endif | |
202 | ||
ddb28975 L |
203 | # endif |
204 | ||
ddb28975 L |
205 | /* Smallest integral value not less than X. */ |
206 | __MATH_INLINE double | |
207 | __NTH (ceil (double __x)) | |
208 | { | |
209 | double __res; | |
210 | __asm ("roundsd $2, %1, %0" : "=x" (__res) : "xm" (__x)); | |
211 | return __res; | |
212 | } | |
ddb28975 | 213 | |
ddb28975 L |
214 | __MATH_INLINE float |
215 | __NTH (ceilf (float __x)) | |
216 | { | |
217 | float __res; | |
218 | __asm ("roundss $2, %1, %0" : "=x" (__res) : "xm" (__x)); | |
219 | return __res; | |
220 | } | |
ddb28975 | 221 | |
ddb28975 L |
222 | /* Largest integer not greater than X. */ |
223 | __MATH_INLINE double | |
224 | __NTH (floor (double __x)) | |
225 | { | |
226 | double __res; | |
227 | __asm ("roundsd $1, %1, %0" : "=x" (__res) : "xm" (__x)); | |
228 | return __res; | |
229 | } | |
ddb28975 | 230 | |
ddb28975 L |
231 | __MATH_INLINE float |
232 | __NTH (floorf (float __x)) | |
233 | { | |
234 | float __res; | |
235 | __asm ("roundss $1, %1, %0" : "=x" (__res) : "xm" (__x)); | |
236 | return __res; | |
237 | } | |
ddb28975 L |
238 | # endif |
239 | # endif | |
ed1825f8 | 240 | #endif |
f41c8091 | 241 | |
508ce3ac | 242 | /* Disable x87 inlines when -fpmath=sse is passed and also when we're building |
e44c8b0e | 243 | on x86_64. Older gcc (gcc-3.2 for example) does not define __SSE2_MATH__ |
508ce3ac SP |
244 | for x86_64. */ |
245 | #if !defined __SSE2_MATH__ && !defined __x86_64__ | |
ed1825f8 | 246 | # if ((!defined __NO_MATH_INLINES || defined __LIBC_INTERNAL_MATH_INLINES) \ |
f43ce637 | 247 | && defined __OPTIMIZE__) |
b20e47cb | 248 | |
7b943af6 UD |
/* The inline functions do not necessarily set errno or raise the
   correct exceptions.  */
ed1825f8 | 251 | # undef math_errhandling |
7b943af6 | 252 | |
3996f34b UD |
253 | /* A macro to define float, double, and long double versions of various |
254 | math functions for the ix87 FPU. FUNC is the function name (which will | |
255 | be suffixed with f and l for the float and long double version, | |
300583a7 UD |
256 | respectively). OP is the name of the FPU operation. |
257 | We define two sets of macros. The set with the additional NP | |
258 | doesn't add a prototype declaration. */ | |
b20e47cb | 259 | |
/* Front ends for one-instruction x87 inlines.  In ISO C99 mode each use
   expands to the double, float (NAMEf) and long double (NAMEl)
   variants; otherwise only the double variant is produced.  The NP
   flavour forwards to __inline_mathopNP_ instead of __inline_mathop_.  */
# ifdef __USE_ISOC99
# define __inline_mathop(name, insn) \
  __inline_mathop_ (double, name, insn) \
  __inline_mathop_ (float, __CONCAT(name,f), insn) \
  __inline_mathop_ (long double, __CONCAT(name,l), insn)
# define __inline_mathopNP(name, insn) \
  __inline_mathopNP_ (double, name, insn) \
  __inline_mathopNP_ (float, __CONCAT(name,f), insn) \
  __inline_mathopNP_ (long double, __CONCAT(name,l), insn)
# else
# define __inline_mathop(name, insn) \
  __inline_mathop_ (double, name, insn)
# define __inline_mathopNP(name, insn) \
  __inline_mathopNP_ (double, name, insn)
# endif
b20e47cb | 275 | |
/* Single-type helpers: forward to the declaration macros, passing
   "0" (__x) as the only asm input so the argument shares the output
   operand's register.  */
# define __inline_mathop_(fp_type, name, insn) \
  __inline_mathop_decl_ (fp_type, name, insn, "0" (__x))
# define __inline_mathopNP_(fp_type, name, insn) \
  __inline_mathop_declNP_ (fp_type, name, insn, "0" (__x))
b20e47cb | 280 | |
b20e47cb | 281 | |
/* Like __inline_mathop, but the caller supplies the asm input (and
   clobber) list explicitly through OPERANDS.  ISO C99 mode produces
   the double, float and long double variants; otherwise double only.  */
# ifdef __USE_ISOC99
# define __inline_mathop_decl(name, insn, operands...) \
  __inline_mathop_decl_ (double, name, insn, operands) \
  __inline_mathop_decl_ (float, __CONCAT(name,f), insn, operands) \
  __inline_mathop_decl_ (long double, __CONCAT(name,l), insn, operands)
# define __inline_mathop_declNP(name, insn, operands...) \
  __inline_mathop_declNP_ (double, name, insn, operands) \
  __inline_mathop_declNP_ (float, __CONCAT(name,f), insn, operands) \
  __inline_mathop_declNP_ (long double, __CONCAT(name,l), insn, operands)
# else
# define __inline_mathop_decl(name, insn, operands...) \
  __inline_mathop_decl_ (double, name, insn, operands)
# define __inline_mathop_declNP(name, insn, operands...) \
  __inline_mathop_declNP_ (double, name, insn, operands)
# endif
b20e47cb | 297 | |
/* Emit a prototype for NAME, then its definition.  */
# define __inline_mathop_decl_(fp_type, name, insn, operands...) \
  __MATH_INLINE fp_type name (fp_type) __THROW; \
  __inline_mathop_declNP_ (fp_type, name, insn, operands)

/* Emit only the definition: a function of one argument __x that runs
   the asm template INSN with OPERANDS as its input/clobber list and
   returns the "=t" output operand.  */
# define __inline_mathop_declNP_(fp_type, name, insn, operands...) \
  __MATH_INLINE fp_type __NTH (name (fp_type __x)) \
  { \
    register fp_type __result; \
    __asm __volatile__ (insn : "=t" (__result) : operands); \
    return __result; \
  }
309 | ||
310 | ||
/* Front ends for inlines whose body is arbitrary code.  The digit gives
   the number of parameters (none means one); NP forwards to the
   generator that emits no separate prototype.  ISO C99 mode produces
   double, float (NAMEf) and long double (NAMEl) variants; otherwise
   double only.  */
# ifdef __USE_ISOC99
# define __inline_mathcode(name, parm, body) \
  __inline_mathcode_ (double, name, parm, body) \
  __inline_mathcode_ (float, __CONCAT(name,f), parm, body) \
  __inline_mathcode_ (long double, __CONCAT(name,l), parm, body)
# define __inline_mathcodeNP(name, parm, body) \
  __inline_mathcodeNP_ (double, name, parm, body) \
  __inline_mathcodeNP_ (float, __CONCAT(name,f), parm, body) \
  __inline_mathcodeNP_ (long double, __CONCAT(name,l), parm, body)
# define __inline_mathcode2(name, parm1, parm2, body) \
  __inline_mathcode2_ (double, name, parm1, parm2, body) \
  __inline_mathcode2_ (float, __CONCAT(name,f), parm1, parm2, body) \
  __inline_mathcode2_ (long double, __CONCAT(name,l), parm1, parm2, body)
# define __inline_mathcodeNP2(name, parm1, parm2, body) \
  __inline_mathcodeNP2_ (double, name, parm1, parm2, body) \
  __inline_mathcodeNP2_ (float, __CONCAT(name,f), parm1, parm2, body) \
  __inline_mathcodeNP2_ (long double, __CONCAT(name,l), parm1, parm2, body)
# define __inline_mathcode3(name, parm1, parm2, parm3, body) \
  __inline_mathcode3_ (double, name, parm1, parm2, parm3, body) \
  __inline_mathcode3_ (float, __CONCAT(name,f), parm1, parm2, parm3, body) \
  __inline_mathcode3_ (long double, __CONCAT(name,l), parm1, parm2, parm3, body)
# define __inline_mathcodeNP3(name, parm1, parm2, parm3, body) \
  __inline_mathcodeNP3_ (double, name, parm1, parm2, parm3, body) \
  __inline_mathcodeNP3_ (float, __CONCAT(name,f), parm1, parm2, parm3, body) \
  __inline_mathcodeNP3_ (long double, __CONCAT(name,l), parm1, parm2, parm3, body)
# else
# define __inline_mathcode(name, parm, body) \
  __inline_mathcode_ (double, name, (parm), body)
# define __inline_mathcodeNP(name, parm, body) \
  __inline_mathcodeNP_ (double, name, (parm), body)
# define __inline_mathcode2(name, parm1, parm2, body) \
  __inline_mathcode2_ (double, name, parm1, parm2, body)
# define __inline_mathcodeNP2(name, parm1, parm2, body) \
  __inline_mathcodeNP2_ (double, name, parm1, parm2, body)
# define __inline_mathcode3(name, parm1, parm2, parm3, body) \
  __inline_mathcode3_ (double, name, parm1, parm2, parm3, body)
# define __inline_mathcodeNP3(name, parm1, parm2, parm3, body) \
  __inline_mathcodeNP3_ (double, name, parm1, parm2, parm3, body)
# endif
b20e47cb | 350 | |
/* Generators behind the __inline_mathcode* front ends.  Each variant
   without NP first emits a prototype and then defers to its NP twin,
   which emits the function definition with BODY as its statements.  */

/* One parameter.  */
# define __inline_mathcode_(fp_type, name, parm, body) \
  __MATH_INLINE fp_type name (fp_type) __THROW; \
  __inline_mathcodeNP_(fp_type, name, parm, body)

# define __inline_mathcodeNP_(fp_type, name, parm, body) \
  __MATH_INLINE fp_type __NTH (name (fp_type parm)) \
  { \
    body; \
  }

/* Two parameters.  */
# define __inline_mathcode2_(fp_type, name, parm1, parm2, body) \
  __MATH_INLINE fp_type name (fp_type, fp_type) __THROW; \
  __inline_mathcodeNP2_ (fp_type, name, parm1, parm2, body)

# define __inline_mathcodeNP2_(fp_type, name, parm1, parm2, body) \
  __MATH_INLINE fp_type __NTH (name (fp_type parm1, fp_type parm2)) \
  { \
    body; \
  }

/* Three parameters.  */
# define __inline_mathcode3_(fp_type, name, parm1, parm2, parm3, body) \
  __MATH_INLINE fp_type name (fp_type, fp_type, fp_type) __THROW; \
  __inline_mathcodeNP3_(fp_type, name, parm1, parm2, parm3, body)

# define __inline_mathcodeNP3_(fp_type, name, parm1, parm2, parm3, body) \
  __MATH_INLINE fp_type __NTH (name (fp_type parm1, fp_type parm2, \
				     fp_type parm3)) \
  { \
    body; \
  }
ed1825f8 | 382 | # endif |
1ea89a40 | 383 | |
3996f34b | 384 | |
ed1825f8 L |
385 | # if !defined __NO_MATH_INLINES && defined __OPTIMIZE__ |
386 | /* Miscellaneous functions */ | |
f41c8091 | 387 | |
378fbeb4 | 388 | /* __FAST_MATH__ is defined by gcc -ffast-math. */ |
ed1825f8 L |
389 | # ifdef __FAST_MATH__ |
# ifdef __USE_GNU
/* Shared body of the __sincos* inlines.  Issues fsincos on __x; if bit
   0x400 of the FPU status word is set afterwards, the argument is
   reduced with fprem1 against twice the fldpi constant until the bit
   clears, and fsincos is issued again.  The two results are stored
   through __sinx and __cosx.  */
# define __sincos_code \
  register long double __cos_top; \
  register long double __sin_next; \
  register unsigned int __fpsw; \
  __asm __volatile__ \
    ("fsincos\n\t" \
     "fnstsw %w2\n\t" \
     "testl $0x400, %2\n\t" \
     "jz 1f\n\t" \
     "fldpi\n\t" \
     "fadd %%st(0)\n\t" \
     "fxch %%st(1)\n\t" \
     "2: fprem1\n\t" \
     "fnstsw %w2\n\t" \
     "testl $0x400, %2\n\t" \
     "jnz 2b\n\t" \
     "fstp %%st(1)\n\t" \
     "fsincos\n\t" \
     "1:" \
     : "=t" (__cos_top), "=u" (__sin_next), "=a" (__fpsw) : "0" (__x)); \
  *__sinx = __sin_next; \
  *__cosx = __cos_top

/* Double version.  */
__MATH_INLINE void
__NTH (__sincos (double __x, double *__sinx, double *__cosx))
{
  __sincos_code;
}

/* Float version.  */
__MATH_INLINE void
__NTH (__sincosf (float __x, float *__sinx, float *__cosx))
{
  __sincos_code;
}

/* Long double version.  */
__MATH_INLINE void
__NTH (__sincosl (long double __x, long double *__sinx, long double *__cosx))
{
  __sincos_code;
}
# endif
f41c8091 UD |
432 | |
433 | ||
8f2ece69 | 434 | /* Optimized inline implementation, sometimes with reduced precision |
3996f34b | 435 | and/or argument range. */ |
8f2ece69 | 436 | |
ed1825f8 L |
437 | # if __GNUC_PREREQ (3, 5) |
438 | # define __expm1_code \ | |
3dbfd811 UD |
439 | register long double __temp; \ |
440 | __temp = __builtin_expm1l (__x); \ | |
441 | return __temp ? __temp : __x | |
ed1825f8 L |
442 | # else |
443 | # define __expm1_code \ | |
3996f34b UD |
444 | register long double __value; \ |
445 | register long double __exponent; \ | |
446 | register long double __temp; \ | |
447 | __asm __volatile__ \ | |
448 | ("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" \ | |
449 | "fmul %%st(1) # x * log2(e)\n\t" \ | |
50304ef0 | 450 | "fst %%st(1)\n\t" \ |
3996f34b UD |
451 | "frndint # int(x * log2(e))\n\t" \ |
452 | "fxch\n\t" \ | |
453 | "fsub %%st(1) # fract(x * log2(e))\n\t" \ | |
454 | "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \ | |
455 | "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" \ | |
456 | : "=t" (__value), "=u" (__exponent) : "0" (__x)); \ | |
457 | __asm __volatile__ \ | |
458 | ("fscale # 2^int(x * log2(e))\n\t" \ | |
459 | : "=t" (__temp) : "0" (1.0), "u" (__exponent)); \ | |
460 | __temp -= 1.0; \ | |
4e142297 RM |
461 | __temp += __value; \ |
462 | return __temp ? __temp : __x | |
ed1825f8 | 463 | # endif |
300583a7 | 464 | __inline_mathcodeNP_ (long double, __expm1l, __x, __expm1_code) |
3996f34b | 465 | |
ed1825f8 | 466 | # if __GNUC_PREREQ (3, 4) |
3dbfd811 | 467 | __inline_mathcodeNP_ (long double, __expl, __x, return __builtin_expl (__x)) |
ed1825f8 L |
468 | # else |
469 | # define __exp_code \ | |
3996f34b UD |
470 | register long double __value; \ |
471 | register long double __exponent; \ | |
472 | __asm __volatile__ \ | |
473 | ("fldl2e # e^x = 2^(x * log2(e))\n\t" \ | |
474 | "fmul %%st(1) # x * log2(e)\n\t" \ | |
50304ef0 | 475 | "fst %%st(1)\n\t" \ |
3996f34b UD |
476 | "frndint # int(x * log2(e))\n\t" \ |
477 | "fxch\n\t" \ | |
478 | "fsub %%st(1) # fract(x * log2(e))\n\t" \ | |
479 | "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \ | |
480 | : "=t" (__value), "=u" (__exponent) : "0" (__x)); \ | |
481 | __value += 1.0; \ | |
482 | __asm __volatile__ \ | |
483 | ("fscale" \ | |
484 | : "=t" (__value) : "0" (__value), "u" (__exponent)); \ | |
485 | return __value | |
300583a7 UD |
486 | __inline_mathcodeNP (exp, __x, __exp_code) |
487 | __inline_mathcodeNP_ (long double, __expl, __x, __exp_code) | |
ed1825f8 | 488 | # endif |
3996f34b UD |
489 | |
490 | ||
ed1825f8 | 491 | # if !__GNUC_PREREQ (3, 5) |
300583a7 | 492 | __inline_mathcodeNP (tan, __x, \ |
3996f34b | 493 | register long double __value; \ |
fa1f94fe | 494 | register long double __value2 __attribute__ ((__unused__)); \ |
3996f34b UD |
495 | __asm __volatile__ \ |
496 | ("fptan" \ | |
497 | : "=t" (__value2), "=u" (__value) : "0" (__x)); \ | |
498 | return __value) | |
ed1825f8 L |
499 | # endif |
500 | # endif /* __FAST_MATH__ */ | |
378fbeb4 UD |
501 | |
502 | ||
ed1825f8 | 503 | # if __GNUC_PREREQ (3, 4) |
3dbfd811 UD |
504 | __inline_mathcodeNP2_ (long double, __atan2l, __y, __x, |
505 | return __builtin_atan2l (__y, __x)) | |
ed1825f8 L |
506 | # else |
507 | # define __atan2_code \ | |
378fbeb4 UD |
508 | register long double __value; \ |
509 | __asm __volatile__ \ | |
510 | ("fpatan" \ | |
511 | : "=t" (__value) : "0" (__x), "u" (__y) : "st(1)"); \ | |
512 | return __value | |
ed1825f8 | 513 | # ifdef __FAST_MATH__ |
378fbeb4 | 514 | __inline_mathcodeNP2 (atan2, __y, __x, __atan2_code) |
ed1825f8 | 515 | # endif |
378fbeb4 | 516 | __inline_mathcodeNP2_ (long double, __atan2l, __y, __x, __atan2_code) |
ed1825f8 | 517 | # endif |
378fbeb4 UD |
518 | |
519 | ||
ed1825f8 | 520 | # if defined __FAST_MATH__ && !__GNUC_PREREQ (3, 5) |
378fbeb4 UD |
521 | __inline_mathcodeNP2 (fmod, __x, __y, \ |
522 | register long double __value; \ | |
523 | __asm __volatile__ \ | |
524 | ("1: fprem\n\t" \ | |
525 | "fnstsw %%ax\n\t" \ | |
526 | "sahf\n\t" \ | |
527 | "jp 1b" \ | |
528 | : "=t" (__value) : "0" (__x), "u" (__y) : "ax", "cc"); \ | |
529 | return __value) | |
ed1825f8 | 530 | # endif |
3996f34b UD |
531 | |
532 | ||
ed1825f8 L |
/* Fast-math square root.  Compilers older than GCC 3.3 get fsqrt-based
   inlines for the sqrt family and __sqrtl; newer ones map the internal
   helper __libc_sqrtl straight to __builtin_sqrtl.  */
# ifdef __FAST_MATH__
# if __GNUC_PREREQ (3,3)
# define __libc_sqrtl(n) __builtin_sqrtl (n)
# else
__inline_mathopNP (sqrt, "fsqrt")
__inline_mathopNP_ (long double, __sqrtl, "fsqrt")
# define __libc_sqrtl(n) __sqrtl (n)
# endif
# endif
3996f34b | 542 | |
ed1825f8 | 543 | # if __GNUC_PREREQ (2, 8) |
300583a7 | 544 | __inline_mathcodeNP_ (double, fabs, __x, return __builtin_fabs (__x)) |
acd7f096 | 545 | # ifdef __USE_ISOC99 |
300583a7 UD |
546 | __inline_mathcodeNP_ (float, fabsf, __x, return __builtin_fabsf (__x)) |
547 | __inline_mathcodeNP_ (long double, fabsl, __x, return __builtin_fabsl (__x)) | |
ed1825f8 | 548 | # endif |
300583a7 | 549 | __inline_mathcodeNP_ (long double, __fabsl, __x, return __builtin_fabsl (__x)) |
ed1825f8 | 550 | # else |
3996f34b | 551 | __inline_mathop (fabs, "fabs") |
8f2ece69 | 552 | __inline_mathop_ (long double, __fabsl, "fabs") |
ed1825f8 | 553 | # endif |
3996f34b | 554 | |
ed1825f8 L |
# ifdef __FAST_MATH__
# if !__GNUC_PREREQ (3, 4)
/* sin and cos map directly to fsin/fcos; the argument range of these
   inline versions is reduced.  */
__inline_mathopNP (sin, "fsin")
__inline_mathopNP (cos, "fcos")

/* Natural logarithm: fyl2x with the fldln2 constant as multiplier.  */
__inline_mathop_declNP (log, "fldln2; fxch; fyl2x", "0" (__x) : "st(1)")
# endif

# if !__GNUC_PREREQ (3, 5)
/* Base-10 logarithm: fyl2x with the fldlg2 constant as multiplier.  */
__inline_mathop_declNP (log10, "fldlg2; fxch; fyl2x", "0" (__x) : "st(1)")

/* asin and acos are expressed through __atan2l and the square root
   of 1 - x*x.  */
__inline_mathcodeNP (asin, __x, \
  return __atan2l (__x, __libc_sqrtl (1.0 - __x * __x)))
__inline_mathcodeNP (acos, __x, \
  return __atan2l (__libc_sqrtl (1.0 - __x * __x), __x))
# endif

# if !__GNUC_PREREQ (3, 4)
/* atan: fpatan with 1.0 loaded as the second operand.  */
__inline_mathop_declNP (atan, "fld1; fpatan", "0" (__x) : "st(1)")
# endif
# endif /* __FAST_MATH__ */
3996f34b | 576 | |
15daa639 | 577 | __inline_mathcode_ (long double, __sgn1l, __x, \ |
2caacb0a | 578 | __extension__ union { long double __xld; unsigned int __xi[3]; } __n = \ |
e150fddc | 579 | { __xld: __x }; \ |
15daa639 UD |
580 | __n.__xi[2] = (__n.__xi[2] & 0x8000) | 0x3fff; \ |
581 | __n.__xi[1] = 0x80000000; \ | |
582 | __n.__xi[0] = 0; \ | |
583 | return __n.__xld) | |
3996f34b UD |
584 | |
585 | ||
# ifdef __FAST_MATH__
/* Hyperbolic functions built from __expm1l, __expl, __fabsl and
   __sgn1l.  The argument range of the inline sinh is slightly
   reduced.  */
__inline_mathcodeNP (sinh, __x, \
  register long double __em1 = __expm1l (__fabsl (__x)); \
  return 0.5 * (__em1 / (__em1 + 1.0) + __em1) * __sgn1l (__x))

__inline_mathcodeNP (cosh, __x, \
  register long double __e = __expl (__x); \
  return 0.5 * (__e + 1.0 / __e))

__inline_mathcodeNP (tanh, __x, \
  register long double __em1 = __expm1l (-__fabsl (__x + __x)); \
  return __em1 / (__em1 + 2.0) * __sgn1l (-__x))
# endif
3996f34b | 600 | |
300583a7 | 601 | __inline_mathcodeNP (floor, __x, \ |
3996f34b | 602 | register long double __value; \ |
5c2af134 UD |
603 | register int __ignore; \ |
604 | unsigned short int __cw; \ | |
605 | unsigned short int __cwtmp; \ | |
e1b37083 UD |
606 | __asm __volatile ("fnstcw %3\n\t" \ |
607 | "movzwl %3, %1\n\t" \ | |
5c2af134 UD |
608 | "andl $0xf3ff, %1\n\t" \ |
609 | "orl $0x0400, %1\n\t" /* rounding down */ \ | |
e1b37083 UD |
610 | "movw %w1, %2\n\t" \ |
611 | "fldcw %2\n\t" \ | |
5c2af134 | 612 | "frndint\n\t" \ |
e1b37083 UD |
613 | "fldcw %3" \ |
614 | : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ | |
615 | "=m" (__cw) \ | |
616 | : "0" (__x)); \ | |
3996f34b UD |
617 | return __value) |
618 | ||
300583a7 | 619 | __inline_mathcodeNP (ceil, __x, \ |
3996f34b | 620 | register long double __value; \ |
5c2af134 UD |
621 | register int __ignore; \ |
622 | unsigned short int __cw; \ | |
623 | unsigned short int __cwtmp; \ | |
e1b37083 UD |
624 | __asm __volatile ("fnstcw %3\n\t" \ |
625 | "movzwl %3, %1\n\t" \ | |
5c2af134 UD |
626 | "andl $0xf3ff, %1\n\t" \ |
627 | "orl $0x0800, %1\n\t" /* rounding up */ \ | |
e1b37083 UD |
628 | "movw %w1, %2\n\t" \ |
629 | "fldcw %2\n\t" \ | |
5c2af134 | 630 | "frndint\n\t" \ |
e1b37083 UD |
631 | "fldcw %3" \ |
632 | : "=t" (__value), "=&q" (__ignore), "=m" (__cwtmp), \ | |
633 | "=m" (__cw) \ | |
634 | : "0" (__x)); \ | |
3996f34b | 635 | return __value) |
b20e47cb | 636 | |
ed1825f8 L |
# ifdef __FAST_MATH__
/* Shared body of the ldexp inlines: fscale with __x as the "0" operand
   and the integer __y converted to long double as the "u" operand.  */
# define __ldexp_code \
  register long double __scaled; \
  __asm __volatile__ \
    ("fscale" \
     : "=t" (__scaled) : "0" (__x), "u" ((long double) __y)); \
  return __scaled

/* Double version.  */
__MATH_INLINE double
__NTH (ldexp (double __x, int __y))
{
  __ldexp_code;
}
# endif
8f2ece69 | 651 | |
b20e47cb RM |
652 | |
653 | /* Optimized versions for some non-standardized functions. */ | |
acd7f096 | 654 | # ifdef __USE_ISOC99 |
b20e47cb | 655 | |
ed1825f8 | 656 | # ifdef __FAST_MATH__ |
300583a7 | 657 | __inline_mathcodeNP (expm1, __x, __expm1_code) |
b20e47cb | 658 | |
714a562f UD |
659 | /* We cannot rely on M_SQRT being defined. So we do it for ourself |
660 | here. */ | |
ed1825f8 | 661 | # define __M_SQRT2 1.41421356237309504880L /* sqrt(2) */ |
714a562f | 662 | |
ed1825f8 | 663 | # if !__GNUC_PREREQ (3, 5) |
300583a7 | 664 | __inline_mathcodeNP (log1p, __x, \ |
3996f34b | 665 | register long double __value; \ |
8f2ece69 | 666 | if (__fabsl (__x) >= 1.0 - 0.5 * __M_SQRT2) \ |
3996f34b UD |
667 | __value = logl (1.0 + __x); \ |
668 | else \ | |
669 | __asm __volatile__ \ | |
670 | ("fldln2\n\t" \ | |
671 | "fxch\n\t" \ | |
672 | "fyl2xp1" \ | |
5892e305 | 673 | : "=t" (__value) : "0" (__x) : "st(1)"); \ |
3996f34b | 674 | return __value) |
ed1825f8 | 675 | # endif |
3996f34b UD |
676 | |
677 | ||
678 | /* The argument range of the inline version of asinhl is slightly reduced. */ | |
300583a7 | 679 | __inline_mathcodeNP (asinh, __x, \ |
8f2ece69 | 680 | register long double __y = __fabsl (__x); \ |
0135bde4 | 681 | return (log1pl (__y * __y / (__libc_sqrtl (__y * __y + 1.0) + 1.0) + __y) \ |
ca34d7a7 | 682 | * __sgn1l (__x))) |
3996f34b | 683 | |
300583a7 | 684 | __inline_mathcodeNP (acosh, __x, \ |
0135bde4 | 685 | return logl (__x + __libc_sqrtl (__x - 1.0) * __libc_sqrtl (__x + 1.0))) |
3996f34b | 686 | |
300583a7 | 687 | __inline_mathcodeNP (atanh, __x, \ |
8f2ece69 | 688 | register long double __y = __fabsl (__x); \ |
bd355af0 | 689 | return -0.5 * log1pl (-(__y + __y) / (1.0 + __y)) * __sgn1l (__x)) |
3996f34b | 690 | |
3996f34b | 691 | /* The argument range of the inline version of hypotl is slightly reduced. */ |
0135bde4 UD |
692 | __inline_mathcodeNP2 (hypot, __x, __y, |
693 | return __libc_sqrtl (__x * __x + __y * __y)) | |
3996f34b | 694 | |
ed1825f8 | 695 | # if !__GNUC_PREREQ (3, 5) |
300583a7 | 696 | __inline_mathcodeNP(logb, __x, \ |
3996f34b UD |
697 | register long double __value; \ |
698 | register long double __junk; \ | |
699 | __asm __volatile__ \ | |
700 | ("fxtract\n\t" \ | |
701 | : "=t" (__junk), "=u" (__value) : "0" (__x)); \ | |
702 | return __value) | |
ed1825f8 | 703 | # endif |
3996f34b | 704 | |
ed1825f8 L |
705 | # endif |
706 | # endif | |
7d6a8338 | 707 | |
ed1825f8 L |
708 | # ifdef __USE_ISOC99 |
709 | # ifdef __FAST_MATH__ | |
3dbfd811 | 710 | |
ed1825f8 | 711 | # if !__GNUC_PREREQ (3, 5) |
300583a7 | 712 | __inline_mathop_declNP (log2, "fld1; fxch; fyl2x", "0" (__x) : "st(1)") |
ed1825f8 | 713 | # endif |
7d6a8338 | 714 | |
8f2ece69 | 715 | __MATH_INLINE float |
f377d022 | 716 | __NTH (ldexpf (float __x, int __y)) |
8f2ece69 UD |
717 | { |
718 | __ldexp_code; | |
719 | } | |
720 | ||
8f2ece69 | 721 | __MATH_INLINE long double |
f377d022 | 722 | __NTH (ldexpl (long double __x, int __y)) |
8f2ece69 UD |
723 | { |
724 | __ldexp_code; | |
725 | } | |
3996f34b | 726 | |
300583a7 | 727 | __inline_mathopNP (rint, "frndint") |
ed1825f8 | 728 | # endif /* __FAST_MATH__ */ |
7d6a8338 | 729 | |
ed1825f8 | 730 | # define __lrint_code \ |
33127459 UD |
731 | long int __lrintres; \ |
732 | __asm__ __volatile__ \ | |
733 | ("fistpl %0" \ | |
734 | : "=m" (__lrintres) : "t" (__x) : "st"); \ | |
735 | return __lrintres | |
736 | __MATH_INLINE long int | |
f377d022 | 737 | __NTH (lrintf (float __x)) |
33127459 UD |
738 | { |
739 | __lrint_code; | |
740 | } | |
741 | __MATH_INLINE long int | |
f377d022 | 742 | __NTH (lrint (double __x)) |
33127459 UD |
743 | { |
744 | __lrint_code; | |
745 | } | |
746 | __MATH_INLINE long int | |
f377d022 | 747 | __NTH (lrintl (long double __x)) |
33127459 UD |
748 | { |
749 | __lrint_code; | |
750 | } | |
ed1825f8 | 751 | # undef __lrint_code |
33127459 | 752 | |
ed1825f8 | 753 | # define __llrint_code \ |
33127459 UD |
754 | long long int __llrintres; \ |
755 | __asm__ __volatile__ \ | |
756 | ("fistpll %0" \ | |
757 | : "=m" (__llrintres) : "t" (__x) : "st"); \ | |
758 | return __llrintres | |
828beb13 | 759 | __extension__ |
33127459 | 760 | __MATH_INLINE long long int |
f377d022 | 761 | __NTH (llrintf (float __x)) |
33127459 UD |
762 | { |
763 | __llrint_code; | |
764 | } | |
828beb13 | 765 | __extension__ |
33127459 | 766 | __MATH_INLINE long long int |
f377d022 | 767 | __NTH (llrint (double __x)) |
33127459 UD |
768 | { |
769 | __llrint_code; | |
770 | } | |
828beb13 | 771 | __extension__ |
33127459 | 772 | __MATH_INLINE long long int |
f377d022 | 773 | __NTH (llrintl (long double __x)) |
33127459 UD |
774 | { |
775 | __llrint_code; | |
776 | } | |
ed1825f8 | 777 | # undef __llrint_code |
33127459 | 778 | |
ed1825f8 | 779 | # endif |
cd6ede75 | 780 | |
b20e47cb | 781 | |
ed1825f8 | 782 | # ifdef __USE_MISC |
8f2ece69 | 783 | |
ed1825f8 | 784 | # if defined __FAST_MATH__ && !__GNUC_PREREQ (3, 5) |
300583a7 | 785 | __inline_mathcodeNP2 (drem, __x, __y, \ |
3996f34b | 786 | register double __value; \ |
33127459 | 787 | register int __clobbered; \ |
3996f34b UD |
788 | __asm __volatile__ \ |
789 | ("1: fprem1\n\t" \ | |
790 | "fstsw %%ax\n\t" \ | |
791 | "sahf\n\t" \ | |
792 | "jp 1b" \ | |
33127459 | 793 | : "=t" (__value), "=&a" (__clobbered) : "0" (__x), "u" (__y) : "cc"); \ |
3996f34b | 794 | return __value) |
ed1825f8 | 795 | # endif |
3996f34b | 796 | |
b20e47cb | 797 | |
7799b7b3 | 798 | /* This function is used in the `isfinite' macro. */ |
7799b7b3 | 799 | __MATH_INLINE int |
f377d022 | 800 | __NTH (__finite (double __x)) |
7799b7b3 | 801 | { |
db24ce47 | 802 | return (__extension__ |
e150fddc | 803 | (((((union { double __d; int __i[2]; }) {__d: __x}).__i[1] |
77faa354 | 804 | | 0x800fffffu) + 1) >> 31)); |
7799b7b3 | 805 | } |
dd33e89f | 806 | |
ed1825f8 | 807 | # endif /* __USE_MISC */ |
b20e47cb | 808 | |
/* Drop the large body macros now that every user has been expanded.
   The last three are only removed in fast-math mode.  */
# undef __atan2_code
# ifdef __FAST_MATH__
# undef __sincos_code
# undef __exp_code
# undef __expm1_code
# endif /* __FAST_MATH__ */
3996f34b | 816 | |
ed1825f8 | 817 | # endif /* __NO_MATH_INLINES */ |
f43ce637 UD |
818 | |
819 | ||
820 | /* This code is used internally in the GNU libc. */ | |
# ifdef __LIBC_INTERNAL_MATH_INLINES
/* Internal square root: plain fsqrt for each floating type.  */
__inline_mathop (__ieee754_sqrt, "fsqrt")

/* Internal long double atan2: fpatan with __x as the "0" input and __y
   as the "u" input.  */
__inline_mathcode2_ (long double, __ieee754_atan2l, __y, __x,
		     register long double __angle;
		     __asm __volatile__ ("fpatan\n\t"
					 : "=t" (__angle)
					 : "0" (__x), "u" (__y) : "st(1)");
		     return __angle;)
# endif
f43ce637 | 830 | |
508ce3ac | 831 | #endif /* !__SSE2_MATH__ && !__x86_64__ */ |