]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/fpu/e_sqrt.c
Update copyright dates with scripts/update-copyrights
[thirdparty/glibc.git] / sysdeps / powerpc / fpu / e_sqrt.c
CommitLineData
/* Double-precision floating point square root.
   Copyright (C) 1997-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

19#include <math.h>
20#include <math_private.h>
21#include <fenv_libc.h>
220622dd 22#include <libm-alias-finite.h>
169ea8f9 23#include <math-use-builtins.h>
ffdd5e50 24
169ea8f9
AZ
25double
26__ieee754_sqrt (double x)
27{
28#if USE_SQRT_BUILTIN
29 return __builtin_sqrt (x);
30#else
ffdd5e50
UD
31/* The method is based on a description in
32 Computation of elementary functions on the IBM RISC System/6000 processor,
33 P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
868f7a40 34 Basically, it consists of two interleaved Newton-Raphson approximations,
ffdd5e50
UD
35 one to find the actual square root, and one to find its reciprocal
36 without the expense of a division operation. The tricky bit here
37 is the use of the POWER/PowerPC multiply-add operation to get the
38 required accuracy with high speed.
39
40 The argument reduction works by a combination of table lookup to
41 obtain the initial guesses, and some careful modification of the
42 generated guesses (which mostly runs on the integer unit, while the
868f7a40 43 Newton-Raphson is running on the FPU). */
ffdd5e50 44
169ea8f9 45 extern const float __t_sqrt[1024];
ffdd5e50
UD
46
47 if (x > 0)
48 {
49 /* schedule the EXTRACT_WORDS to get separation between the store
0ac5ae23 50 and the load. */
ffdd5e50
UD
51 ieee_double_shape_type ew_u;
52 ieee_double_shape_type iw_u;
53 ew_u.value = (x);
169ea8f9 54 if (x != INFINITY)
ffdd5e50
UD
55 {
56 /* Variables named starting with 's' exist in the
57 argument-reduced space, so that 2 > sx >= 0.5,
58 1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
59 Variables named ending with 'i' are integer versions of
60 floating-point values. */
61 double sx; /* The value of which we're trying to find the
62 square root. */
63 double sg, g; /* Guess of the square root of x. */
64 double sd, d; /* Difference between the square of the guess and x. */
65 double sy; /* Estimate of 1/2g (overestimated by 1ulp). */
66 double sy2; /* 2*sy */
67 double e; /* Difference between y*g and 1/2 (se = e * fsy). */
68 double shx; /* == sx * fsg */
69 double fsg; /* sg*fsg == g. */
70 fenv_t fe; /* Saved floating-point environment (stores rounding
71 mode and whether the inexact exception is
72 enabled). */
73 uint32_t xi0, xi1, sxi, fsgi;
74 const float *t_sqrt;
75
76 fe = fegetenv_register ();
77 /* complete the EXTRACT_WORDS (xi0,xi1,x) operation. */
78 xi0 = ew_u.parts.msw;
79 xi1 = ew_u.parts.lsw;
80 relax_fenv_state ();
81 sxi = (xi0 & 0x3fffffff) | 0x3fe00000;
82 /* schedule the INSERT_WORDS (sx, sxi, xi1) to get separation
83 between the store and the load. */
84 iw_u.parts.msw = sxi;
85 iw_u.parts.lsw = xi1;
86 t_sqrt = __t_sqrt + (xi0 >> (52 - 32 - 8 - 1) & 0x3fe);
87 sg = t_sqrt[0];
88 sy = t_sqrt[1];
89 /* complete the INSERT_WORDS (sx, sxi, xi1) operation. */
90 sx = iw_u.value;
91
868f7a40 92 /* Here we have three Newton-Raphson iterations each of a
ffdd5e50
UD
93 division and a square root and the remainder of the
94 argument reduction, all interleaved. */
e8bd5286 95 sd = -__builtin_fma (sg, sg, -sx);
ffdd5e50
UD
96 fsgi = (xi0 + 0x40000000) >> 1 & 0x7ff00000;
97 sy2 = sy + sy;
e8bd5286
JM
98 sg = __builtin_fma (sy, sd, sg); /* 16-bit approximation to
99 sqrt(sx). */
ffdd5e50
UD
100
101 /* schedule the INSERT_WORDS (fsg, fsgi, 0) to get separation
102 between the store and the load. */
103 INSERT_WORDS (fsg, fsgi, 0);
104 iw_u.parts.msw = fsgi;
105 iw_u.parts.lsw = (0);
169ea8f9 106 e = -__builtin_fma (sy, sg, -0x1.0000000000001p-1);
e8bd5286 107 sd = -__builtin_fma (sg, sg, -sx);
ffdd5e50
UD
108 if ((xi0 & 0x7ff00000) == 0)
109 goto denorm;
e8bd5286
JM
110 sy = __builtin_fma (e, sy2, sy);
111 sg = __builtin_fma (sy, sd, sg); /* 32-bit approximation to
112 sqrt(sx). */
ffdd5e50
UD
113 sy2 = sy + sy;
114 /* complete the INSERT_WORDS (fsg, fsgi, 0) operation. */
115 fsg = iw_u.value;
169ea8f9 116 e = -__builtin_fma (sy, sg, -0x1.0000000000001p-1);
e8bd5286
JM
117 sd = -__builtin_fma (sg, sg, -sx);
118 sy = __builtin_fma (e, sy2, sy);
ffdd5e50 119 shx = sx * fsg;
e8bd5286
JM
120 sg = __builtin_fma (sy, sd, sg); /* 64-bit approximation to
121 sqrt(sx), but perhaps
122 rounded incorrectly. */
ffdd5e50
UD
123 sy2 = sy + sy;
124 g = sg * fsg;
169ea8f9 125 e = -__builtin_fma (sy, sg, -0x1.0000000000001p-1);
e8bd5286
JM
126 d = -__builtin_fma (g, sg, -shx);
127 sy = __builtin_fma (e, sy2, sy);
ffdd5e50 128 fesetenv_register (fe);
e8bd5286 129 return __builtin_fma (sy, d, g);
ffdd5e50
UD
130 denorm:
131 /* For denormalised numbers, we normalise, calculate the
132 square root, and return an adjusted result. */
133 fesetenv_register (fe);
169ea8f9 134 return __ieee754_sqrt (x * 0x1p+108f) * 0x1p-54f;
ffdd5e50
UD
135 }
136 }
137 else if (x < 0)
138 {
139 /* For some reason, some PowerPC32 processors don't implement
0ac5ae23 140 FE_INVALID_SQRT. */
169ea8f9 141# ifdef FE_INVALID_SQRT
0747f818 142 __feraiseexcept (FE_INVALID_SQRT);
c3a0ead4
UD
143
144 fenv_union_t u = { .fenv = fegetenv_register () };
4a28b3ca 145 if ((u.l & FE_INVALID) == 0)
169ea8f9 146# endif
0747f818 147 __feraiseexcept (FE_INVALID);
169ea8f9 148 x = NAN;
ffdd5e50
UD
149 }
150 return f_wash (x);
169ea8f9 151#endif /* USE_SQRT_BUILTIN */
ffdd5e50
UD
152}
153
220622dd 154libm_alias_finite (__ieee754_sqrt, __sqrt)