]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/fpu/e_sqrtf.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / powerpc / fpu / e_sqrtf.c
CommitLineData
/* Single-precision floating point square root.
   Copyright (C) 1997-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
18
19#include <math.h>
20#include <math_private.h>
21#include <fenv_libc.h>
22#include <inttypes.h>
e054f494 23#include <stdint.h>
ffdd5e50
UD
24#include <sysdep.h>
25#include <ldsodefs.h>
ffdd5e50
UD
26
27static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */
28static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
29static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
30static const float two48 = 281474976710656.0;
31static const float twom24 = 5.9604644775390625e-8;
32extern const float __t_sqrt[1024];
33
/* The method is based on a description in
   Computation of elementary functions on the IBM RISC System/6000 processor,
   P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
   Basically, it consists of two interleaved Newton-Raphson approximations,
   one to find the actual square root, and one to find its reciprocal
   without the expense of a division operation.  The tricky bit here
   is the use of the POWER/PowerPC multiply-add operation to get the
   required accuracy with high speed.

   The argument reduction works by a combination of table lookup to
   obtain the initial guesses, and some careful modification of the
   generated guesses (which mostly runs on the integer unit, while the
   Newton-Raphson is running on the FPU).  */
ffdd5e50 47
ffdd5e50
UD
48float
49__slow_ieee754_sqrtf (float x)
ffdd5e50
UD
50{
51 const float inf = a_inf.value;
52
53 if (x > 0)
54 {
55 if (x != inf)
56 {
57 /* Variables named starting with 's' exist in the
58 argument-reduced space, so that 2 > sx >= 0.5,
59 1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
60 Variables named ending with 'i' are integer versions of
61 floating-point values. */
62 float sx; /* The value of which we're trying to find the square
63 root. */
64 float sg, g; /* Guess of the square root of x. */
65 float sd, d; /* Difference between the square of the guess and x. */
66 float sy; /* Estimate of 1/2g (overestimated by 1ulp). */
67 float sy2; /* 2*sy */
68 float e; /* Difference between y*g and 1/2 (note that e==se). */
69 float shx; /* == sx * fsg */
70 float fsg; /* sg*fsg == g. */
71 fenv_t fe; /* Saved floating-point environment (stores rounding
72 mode and whether the inexact exception is
73 enabled). */
74 uint32_t xi, sxi, fsgi;
75 const float *t_sqrt;
76
77 GET_FLOAT_WORD (xi, x);
78 fe = fegetenv_register ();
79 relax_fenv_state ();
80 sxi = (xi & 0x3fffffff) | 0x3f000000;
81 SET_FLOAT_WORD (sx, sxi);
82 t_sqrt = __t_sqrt + (xi >> (23 - 8 - 1) & 0x3fe);
83 sg = t_sqrt[0];
84 sy = t_sqrt[1];
85
868f7a40 86 /* Here we have three Newton-Raphson iterations each of a
ffdd5e50
UD
87 division and a square root and the remainder of the
88 argument reduction, all interleaved. */
89 sd = -(sg * sg - sx);
90 fsgi = (xi + 0x40000000) >> 1 & 0x7f800000;
91 sy2 = sy + sy;
92 sg = sy * sd + sg; /* 16-bit approximation to sqrt(sx). */
93 e = -(sy * sg - almost_half);
94 SET_FLOAT_WORD (fsg, fsgi);
95 sd = -(sg * sg - sx);
96 sy = sy + e * sy2;
97 if ((xi & 0x7f800000) == 0)
98 goto denorm;
99 shx = sx * fsg;
100 sg = sg + sy * sd; /* 32-bit approximation to sqrt(sx),
101 but perhaps rounded incorrectly. */
102 sy2 = sy + sy;
103 g = sg * fsg;
104 e = -(sy * sg - almost_half);
105 d = -(g * sg - shx);
106 sy = sy + e * sy2;
107 fesetenv_register (fe);
108 return g + sy * d;
109 denorm:
110 /* For denormalised numbers, we normalise, calculate the
111 square root, and return an adjusted result. */
112 fesetenv_register (fe);
113 return __slow_ieee754_sqrtf (x * two48) * twom24;
114 }
115 }
116 else if (x < 0)
117 {
118 /* For some reason, some PowerPC32 processors don't implement
0ac5ae23 119 FE_INVALID_SQRT. */
ffdd5e50
UD
120#ifdef FE_INVALID_SQRT
121 feraiseexcept (FE_INVALID_SQRT);
c3a0ead4
UD
122
123 fenv_union_t u = { .fenv = fegetenv_register () };
4a28b3ca 124 if ((u.l & FE_INVALID) == 0)
ffdd5e50
UD
125#endif
126 feraiseexcept (FE_INVALID);
127 x = a_nan.value;
128 }
129 return f_washf (x);
130}
131
8a6d5255 132#undef __ieee754_sqrtf
ffdd5e50
UD
133float
134__ieee754_sqrtf (float x)
ffdd5e50
UD
135{
136 double z;
137
433f49c4
UD
138 /* If the CPU is 64-bit we can use the optional FP instructions. */
139 if (__CPU_HAS_FSQRT)
ffdd5e50 140 {
c3a0ead4 141 /* Volatile is required to prevent the compiler from moving the
0ac5ae23 142 fsqrt instruction above the branch. */
ffdd5e50
UD
143 __asm __volatile (" fsqrts %0,%1\n"
144 :"=f" (z):"f" (x));
145 }
146 else
147 z = __slow_ieee754_sqrtf (x);
148
149 return z;
150}
0ac5ae23 151strong_alias (__ieee754_sqrtf, __sqrtf_finite)