]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/fpu/e_sqrtf.c
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / powerpc / fpu / e_sqrtf.c
CommitLineData
/* Single-precision floating point square root.
   Copyright (C) 1997-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
ffdd5e50
UD
18
19#include <math.h>
20#include <math_private.h>
418d99e6 21#include <fenv.h>
ffdd5e50
UD
22#include <fenv_libc.h>
23#include <inttypes.h>
e054f494 24#include <stdint.h>
ffdd5e50
UD
25#include <sysdep.h>
26#include <ldsodefs.h>
ffdd5e50 27
08cee2a4 28#ifndef _ARCH_PPCSQ
ffdd5e50
UD
29static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */
30static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
31static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
32static const float two48 = 281474976710656.0;
33static const float twom24 = 5.9604644775390625e-8;
34extern const float __t_sqrt[1024];
35
/* The method is based on a description in
   Computation of elementary functions on the IBM RISC System/6000 processor,
   P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
   Basically, it consists of two interleaved Newton-Raphson approximations,
   one to find the actual square root, and one to find its reciprocal
   without the expense of a division operation.  The tricky bit here
   is the use of the POWER/PowerPC multiply-add operation to get the
   required accuracy with high speed.

   The argument reduction works by a combination of table lookup to
   obtain the initial guesses, and some careful modification of the
   generated guesses (which mostly runs on the integer unit, while the
   Newton-Raphson is running on the FPU).  */
ffdd5e50 49
ffdd5e50
UD
50float
51__slow_ieee754_sqrtf (float x)
ffdd5e50
UD
52{
53 const float inf = a_inf.value;
54
55 if (x > 0)
56 {
57 if (x != inf)
58 {
59 /* Variables named starting with 's' exist in the
60 argument-reduced space, so that 2 > sx >= 0.5,
61 1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
62 Variables named ending with 'i' are integer versions of
63 floating-point values. */
64 float sx; /* The value of which we're trying to find the square
65 root. */
66 float sg, g; /* Guess of the square root of x. */
67 float sd, d; /* Difference between the square of the guess and x. */
68 float sy; /* Estimate of 1/2g (overestimated by 1ulp). */
69 float sy2; /* 2*sy */
70 float e; /* Difference between y*g and 1/2 (note that e==se). */
71 float shx; /* == sx * fsg */
72 float fsg; /* sg*fsg == g. */
73 fenv_t fe; /* Saved floating-point environment (stores rounding
74 mode and whether the inexact exception is
75 enabled). */
76 uint32_t xi, sxi, fsgi;
77 const float *t_sqrt;
78
79 GET_FLOAT_WORD (xi, x);
80 fe = fegetenv_register ();
81 relax_fenv_state ();
82 sxi = (xi & 0x3fffffff) | 0x3f000000;
83 SET_FLOAT_WORD (sx, sxi);
84 t_sqrt = __t_sqrt + (xi >> (23 - 8 - 1) & 0x3fe);
85 sg = t_sqrt[0];
86 sy = t_sqrt[1];
87
868f7a40 88 /* Here we have three Newton-Raphson iterations each of a
ffdd5e50
UD
89 division and a square root and the remainder of the
90 argument reduction, all interleaved. */
95c26233 91 sd = -__builtin_fmaf (sg, sg, -sx);
ffdd5e50
UD
92 fsgi = (xi + 0x40000000) >> 1 & 0x7f800000;
93 sy2 = sy + sy;
95c26233
JM
94 sg = __builtin_fmaf (sy, sd, sg); /* 16-bit approximation to
95 sqrt(sx). */
96 e = -__builtin_fmaf (sy, sg, -almost_half);
ffdd5e50 97 SET_FLOAT_WORD (fsg, fsgi);
95c26233
JM
98 sd = -__builtin_fmaf (sg, sg, -sx);
99 sy = __builtin_fmaf (e, sy2, sy);
ffdd5e50
UD
100 if ((xi & 0x7f800000) == 0)
101 goto denorm;
102 shx = sx * fsg;
95c26233
JM
103 sg = __builtin_fmaf (sy, sd, sg); /* 32-bit approximation to
104 sqrt(sx), but perhaps
105 rounded incorrectly. */
ffdd5e50
UD
106 sy2 = sy + sy;
107 g = sg * fsg;
95c26233
JM
108 e = -__builtin_fmaf (sy, sg, -almost_half);
109 d = -__builtin_fmaf (g, sg, -shx);
110 sy = __builtin_fmaf (e, sy2, sy);
ffdd5e50 111 fesetenv_register (fe);
95c26233 112 return __builtin_fmaf (sy, d, g);
ffdd5e50
UD
113 denorm:
114 /* For denormalised numbers, we normalise, calculate the
115 square root, and return an adjusted result. */
116 fesetenv_register (fe);
117 return __slow_ieee754_sqrtf (x * two48) * twom24;
118 }
119 }
120 else if (x < 0)
121 {
122 /* For some reason, some PowerPC32 processors don't implement
0ac5ae23 123 FE_INVALID_SQRT. */
ffdd5e50
UD
124#ifdef FE_INVALID_SQRT
125 feraiseexcept (FE_INVALID_SQRT);
c3a0ead4
UD
126
127 fenv_union_t u = { .fenv = fegetenv_register () };
4a28b3ca 128 if ((u.l & FE_INVALID) == 0)
ffdd5e50
UD
129#endif
130 feraiseexcept (FE_INVALID);
131 x = a_nan.value;
132 }
133 return f_washf (x);
134}
08cee2a4 135#endif /* _ARCH_PPCSQ */
ffdd5e50 136
8a6d5255 137#undef __ieee754_sqrtf
ffdd5e50
UD
138float
139__ieee754_sqrtf (float x)
ffdd5e50 140{
a51bc4fe 141 float z;
ffdd5e50 142
08cee2a4
AZ
143#ifdef _ARCH_PPCSQ
144 asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x));
145#else
146 z = __slow_ieee754_sqrtf (x);
147#endif
ffdd5e50
UD
148
149 return z;
150}
0ac5ae23 151strong_alias (__ieee754_sqrtf, __sqrtf_finite)