]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/powerpc/fpu/e_sqrtf.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / powerpc / fpu / e_sqrtf.c
CommitLineData
ffdd5e50 1/* Single-precision floating point square root.
688903eb 2 Copyright (C) 1997-2018 Free Software Foundation, Inc.
ffdd5e50
UD
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
ffdd5e50
UD
18
19#include <math.h>
20#include <math_private.h>
21#include <fenv_libc.h>
22#include <inttypes.h>
e054f494 23#include <stdint.h>
ffdd5e50
UD
24#include <sysdep.h>
25#include <ldsodefs.h>
ffdd5e50 26
08cee2a4 27#ifndef _ARCH_PPCSQ
ffdd5e50
UD
28static const float almost_half = 0.50000006; /* 0.5 + 2^-24 */
29static const ieee_float_shape_type a_nan = {.word = 0x7fc00000 };
30static const ieee_float_shape_type a_inf = {.word = 0x7f800000 };
31static const float two48 = 281474976710656.0;
32static const float twom24 = 5.9604644775390625e-8;
33extern const float __t_sqrt[1024];
34
/* The method is based on a description in
   Computation of elementary functions on the IBM RISC System/6000 processor,
   P. W. Markstein, IBM J. Res. Develop, 34(1) 1990.
   Basically, it consists of two interleaved Newton-Raphson approximations,
   one to find the actual square root, and one to find its reciprocal
   without the expense of a division operation.  The tricky bit here
   is the use of the POWER/PowerPC multiply-add operation to get the
   required accuracy with high speed.

   The argument reduction works by a combination of table lookup to
   obtain the initial guesses, and some careful modification of the
   generated guesses (which mostly runs on the integer unit, while the
   Newton-Raphson is running on the FPU).  */
ffdd5e50 48
ffdd5e50
UD
49float
50__slow_ieee754_sqrtf (float x)
ffdd5e50
UD
51{
52 const float inf = a_inf.value;
53
54 if (x > 0)
55 {
56 if (x != inf)
57 {
58 /* Variables named starting with 's' exist in the
59 argument-reduced space, so that 2 > sx >= 0.5,
60 1.41... > sg >= 0.70.., 0.70.. >= sy > 0.35... .
61 Variables named ending with 'i' are integer versions of
62 floating-point values. */
63 float sx; /* The value of which we're trying to find the square
64 root. */
65 float sg, g; /* Guess of the square root of x. */
66 float sd, d; /* Difference between the square of the guess and x. */
67 float sy; /* Estimate of 1/2g (overestimated by 1ulp). */
68 float sy2; /* 2*sy */
69 float e; /* Difference between y*g and 1/2 (note that e==se). */
70 float shx; /* == sx * fsg */
71 float fsg; /* sg*fsg == g. */
72 fenv_t fe; /* Saved floating-point environment (stores rounding
73 mode and whether the inexact exception is
74 enabled). */
75 uint32_t xi, sxi, fsgi;
76 const float *t_sqrt;
77
78 GET_FLOAT_WORD (xi, x);
79 fe = fegetenv_register ();
80 relax_fenv_state ();
81 sxi = (xi & 0x3fffffff) | 0x3f000000;
82 SET_FLOAT_WORD (sx, sxi);
83 t_sqrt = __t_sqrt + (xi >> (23 - 8 - 1) & 0x3fe);
84 sg = t_sqrt[0];
85 sy = t_sqrt[1];
86
868f7a40 87 /* Here we have three Newton-Raphson iterations each of a
ffdd5e50
UD
88 division and a square root and the remainder of the
89 argument reduction, all interleaved. */
95c26233 90 sd = -__builtin_fmaf (sg, sg, -sx);
ffdd5e50
UD
91 fsgi = (xi + 0x40000000) >> 1 & 0x7f800000;
92 sy2 = sy + sy;
95c26233
JM
93 sg = __builtin_fmaf (sy, sd, sg); /* 16-bit approximation to
94 sqrt(sx). */
95 e = -__builtin_fmaf (sy, sg, -almost_half);
ffdd5e50 96 SET_FLOAT_WORD (fsg, fsgi);
95c26233
JM
97 sd = -__builtin_fmaf (sg, sg, -sx);
98 sy = __builtin_fmaf (e, sy2, sy);
ffdd5e50
UD
99 if ((xi & 0x7f800000) == 0)
100 goto denorm;
101 shx = sx * fsg;
95c26233
JM
102 sg = __builtin_fmaf (sy, sd, sg); /* 32-bit approximation to
103 sqrt(sx), but perhaps
104 rounded incorrectly. */
ffdd5e50
UD
105 sy2 = sy + sy;
106 g = sg * fsg;
95c26233
JM
107 e = -__builtin_fmaf (sy, sg, -almost_half);
108 d = -__builtin_fmaf (g, sg, -shx);
109 sy = __builtin_fmaf (e, sy2, sy);
ffdd5e50 110 fesetenv_register (fe);
95c26233 111 return __builtin_fmaf (sy, d, g);
ffdd5e50
UD
112 denorm:
113 /* For denormalised numbers, we normalise, calculate the
114 square root, and return an adjusted result. */
115 fesetenv_register (fe);
116 return __slow_ieee754_sqrtf (x * two48) * twom24;
117 }
118 }
119 else if (x < 0)
120 {
121 /* For some reason, some PowerPC32 processors don't implement
0ac5ae23 122 FE_INVALID_SQRT. */
ffdd5e50
UD
123#ifdef FE_INVALID_SQRT
124 feraiseexcept (FE_INVALID_SQRT);
c3a0ead4
UD
125
126 fenv_union_t u = { .fenv = fegetenv_register () };
4a28b3ca 127 if ((u.l & FE_INVALID) == 0)
ffdd5e50
UD
128#endif
129 feraiseexcept (FE_INVALID);
130 x = a_nan.value;
131 }
132 return f_washf (x);
133}
08cee2a4 134#endif /* _ARCH_PPCSQ */
ffdd5e50 135
8a6d5255 136#undef __ieee754_sqrtf
ffdd5e50
UD
137float
138__ieee754_sqrtf (float x)
ffdd5e50
UD
139{
140 double z;
141
08cee2a4
AZ
142#ifdef _ARCH_PPCSQ
143 asm ("fsqrts %0,%1\n" :"=f" (z):"f" (x));
144#else
145 z = __slow_ieee754_sqrtf (x);
146#endif
ffdd5e50
UD
147
148 return z;
149}
0ac5ae23 150strong_alias (__ieee754_sqrtf, __sqrtf_finite)