]>
Commit | Line | Data |
---|---|---|
e4d82761 UD |
1 | /* |
2 | * IBM Accurate Mathematical Library | |
aeb25823 | 3 | * written by International Business Machines Corp. |
04277e02 | 4 | * Copyright (C) 2001-2019 Free Software Foundation, Inc. |
e4d82761 UD |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU Lesser General Public License as published by | |
cc7375ce | 8 | * the Free Software Foundation; either version 2.1 of the License, or |
e4d82761 | 9 | * (at your option) any later version. |
50944bca | 10 | * |
e4d82761 UD |
11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
c6c6dd48 | 14 | * GNU Lesser General Public License for more details. |
e4d82761 UD |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public License | |
59ba27a6 | 17 | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
e4d82761 UD |
18 | */ |
19 | /****************************************************************************/ | |
20 | /* MODULE_NAME:mpsqrt.c */ | |
21 | /* */ | |
22 | /* FUNCTION:mpsqrt */ | |
23 | /* fastiroot */ | |
24 | /* */ | |
25 | /* FILES NEEDED:endian.h mpa.h mpsqrt.h */ | |
26 | /* mpa.c */ | |
27 | /* Multi-Precision square root function subroutine for precision p >= 4. */ | |
28 | /* The relative error is bounded by 3.501*r**(1-p), where r=2**24. */ | |
29 | /* */ | |
30 | /****************************************************************************/ | |
31 | #include "endian.h" | |
32 | #include "mpa.h" | |
33 | ||
31d3cc00 UD |
34 | #ifndef SECTION |
35 | # define SECTION | |
36 | #endif | |
37 | ||
38 | #include "mpsqrt.h" | |
39 | ||
e4d82761 UD |
40 | /****************************************************************************/ |
41 | /* Multi-Precision square root function subroutine for precision p >= 4. */ | |
42 | /* The relative error is bounded by 3.501*r**(1-p), where r=2**24. */ | |
43 | /* Routine receives two pointers to Multi Precision numbers: */ | |
44 | /* x (left argument) and y (next argument). Routine also receives precision */ | |
45 | /* p as integer. Routine computes sqrt(*x) and stores result in *y */ | |
46 | /****************************************************************************/ | |
47 | ||
09c14ed2 | 48 | static double fastiroot (double); |
e4d82761 | 49 | |
31d3cc00 UD |
50 | void |
51 | SECTION | |
09c14ed2 SP |
52 | __mpsqrt (mp_no *x, mp_no *y, int p) |
53 | { | |
54 | int i, m, ey; | |
55 | double dx, dy; | |
b33d4ce4 SP |
56 | static const mp_no mphalf = {0, {1.0, HALFRAD}}; |
57 | static const mp_no mp3halfs = {1, {1.0, 1.0, HALFRAD}}; | |
09c14ed2 | 58 | mp_no mpxn, mpz, mpu, mpt1, mpt2; |
e4d82761 | 59 | |
09c14ed2 SP |
60 | ey = EX / 2; |
61 | __cpy (x, &mpxn, p); | |
62 | mpxn.e -= (ey + ey); | |
63 | __mp_dbl (&mpxn, &dx, p); | |
64 | dy = fastiroot (dx); | |
65 | __dbl_mp (dy, &mpu, p); | |
66 | __mul (&mpxn, &mphalf, &mpz, p); | |
e4d82761 | 67 | |
09c14ed2 SP |
68 | m = __mpsqrt_mp[p]; |
69 | for (i = 0; i < m; i++) | |
70 | { | |
71 | __sqr (&mpu, &mpt1, p); | |
72 | __mul (&mpt1, &mpz, &mpt2, p); | |
73 | __sub (&mp3halfs, &mpt2, &mpt1, p); | |
74 | __mul (&mpu, &mpt1, &mpt2, p); | |
75 | __cpy (&mpt2, &mpu, p); | |
76 | } | |
77 | __mul (&mpxn, &mpu, y, p); | |
78 | EY += ey; | |
e4d82761 UD |
79 | } |
80 | ||
81 | /***********************************************************/ | |
82 | /* Compute a double precision approximation for 1/sqrt(x) */ | |
83 | /* with the relative error bounded by 2**-51. */ | |
84 | /***********************************************************/ | |
31d3cc00 UD |
85 | static double |
86 | SECTION | |
09c14ed2 SP |
87 | fastiroot (double x) |
88 | { | |
89 | union | |
90 | { | |
91 | int i[2]; | |
92 | double d; | |
93 | } p, q; | |
94 | double y, z, t; | |
bb3f4825 | 95 | int n; |
09c14ed2 SP |
96 | static const double c0 = 0.99674, c1 = -0.53380; |
97 | static const double c2 = 0.45472, c3 = -0.21553; | |
50944bca | 98 | |
e4d82761 | 99 | p.d = x; |
09c14ed2 | 100 | p.i[HIGH_HALF] = (p.i[HIGH_HALF] & 0x3FFFFFFF) | 0x3FE00000; |
e4d82761 UD |
101 | q.d = x; |
102 | y = p.d; | |
09c14ed2 SP |
103 | z = y - 1.0; |
104 | n = (q.i[HIGH_HALF] - p.i[HIGH_HALF]) >> 1; | |
105 | z = ((c3 * z + c2) * z + c1) * z + c0; /* 2**-7 */ | |
106 | z = z * (1.5 - 0.5 * y * z * z); /* 2**-14 */ | |
107 | p.d = z * (1.5 - 0.5 * y * z * z); /* 2**-28 */ | |
e4d82761 | 108 | p.i[HIGH_HALF] -= n; |
09c14ed2 SP |
109 | t = x * p.d; |
110 | return p.d * (1.5 - 0.5 * p.d * t); | |
e4d82761 | 111 | } |