]>
Commit | Line | Data |
---|---|---|
99dee823 | 1 | /* Copyright (C) 2008-2021 Free Software Foundation, Inc. |
d38a64b4 JR |
2 | Contributor: Joern Rennecke <joern.rennecke@embecosm.com> |
3 | on behalf of Synopsys Inc. | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 3, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
26 | /* We use a polynom similar to a Tchebycheff polynom to get an initial | |
27 | seed, and then use a Newton-Raphson iteration step to get an | |
28 | approximate result. | |
29 | If this result can't be rounded to the exact result with confidence, we | |
30 | round to the value between the two closest representable values, and | |
31 | test if the correctly rounded value is above or below this value. | |
32 | ||
33 | Because of the Newton-Raphson iteration step, an error in the seed at X | |
34 | is amplified by X. Therefore, we don't want a Tchebycheff polynom | |
35 | or a polynom that is close to optimal according to the maximum norm | |
36 | on the error of the seed value; we want one that is close to optimal | |
37 | according to the maximum norm on the error of the result, i.e. we | |
38 | want the maxima of the polynom to increase linearly. | |
39 | Given an interval [X0,X2) over which to approximate, | |
40 | with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have, | |
41 | like for Tchebycheff polynoms: | |
42 | P(0) := 1 | |
43 | but then we have: | |
44 | P(1) := X + S*D | |
45 | P(2) := 2 * X^2 + S*D * X - D^2 | |
46 | Then again: | |
47 | P(n+1) := 2 * X * P(n) - D^2 * P (n-1) | |
48 | */ | |

#include <math.h>
#include <stdio.h>
#include <string.h>

/* Smallest (most negative) error returned by err so far.  It starts at a
   positive sentinel so that the first call to err always replaces it.  */
static long double merr = 42.;

/* Return the signed error of the reciprocal approximation at X.

   A0 and A1 describe the linear seed  y0 = A0 + (X - 1) * A1.  One
   Newton-Raphson step  2*y0 - y0*X*y0  refines the seed, and the result
   is compared against 1/X computed in long double.

   Side effects: lowers the file-scope minimum MERR when the new error is
   smaller, and prints a diagnostic when the error is at or below
   -1/65536/16384 (i.e. -2^-30).

   The locals are deliberately not called `true' or `err': `true' is a
   macro in <stdbool.h> and a keyword in C23, and `err' would shadow this
   function.  */
double
err (long double a0, long double a1, long double x)
{
  long double seed = a0 + (x - 1) * a1;

  long double approx = 2. * seed - seed * x * seed;
  long double exact = 1. / x;
  long double diff = approx - exact;

  if (diff <= -1. / 65536. / 16384.)
    printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
            (double) x, (double) approx, (double) exact);
  if (merr > diff)
    merr = diff;
  /* The return type is double, so the long double error is narrowed.  */
  return diff;
}
68 | ||
/* Generate the seed table: for every interval [X0, X0 + 1/64) in [1, 2),
   print one `.long' assembler directive holding the packed coefficients
   of a linear approximation to 1/x on that interval.

   For each interval the code
     1. builds the Taylor coefficients T[] of 1/x around the midpoint X1,
     2. builds the polynomials P(0)..P(4) from the recurrence given in the
        comment at the top of this file (P[n][k] is the coefficient of
        X^k in P(n)),
     3. subtracts multiples of P(4), P(3), P(2) from T[] so that only a
        constant and a linear coefficient remain in use,
     4. quantises the linear coefficient to 12 bits and the constant to
        20 bits, iterating a few times because each depends on the other,
     5. packs both into one 32-bit word and prints it.

   Returns 0.  */
int
main (void)
{
  long double T[5]; /* Taylor polynom */
  long double P[5][5];
  int i, j;
  long double X0, X1, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, io;

  /* Interval-independent parts of P: P(0) = 1, the leading coefficients
     1, 2, 4, 8 of P(1)..P(4), and the constant term -D^2 of P(2).  */
  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i - 1);
  P[2][0] = -D*D;

  /* inc is a power of two, so X0 steps exactly through the intervals.  */
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      X1 = X0 + inc * 0.5;
      S = D / X1;

      /* T[i] = (-1)^i / X1^(i+1).  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
        T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
              (double)T[3], (double)T[4]);
#endif

      /* Interval-dependent parts of P: P(1) = X + S*D, the linear term of
         P(2), then P(n+1) = 2 * X * P(n) - D^2 * P(n-1) for P(3), P(4).  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
        {
          P[i][0] = -D*D*P[i-2][0];
          for (j = 1; j < i; j++)
            P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
        }
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
              (double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
              (double)P[4][3], (double)P[4][4]);
#endif

      /* Fold the degree 4, 3 and 2 terms of T into the lower coefficients
         by subtracting the matching multiple of P(i).  T[i] itself is left
         as is; only T[0] and T[1] are read in the live code below.  */
      for (i = 4; i > 1; i--)
        {
          long double a = T[i]/P[i][i];

          for (j = 0; j < i; j++)
            T[j] -= a * P[i][j];
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      /* Disabled experiment with a quantised quadratic coefficient.
         NOTE(review): `i' below looks like it was meant to be `i2' --
         confirm before enabling.  */
      long i2 = T[2]*1024;
      long double a = (T[2]-i/1024.)/P[2][2];
      for (j = 0; j < 2; j++)
        T[j] -= a * P[2][j];
#endif

      /* Quantise.  i1 ends up as minus a 12-bit magnitude (linear
         coefficient), i0 as a 20-bit value (constant coefficient).  Ti1 is
         the linear coefficient as decoded from the packed word, and T0 is
         the constant term adjusted for that rounding and re-based from
         powers of (x - X1) to powers of (x - 1).  Four passes are run
         because i1 depends on i0 and vice versa.  */
      long double T0, Ti1;
      for (i = 0, i0 = 0; i < 4; i++)
        {

          i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
          i1 = - (-i1 & 0x0fff);
          Ti1 = ((unsigned)(-i1 << 20) | i0) /-(long double)(1LL<<32LL);
          T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
          i0 = T0 * 1024 * 1024 + 0.5;
          i0 &= 0xfffff;
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif

      /* Pack: bits 31..20 hold the magnitude of i1, bits 19..0 hold i0.  */
      io = (unsigned)(-i1 << 20) | i0;
#if 0
      /* Decode the packed word again and report the resulting error at the
         interval ends, the midpoint and two further probe points.  */
      long double X2 = X0 + inc;
      long double A1 = (unsigned)io/-65536./65536.;
      long double A0 = (unsigned)(io << 12)/65536./65536.;
      long double Xm0 = 1./sqrt (-A1);
      long double Xm1 = 0.5+0.5*-A0/A1;
      printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
      printf ("%.12f %.12f %.12f\n",
              err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
      printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
      printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
#endif
      /* io is a long but %x expects unsigned int; the packed value fits in
         32 bits, so convert explicitly instead of relying on the ABI.  */
      printf ("\t.long 0x%x\n", (unsigned) io);
    }
#if 0
  printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
#endif
  return 0;
}