]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
Remove unnecessary local variable mptwo
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc64 / power4 / fpu / mpa.c
1
2 /*
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 /************************************************************************/
21 /* MODULE_NAME: mpa.c */
22 /* */
23 /* FUNCTIONS: */
24 /* mcr */
25 /* acr */
26 /* cpy */
27 /* norm */
28 /* denorm */
29 /* mp_dbl */
30 /* dbl_mp */
31 /* add_magnitudes */
32 /* sub_magnitudes */
33 /* add */
34 /* sub */
35 /* mul */
36 /* inv */
37 /* dvd */
38 /* */
39 /* Arithmetic functions for multiple precision numbers. */
40 /* Relative errors are bounded */
41 /************************************************************************/
42
43
44 #include "endian.h"
45 #include "mpa.h"
46 #include "mpa2.h"
47 #include <sys/param.h>
48
49 const mp_no mpone = {1, {1.0, 1.0}};
50 const mp_no mptwo = {1, {1.0, 2.0}};
51
52 /* Compare mantissa of two multiple precision numbers regardless of the sign
53 and exponent of the numbers. */
54 static int mcr(const mp_no *x, const mp_no *y, int p) {
55 long i;
56 long p2 = p;
57 for (i=1; i<=p2; i++) {
58 if (X[i] == Y[i]) continue;
59 else if (X[i] > Y[i]) return 1;
60 else return -1; }
61 return 0;
62 }
63
64 /* Compare the absolute values of two multiple precision numbers. */
65 int __acr(const mp_no *x, const mp_no *y, int p) {
66 long i;
67
68 if (X[0] == ZERO) {
69 if (Y[0] == ZERO) i= 0;
70 else i=-1;
71 }
72 else if (Y[0] == ZERO) i= 1;
73 else {
74 if (EX > EY) i= 1;
75 else if (EX < EY) i=-1;
76 else i= mcr(x,y,p);
77 }
78
79 return i;
80 }
81
82 /* Copy multiple precision number X into Y. They could be the same
83 number. */
84 void __cpy(const mp_no *x, mp_no *y, int p) {
85 long i;
86
87 EY = EX;
88 for (i=0; i <= p; i++) Y[i] = X[i];
89
90 return;
91 }
92
93 /* Convert a multiple precision number *X into a double precision
94 number *Y, normalized case (|x| >= 2**(-1022))). */
95 static void norm(const mp_no *x, double *y, int p)
96 {
97 #define R RADIXI
98 long i;
99 double a,c,u,v,z[5];
100 if (p<5) {
101 if (p==1) c = X[1];
102 else if (p==2) c = X[1] + R* X[2];
103 else if (p==3) c = X[1] + R*(X[2] + R* X[3]);
104 else if (p==4) c =(X[1] + R* X[2]) + R*R*(X[3] + R*X[4]);
105 }
106 else {
107 for (a=ONE, z[1]=X[1]; z[1] < TWO23; )
108 {a *= TWO; z[1] *= TWO; }
109
110 for (i=2; i<5; i++) {
111 z[i] = X[i]*a;
112 u = (z[i] + CUTTER)-CUTTER;
113 if (u > z[i]) u -= RADIX;
114 z[i] -= u;
115 z[i-1] += u*RADIXI;
116 }
117
118 u = (z[3] + TWO71) - TWO71;
119 if (u > z[3]) u -= TWO19;
120 v = z[3]-u;
121
122 if (v == TWO18) {
123 if (z[4] == ZERO) {
124 for (i=5; i <= p; i++) {
125 if (X[i] == ZERO) continue;
126 else {z[3] += ONE; break; }
127 }
128 }
129 else z[3] += ONE;
130 }
131
132 c = (z[1] + R *(z[2] + R * z[3]))/a;
133 }
134
135 c *= X[0];
136
137 for (i=1; i<EX; i++) c *= RADIX;
138 for (i=1; i>EX; i--) c *= RADIXI;
139
140 *y = c;
141 return;
142 #undef R
143 }
144
145 /* Convert a multiple precision number *X into a double precision
146 number *Y, Denormal case (|x| < 2**(-1022))). */
147 static void denorm(const mp_no *x, double *y, int p)
148 {
149 long i,k;
150 long p2 = p;
151 double c,u,z[5];
152
153 #define R RADIXI
154 if (EX<-44 || (EX==-44 && X[1]<TWO5))
155 { *y=ZERO; return; }
156
157 if (p2==1) {
158 if (EX==-42) {z[1]=X[1]+TWO10; z[2]=ZERO; z[3]=ZERO; k=3;}
159 else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=ZERO; k=2;}
160 else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
161 }
162 else if (p2==2) {
163 if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; z[3]=ZERO; k=3;}
164 else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; z[3]=X[2]; k=2;}
165 else {z[1]= TWO10; z[2]=ZERO; z[3]=X[1]; k=1;}
166 }
167 else {
168 if (EX==-42) {z[1]=X[1]+TWO10; z[2]=X[2]; k=3;}
169 else if (EX==-43) {z[1]= TWO10; z[2]=X[1]; k=2;}
170 else {z[1]= TWO10; z[2]=ZERO; k=1;}
171 z[3] = X[k];
172 }
173
174 u = (z[3] + TWO57) - TWO57;
175 if (u > z[3]) u -= TWO5;
176
177 if (u==z[3]) {
178 for (i=k+1; i <= p2; i++) {
179 if (X[i] == ZERO) continue;
180 else {z[3] += ONE; break; }
181 }
182 }
183
184 c = X[0]*((z[1] + R*(z[2] + R*z[3])) - TWO10);
185
186 *y = c*TWOM1032;
187 return;
188
189 #undef R
190 }
191
192 /* Convert multiple precision number *X into double precision number *Y. The
193 result is correctly rounded to the nearest/even. */
194 void __mp_dbl(const mp_no *x, double *y, int p) {
195
196 if (X[0] == ZERO) {*y = ZERO; return; }
197
198 if (EX> -42) norm(x,y,p);
199 else if (EX==-42 && X[1]>=TWO10) norm(x,y,p);
200 else denorm(x,y,p);
201 }
202
203 /* Get the multiple precision equivalent of X into *Y. If the precision is too
204 small, the result is truncated. */
205 void __dbl_mp(double x, mp_no *y, int p) {
206
207 long i,n;
208 long p2 = p;
209 double u;
210
211 /* Sign. */
212 if (x == ZERO) {Y[0] = ZERO; return; }
213 else if (x > ZERO) Y[0] = ONE;
214 else {Y[0] = MONE; x=-x; }
215
216 /* Exponent. */
217 for (EY=ONE; x >= RADIX; EY += ONE) x *= RADIXI;
218 for ( ; x < ONE; EY -= ONE) x *= RADIX;
219
220 /* Digits. */
221 n=MIN(p2,4);
222 for (i=1; i<=n; i++) {
223 u = (x + TWO52) - TWO52;
224 if (u>x) u -= ONE;
225 Y[i] = u; x -= u; x *= RADIX; }
226 for ( ; i<=p2; i++) Y[i] = ZERO;
227 return;
228 }
229
230 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
231 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
232 Y and Z. No guard digit is used. The result equals the exact sum,
233 truncated. */
234 static void add_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
235
236 long i,j,k;
237 long p2 = p;
238
239 EZ = EX;
240
241 i=p2; j=p2+ EY - EX; k=p2+1;
242
243 if (j<1)
244 {__cpy(x,z,p); return; }
245 else Z[k] = ZERO;
246
247 for (; j>0; i--,j--) {
248 Z[k] += X[i] + Y[j];
249 if (Z[k] >= RADIX) {
250 Z[k] -= RADIX;
251 Z[--k] = ONE; }
252 else
253 Z[--k] = ZERO;
254 }
255
256 for (; i>0; i--) {
257 Z[k] += X[i];
258 if (Z[k] >= RADIX) {
259 Z[k] -= RADIX;
260 Z[--k] = ONE; }
261 else
262 Z[--k] = ZERO;
263 }
264
265 if (Z[1] == ZERO) {
266 for (i=1; i<=p2; i++) Z[i] = Z[i+1]; }
267 else EZ += ONE;
268 }
269
270 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
271 The sign of the difference *Z is not changed. X and Y may overlap but not X
272 and Z or Y and Z. One guard digit is used. The error is less than one
273 ULP. */
274 static void sub_magnitudes(const mp_no *x, const mp_no *y, mp_no *z, int p) {
275
276 long i,j,k;
277 long p2 = p;
278
279 EZ = EX;
280
281 if (EX == EY) {
282 i=j=k=p2;
283 Z[k] = Z[k+1] = ZERO; }
284 else {
285 j= EX - EY;
286 if (j > p2) {__cpy(x,z,p); return; }
287 else {
288 i=p2; j=p2+1-j; k=p2;
289 if (Y[j] > ZERO) {
290 Z[k+1] = RADIX - Y[j--];
291 Z[k] = MONE; }
292 else {
293 Z[k+1] = ZERO;
294 Z[k] = ZERO; j--;}
295 }
296 }
297
298 for (; j>0; i--,j--) {
299 Z[k] += (X[i] - Y[j]);
300 if (Z[k] < ZERO) {
301 Z[k] += RADIX;
302 Z[--k] = MONE; }
303 else
304 Z[--k] = ZERO;
305 }
306
307 for (; i>0; i--) {
308 Z[k] += X[i];
309 if (Z[k] < ZERO) {
310 Z[k] += RADIX;
311 Z[--k] = MONE; }
312 else
313 Z[--k] = ZERO;
314 }
315
316 for (i=1; Z[i] == ZERO; i++) ;
317 EZ = EZ - i + 1;
318 for (k=1; i <= p2+1; )
319 Z[k++] = Z[i++];
320 for (; k <= p2; )
321 Z[k++] = ZERO;
322
323 return;
324 }
325
326 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
327 and Z or Y and Z. One guard digit is used. The error is less than one
328 ULP. */
329 void __add(const mp_no *x, const mp_no *y, mp_no *z, int p) {
330
331 int n;
332
333 if (X[0] == ZERO) {__cpy(y,z,p); return; }
334 else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
335
336 if (X[0] == Y[0]) {
337 if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
338 else {add_magnitudes(y,x,z,p); Z[0] = Y[0]; }
339 }
340 else {
341 if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
342 else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = Y[0]; }
343 else Z[0] = ZERO;
344 }
345 return;
346 }
347
348 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
349 not X and Z or Y and Z. One guard digit is used. The error is less than
350 one ULP. */
351 void __sub(const mp_no *x, const mp_no *y, mp_no *z, int p) {
352
353 int n;
354
355 if (X[0] == ZERO) {__cpy(y,z,p); Z[0] = -Z[0]; return; }
356 else if (Y[0] == ZERO) {__cpy(x,z,p); return; }
357
358 if (X[0] != Y[0]) {
359 if (__acr(x,y,p) > 0) {add_magnitudes(x,y,z,p); Z[0] = X[0]; }
360 else {add_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
361 }
362 else {
363 if ((n=__acr(x,y,p)) == 1) {sub_magnitudes(x,y,z,p); Z[0] = X[0]; }
364 else if (n == -1) {sub_magnitudes(y,x,z,p); Z[0] = -Y[0]; }
365 else Z[0] = ZERO;
366 }
367 return;
368 }
369
370 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
371 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
372 digits. In case P > 3 the error is bounded by 1.001 ULP. */
373 void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) {
374
375 long i, i1, i2, j, k, k2;
376 long p2 = p;
377 double u, zk, zk2;
378
379 /* Is z=0? */
380 if (X[0]*Y[0]==ZERO)
381 { Z[0]=ZERO; return; }
382
383 /* Multiply, add and carry */
384 k2 = (p2<3) ? p2+p2 : p2+3;
385 zk = Z[k2]=ZERO;
386 for (k=k2; k>1; ) {
387 if (k > p2) {i1=k-p2; i2=p2+1; }
388 else {i1=1; i2=k; }
389 #if 1
390 /* Rearrange this inner loop to allow the fmadd instructions to be
391 independent and execute in parallel on processors that have
392 dual symmetrical FP pipelines. */
393 if (i1 < (i2-1))
394 {
395 /* Make sure we have at least 2 iterations. */
396 if (((i2 - i1) & 1L) == 1L)
397 {
398 /* Handle the odd iterations case. */
399 zk2 = x->d[i2-1]*y->d[i1];
400 }
401 else
402 zk2 = 0.0;
403 /* Do two multiply/adds per loop iteration, using independent
404 accumulators; zk and zk2. */
405 for (i=i1,j=i2-1; i<i2-1; i+=2,j-=2)
406 {
407 zk += x->d[i]*y->d[j];
408 zk2 += x->d[i+1]*y->d[j-1];
409 }
410 zk += zk2; /* Final sum. */
411 }
412 else
413 {
414 /* Special case when iterations is 1. */
415 zk += x->d[i1]*y->d[i1];
416 }
417 #else
418 /* The original code. */
419 for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j];
420 #endif
421
422 u = (zk + CUTTER)-CUTTER;
423 if (u > zk) u -= RADIX;
424 Z[k] = zk - u;
425 zk = u*RADIXI;
426 --k;
427 }
428 Z[k] = zk;
429
430 /* Is there a carry beyond the most significant digit? */
431 if (Z[1] == ZERO) {
432 for (i=1; i<=p2; i++) Z[i]=Z[i+1];
433 EZ = EX + EY - 1; }
434 else
435 EZ = EX + EY;
436
437 Z[0] = X[0] * Y[0];
438 return;
439 }
440
441 /* Invert *X and store in *Y. Relative error bound:
442 - For P = 2: 1.001 * R ^ (1 - P)
443 - For P = 3: 1.063 * R ^ (1 - P)
444 - For P > 3: 2.001 * R ^ (1 - P)
445
446 *X = 0 is not permissible. */
447 void __inv(const mp_no *x, mp_no *y, int p) {
448 long i;
449 double t;
450 mp_no z,w;
451 static const int np1[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
452 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
453
454 __cpy(x,&z,p); z.e=0; __mp_dbl(&z,&t,p);
455 t=ONE/t; __dbl_mp(t,y,p); EY -= EX;
456
457 for (i=0; i<np1[p]; i++) {
458 __cpy(y,&w,p);
459 __mul(x,&w,y,p);
460 __sub(&mptwo,y,&z,p);
461 __mul(&w,&z,y,p);
462 }
463 return;
464 }
465
466 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
467 or Y and Z. Relative error bound:
468 - For P = 2: 2.001 * R ^ (1 - P)
469 - For P = 3: 2.063 * R ^ (1 - P)
470 - For P > 3: 3.001 * R ^ (1 - P)
471
472 *X = 0 is not permissible. */
473 void __dvd(const mp_no *x, const mp_no *y, mp_no *z, int p) {
474
475 mp_no w;
476
477 if (X[0] == ZERO) Z[0] = ZERO;
478 else {__inv(y,&w,p); __mul(x,&w,z,p);}
479 return;
480 }