]>
git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc64/power4/fpu/mpa.c
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
20 /************************************************************************/
21 /* MODULE_NAME: mpa.c */
39 /* Arithmetic functions for multiple precision numbers. */
40 /* Relative errors are bounded */
41 /************************************************************************/
47 #include <sys/param.h>
49 const mp_no mpone
= {1, {1.0, 1.0}};
50 const mp_no mptwo
= {1, {1.0, 2.0}};
52 /* Compare mantissa of two multiple precision numbers regardless of the sign
53 and exponent of the numbers. */
54 static int mcr(const mp_no
*x
, const mp_no
*y
, int p
) {
57 for (i
=1; i
<=p2
; i
++) {
58 if (X
[i
] == Y
[i
]) continue;
59 else if (X
[i
] > Y
[i
]) return 1;
64 /* Compare the absolute values of two multiple precision numbers. */
65 int __acr(const mp_no
*x
, const mp_no
*y
, int p
) {
69 if (Y
[0] == ZERO
) i
= 0;
72 else if (Y
[0] == ZERO
) i
= 1;
75 else if (EX
< EY
) i
=-1;
82 /* Copy multiple precision number X into Y. They could be the same
84 void __cpy(const mp_no
*x
, mp_no
*y
, int p
) {
88 for (i
=0; i
<= p
; i
++) Y
[i
] = X
[i
];
93 /* Convert a multiple precision number *X into a double precision
94 number *Y, normalized case (|x| >= 2**(-1022))). */
95 static void norm(const mp_no
*x
, double *y
, int p
)
102 else if (p
==2) c
= X
[1] + R
* X
[2];
103 else if (p
==3) c
= X
[1] + R
*(X
[2] + R
* X
[3]);
104 else if (p
==4) c
=(X
[1] + R
* X
[2]) + R
*R
*(X
[3] + R
*X
[4]);
107 for (a
=ONE
, z
[1]=X
[1]; z
[1] < TWO23
; )
108 {a
*= TWO
; z
[1] *= TWO
; }
110 for (i
=2; i
<5; i
++) {
112 u
= (z
[i
] + CUTTER
)-CUTTER
;
113 if (u
> z
[i
]) u
-= RADIX
;
118 u
= (z
[3] + TWO71
) - TWO71
;
119 if (u
> z
[3]) u
-= TWO19
;
124 for (i
=5; i
<= p
; i
++) {
125 if (X
[i
] == ZERO
) continue;
126 else {z
[3] += ONE
; break; }
132 c
= (z
[1] + R
*(z
[2] + R
* z
[3]))/a
;
137 for (i
=1; i
<EX
; i
++) c
*= RADIX
;
138 for (i
=1; i
>EX
; i
--) c
*= RADIXI
;
145 /* Convert a multiple precision number *X into a double precision
146 number *Y, Denormal case (|x| < 2**(-1022))). */
147 static void denorm(const mp_no
*x
, double *y
, int p
)
154 if (EX
<-44 || (EX
==-44 && X
[1]<TWO5
))
158 if (EX
==-42) {z
[1]=X
[1]+TWO10
; z
[2]=ZERO
; z
[3]=ZERO
; k
=3;}
159 else if (EX
==-43) {z
[1]= TWO10
; z
[2]=X
[1]; z
[3]=ZERO
; k
=2;}
160 else {z
[1]= TWO10
; z
[2]=ZERO
; z
[3]=X
[1]; k
=1;}
163 if (EX
==-42) {z
[1]=X
[1]+TWO10
; z
[2]=X
[2]; z
[3]=ZERO
; k
=3;}
164 else if (EX
==-43) {z
[1]= TWO10
; z
[2]=X
[1]; z
[3]=X
[2]; k
=2;}
165 else {z
[1]= TWO10
; z
[2]=ZERO
; z
[3]=X
[1]; k
=1;}
168 if (EX
==-42) {z
[1]=X
[1]+TWO10
; z
[2]=X
[2]; k
=3;}
169 else if (EX
==-43) {z
[1]= TWO10
; z
[2]=X
[1]; k
=2;}
170 else {z
[1]= TWO10
; z
[2]=ZERO
; k
=1;}
174 u
= (z
[3] + TWO57
) - TWO57
;
175 if (u
> z
[3]) u
-= TWO5
;
178 for (i
=k
+1; i
<= p2
; i
++) {
179 if (X
[i
] == ZERO
) continue;
180 else {z
[3] += ONE
; break; }
184 c
= X
[0]*((z
[1] + R
*(z
[2] + R
*z
[3])) - TWO10
);
192 /* Convert multiple precision number *X into double precision number *Y. The
193 result is correctly rounded to the nearest/even. */
194 void __mp_dbl(const mp_no
*x
, double *y
, int p
) {
196 if (X
[0] == ZERO
) {*y
= ZERO
; return; }
198 if (EX
> -42) norm(x
,y
,p
);
199 else if (EX
==-42 && X
[1]>=TWO10
) norm(x
,y
,p
);
203 /* Get the multiple precision equivalent of X into *Y. If the precision is too
204 small, the result is truncated. */
205 void __dbl_mp(double x
, mp_no
*y
, int p
) {
212 if (x
== ZERO
) {Y
[0] = ZERO
; return; }
213 else if (x
> ZERO
) Y
[0] = ONE
;
214 else {Y
[0] = MONE
; x
=-x
; }
217 for (EY
=ONE
; x
>= RADIX
; EY
+= ONE
) x
*= RADIXI
;
218 for ( ; x
< ONE
; EY
-= ONE
) x
*= RADIX
;
222 for (i
=1; i
<=n
; i
++) {
223 u
= (x
+ TWO52
) - TWO52
;
225 Y
[i
] = u
; x
-= u
; x
*= RADIX
; }
226 for ( ; i
<=p2
; i
++) Y
[i
] = ZERO
;
230 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
231 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
232 Y and Z. No guard digit is used. The result equals the exact sum,
234 static void add_magnitudes(const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
) {
241 i
=p2
; j
=p2
+ EY
- EX
; k
=p2
+1;
244 {__cpy(x
,z
,p
); return; }
247 for (; j
>0; i
--,j
--) {
266 for (i
=1; i
<=p2
; i
++) Z
[i
] = Z
[i
+1]; }
270 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
271 The sign of the difference *Z is not changed. X and Y may overlap but not X
272 and Z or Y and Z. One guard digit is used. The error is less than one
274 static void sub_magnitudes(const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
) {
283 Z
[k
] = Z
[k
+1] = ZERO
; }
286 if (j
> p2
) {__cpy(x
,z
,p
); return; }
288 i
=p2
; j
=p2
+1-j
; k
=p2
;
290 Z
[k
+1] = RADIX
- Y
[j
--];
298 for (; j
>0; i
--,j
--) {
299 Z
[k
] += (X
[i
] - Y
[j
]);
316 for (i
=1; Z
[i
] == ZERO
; i
++) ;
318 for (k
=1; i
<= p2
+1; )
326 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
327 and Z or Y and Z. One guard digit is used. The error is less than one
329 void __add(const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
) {
333 if (X
[0] == ZERO
) {__cpy(y
,z
,p
); return; }
334 else if (Y
[0] == ZERO
) {__cpy(x
,z
,p
); return; }
337 if (__acr(x
,y
,p
) > 0) {add_magnitudes(x
,y
,z
,p
); Z
[0] = X
[0]; }
338 else {add_magnitudes(y
,x
,z
,p
); Z
[0] = Y
[0]; }
341 if ((n
=__acr(x
,y
,p
)) == 1) {sub_magnitudes(x
,y
,z
,p
); Z
[0] = X
[0]; }
342 else if (n
== -1) {sub_magnitudes(y
,x
,z
,p
); Z
[0] = Y
[0]; }
348 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
349 not X and Z or Y and Z. One guard digit is used. The error is less than
351 void __sub(const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
) {
355 if (X
[0] == ZERO
) {__cpy(y
,z
,p
); Z
[0] = -Z
[0]; return; }
356 else if (Y
[0] == ZERO
) {__cpy(x
,z
,p
); return; }
359 if (__acr(x
,y
,p
) > 0) {add_magnitudes(x
,y
,z
,p
); Z
[0] = X
[0]; }
360 else {add_magnitudes(y
,x
,z
,p
); Z
[0] = -Y
[0]; }
363 if ((n
=__acr(x
,y
,p
)) == 1) {sub_magnitudes(x
,y
,z
,p
); Z
[0] = X
[0]; }
364 else if (n
== -1) {sub_magnitudes(y
,x
,z
,p
); Z
[0] = -Y
[0]; }
370 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
371 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
372 digits. In case P > 3 the error is bounded by 1.001 ULP. */
373 void __mul(const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
) {
375 long i
, i1
, i2
, j
, k
, k2
;
381 { Z
[0]=ZERO
; return; }
383 /* Multiply, add and carry */
384 k2
= (p2
<3) ? p2
+p2
: p2
+3;
387 if (k
> p2
) {i1
=k
-p2
; i2
=p2
+1; }
390 /* Rearrange this inner loop to allow the fmadd instructions to be
391 independent and execute in parallel on processors that have
392 dual symmetrical FP pipelines. */
395 /* Make sure we have at least 2 iterations. */
396 if (((i2
- i1
) & 1L) == 1L)
398 /* Handle the odd iterations case. */
399 zk2
= x
->d
[i2
-1]*y
->d
[i1
];
403 /* Do two multiply/adds per loop iteration, using independent
404 accumulators; zk and zk2. */
405 for (i
=i1
,j
=i2
-1; i
<i2
-1; i
+=2,j
-=2)
407 zk
+= x
->d
[i
]*y
->d
[j
];
408 zk2
+= x
->d
[i
+1]*y
->d
[j
-1];
410 zk
+= zk2
; /* Final sum. */
414 /* Special case when iterations is 1. */
415 zk
+= x
->d
[i1
]*y
->d
[i1
];
418 /* The original code. */
419 for (i
=i1
,j
=i2
-1; i
<i2
; i
++,j
--) zk
+= X
[i
]*Y
[j
];
422 u
= (zk
+ CUTTER
)-CUTTER
;
423 if (u
> zk
) u
-= RADIX
;
430 /* Is there a carry beyond the most significant digit? */
432 for (i
=1; i
<=p2
; i
++) Z
[i
]=Z
[i
+1];
441 /* Invert *X and store in *Y. Relative error bound:
442 - For P = 2: 1.001 * R ^ (1 - P)
443 - For P = 3: 1.063 * R ^ (1 - P)
444 - For P > 3: 2.001 * R ^ (1 - P)
446 *X = 0 is not permissible. */
447 void __inv(const mp_no
*x
, mp_no
*y
, int p
) {
451 static const int np1
[] = {0,0,0,0,1,2,2,2,2,3,3,3,3,3,3,3,3,3,
452 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4};
454 __cpy(x
,&z
,p
); z
.e
=0; __mp_dbl(&z
,&t
,p
);
455 t
=ONE
/t
; __dbl_mp(t
,y
,p
); EY
-= EX
;
457 for (i
=0; i
<np1
[p
]; i
++) {
460 __sub(&mptwo
,y
,&z
,p
);
466 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
467 or Y and Z. Relative error bound:
468 - For P = 2: 2.001 * R ^ (1 - P)
469 - For P = 3: 2.063 * R ^ (1 - P)
470 - For P > 3: 3.001 * R ^ (1 - P)
472 *X = 0 is not permissible. */
473 void __dvd(const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
) {
477 if (X
[0] == ZERO
) Z
[0] = ZERO
;
478 else {__inv(y
,&w
,p
); __mul(x
,&w
,z
,p
);}