]>
git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
20 /************************************************************************/
21 /* MODULE_NAME: mpa.c */
39 /* Arithmetic functions for multiple precision numbers. */
40 /* Relative errors are bounded */
41 /************************************************************************/
46 #include <sys/param.h>
48 const mp_no mpone
= {1, {1.0, 1.0}};
49 const mp_no mptwo
= {1, {1.0, 2.0}};
51 /* Compare mantissa of two multiple precision numbers regardless of the sign
52 and exponent of the numbers. */
54 mcr (const mp_no
*x
, const mp_no
*y
, int p
)
58 for (i
= 1; i
<= p2
; i
++)
70 /* Compare the absolute values of two multiple precision numbers. */
72 __acr (const mp_no
*x
, const mp_no
*y
, int p
)
83 else if (Y
[0] == ZERO
)
98 /* Copy multiple precision number X into Y. They could be the same
101 __cpy (const mp_no
*x
, mp_no
*y
, int p
)
106 for (i
= 0; i
<= p
; i
++)
110 /* Convert a multiple precision number *X into a double precision
111 number *Y, normalized case (|x| >= 2**(-1022))). */
113 norm (const mp_no
*x
, double *y
, int p
)
117 double a
, c
, u
, v
, z
[5];
125 c
= X
[1] + R
* (X
[2] + R
* X
[3]);
127 c
= (X
[1] + R
* X
[2]) + R
* R
* (X
[3] + R
* X
[4]);
131 for (a
= ONE
, z
[1] = X
[1]; z
[1] < TWO23
;)
137 for (i
= 2; i
< 5; i
++)
140 u
= (z
[i
] + CUTTER
) - CUTTER
;
144 z
[i
- 1] += u
* RADIXI
;
147 u
= (z
[3] + TWO71
) - TWO71
;
156 for (i
= 5; i
<= p
; i
++)
171 c
= (z
[1] + R
* (z
[2] + R
* z
[3])) / a
;
176 for (i
= 1; i
< EX
; i
++)
178 for (i
= 1; i
> EX
; i
--)
185 /* Convert a multiple precision number *X into a double precision
186 number *Y, Denormal case (|x| < 2**(-1022))). */
188 denorm (const mp_no
*x
, double *y
, int p
)
195 if (EX
< -44 || (EX
== -44 && X
[1] < TWO5
))
272 u
= (z
[3] + TWO57
) - TWO57
;
278 for (i
= k
+ 1; i
<= p2
; i
++)
290 c
= X
[0] * ((z
[1] + R
* (z
[2] + R
* z
[3])) - TWO10
);
296 /* Convert multiple precision number *X into double precision number *Y. The
297 result is correctly rounded to the nearest/even. */
299 __mp_dbl (const mp_no
*x
, double *y
, int p
)
307 if (__glibc_likely (EX
> -42 || (EX
== -42 && X
[1] >= TWO10
)))
313 /* Get the multiple precision equivalent of X into *Y. If the precision is too
314 small, the result is truncated. */
316 __dbl_mp (double x
, mp_no
*y
, int p
)
337 for (EY
= ONE
; x
>= RADIX
; EY
+= ONE
)
339 for (; x
< ONE
; EY
-= ONE
)
344 for (i
= 1; i
<= n
; i
++)
346 u
= (x
+ TWO52
) - TWO52
;
357 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
358 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
359 Y and Z. No guard digit is used. The result equals the exact sum,
362 add_magnitudes (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
381 for (; j
> 0; i
--, j
--)
407 for (i
= 1; i
<= p2
; i
++)
414 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
415 The sign of the difference *Z is not changed. X and Y may overlap but not X
416 and Z or Y and Z. One guard digit is used. The error is less than one
419 sub_magnitudes (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
429 Z
[k
] = Z
[k
+ 1] = ZERO
;
446 Z
[k
+ 1] = RADIX
- Y
[j
--];
458 for (; j
> 0; i
--, j
--)
460 Z
[k
] += (X
[i
] - Y
[j
]);
482 for (i
= 1; Z
[i
] == ZERO
; i
++);
484 for (k
= 1; i
<= p2
+ 1;)
490 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
491 and Z or Y and Z. One guard digit is used. The error is less than one
494 __add (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
503 else if (Y
[0] == ZERO
)
511 if (__acr (x
, y
, p
) > 0)
513 add_magnitudes (x
, y
, z
, p
);
518 add_magnitudes (y
, x
, z
, p
);
524 if ((n
= __acr (x
, y
, p
)) == 1)
526 sub_magnitudes (x
, y
, z
, p
);
531 sub_magnitudes (y
, x
, z
, p
);
539 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
540 not X and Z or Y and Z. One guard digit is used. The error is less than
543 __sub (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
553 else if (Y
[0] == ZERO
)
561 if (__acr (x
, y
, p
) > 0)
563 add_magnitudes (x
, y
, z
, p
);
568 add_magnitudes (y
, x
, z
, p
);
574 if ((n
= __acr (x
, y
, p
)) == 1)
576 sub_magnitudes (x
, y
, z
, p
);
581 sub_magnitudes (y
, x
, z
, p
);
589 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
590 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
591 digits. In case P > 3 the error is bounded by 1.001 ULP. */
593 __mul (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)
595 long i
, i1
, i2
, j
, k
, k2
;
600 if (__glibc_unlikely (X
[0] * Y
[0] == ZERO
))
606 /* Multiply, add and carry */
607 k2
= (p2
< 3) ? p2
+ p2
: p2
+ 3;
622 /* Rearrange this inner loop to allow the fmadd instructions to be
623 independent and execute in parallel on processors that have
624 dual symmetrical FP pipelines. */
627 /* Make sure we have at least 2 iterations. */
628 if (((i2
- i1
) & 1L) == 1L)
630 /* Handle the odd iterations case. */
631 zk2
= x
->d
[i2
- 1] * y
->d
[i1
];
635 /* Do two multiply/adds per loop iteration, using independent
636 accumulators; zk and zk2. */
637 for (i
= i1
, j
= i2
- 1; i
< i2
- 1; i
+= 2, j
-= 2)
639 zk
+= x
->d
[i
] * y
->d
[j
];
640 zk2
+= x
->d
[i
+ 1] * y
->d
[j
- 1];
642 zk
+= zk2
; /* Final sum. */
646 /* Special case when iterations is 1. */
647 zk
+= x
->d
[i1
] * y
->d
[i1
];
650 /* The original code. */
651 for (i
= i1
, j
= i2
- 1; i
< i2
; i
++, j
--)
655 u
= (zk
+ CUTTER
) - CUTTER
;
664 /* Is there a carry beyond the most significant digit? */
667 for (i
= 1; i
<= p2
; i
++)
677 /* Square *X and store result in *Y. X and Y may not overlap. For P in
678 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
679 error is bounded by 1.001 ULP. This is a faster special case of
682 __sqr (const mp_no
*x
, mp_no
*y
, int p
)
688 if (__glibc_unlikely (X
[0] == ZERO
))
694 /* We need not iterate through all X's since it's pointless to
696 for (ip
= p
; ip
> 0; ip
--)
700 k
= (__glibc_unlikely (p
< 3)) ? p
+ p
: p
+ 3;
702 while (k
> 2 * ip
+ 1)
714 yk
+= X
[lim
] * X
[lim
];
718 /* In __mul, this loop (and the one within the next while loop) run
719 between a range to calculate the mantissa as follows:
721 Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
724 For X == Y, we can get away with summing halfway and doubling the
725 result. For cases where the range size is even, the mid-point needs
726 to be added separately (above). */
727 for (i
= k
- p
, j
= p
; i
<= lim
; i
++, j
--)
732 u
= (yk
+ CUTTER
) - CUTTER
;
746 yk
+= X
[lim
] * X
[lim
];
750 /* Likewise for this loop. */
751 for (i
= 1, j
= k
- 1; i
<= lim
; i
++, j
--)
756 u
= (yk
+ CUTTER
) - CUTTER
;
764 /* Squares are always positive. */
768 /* Is there a carry beyond the most significant digit? */
769 if (__glibc_unlikely (Y
[1] == ZERO
))
771 for (i
= 1; i
<= p
; i
++)
777 /* Invert *X and store in *Y. Relative error bound:
778 - For P = 2: 1.001 * R ^ (1 - P)
779 - For P = 3: 1.063 * R ^ (1 - P)
780 - For P > 3: 2.001 * R ^ (1 - P)
782 *X = 0 is not permissible. */
784 __inv (const mp_no
*x
, mp_no
*y
, int p
)
789 static const int np1
[] =
790 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
791 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
796 __mp_dbl (&z
, &t
, p
);
801 for (i
= 0; i
< np1
[p
]; i
++)
805 __sub (&mptwo
, y
, &z
, p
);
806 __mul (&w
, &z
, y
, p
);
810 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
811 or Y and Z. Relative error bound:
812 - For P = 2: 2.001 * R ^ (1 - P)
813 - For P = 3: 2.063 * R ^ (1 - P)
814 - For P > 3: 3.001 * R ^ (1 - P)
816 *X = 0 is not permissible. */
818 __dvd (const mp_no
*x
, const mp_no
*y
, mp_no
*z
, int p
)