]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/powerpc/powerpc32/power4/fpu/mpa.c
Sync up powerpc __mp_dbl with default code
[thirdparty/glibc.git] / sysdeps / powerpc / powerpc32 / power4 / fpu / mpa.c
1
2 /*
3 * IBM Accurate Mathematical Library
4 * written by International Business Machines Corp.
5 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20 /************************************************************************/
21 /* MODULE_NAME: mpa.c */
22 /* */
23 /* FUNCTIONS: */
24 /* mcr */
25 /* acr */
26 /* cpy */
27 /* norm */
28 /* denorm */
29 /* mp_dbl */
30 /* dbl_mp */
31 /* add_magnitudes */
32 /* sub_magnitudes */
33 /* add */
34 /* sub */
35 /* mul */
36 /* inv */
37 /* dvd */
38 /* */
39 /* Arithmetic functions for multiple precision numbers. */
40 /* Relative errors are bounded */
41 /************************************************************************/
42
43
44 #include "endian.h"
45 #include "mpa.h"
46 #include <sys/param.h>
47
48 const mp_no mpone = {1, {1.0, 1.0}};
49 const mp_no mptwo = {1, {1.0, 2.0}};
50
51 /* Compare mantissa of two multiple precision numbers regardless of the sign
52 and exponent of the numbers. */
53 static int
54 mcr (const mp_no *x, const mp_no *y, int p)
55 {
56 long i;
57 long p2 = p;
58 for (i = 1; i <= p2; i++)
59 {
60 if (X[i] == Y[i])
61 continue;
62 else if (X[i] > Y[i])
63 return 1;
64 else
65 return -1;
66 }
67 return 0;
68 }
69
70 /* Compare the absolute values of two multiple precision numbers. */
71 int
72 __acr (const mp_no *x, const mp_no *y, int p)
73 {
74 long i;
75
76 if (X[0] == ZERO)
77 {
78 if (Y[0] == ZERO)
79 i = 0;
80 else
81 i = -1;
82 }
83 else if (Y[0] == ZERO)
84 i = 1;
85 else
86 {
87 if (EX > EY)
88 i = 1;
89 else if (EX < EY)
90 i = -1;
91 else
92 i = mcr (x, y, p);
93 }
94
95 return i;
96 }
97
98 /* Copy multiple precision number X into Y. They could be the same
99 number. */
100 void
101 __cpy (const mp_no *x, mp_no *y, int p)
102 {
103 long i;
104
105 EY = EX;
106 for (i = 0; i <= p; i++)
107 Y[i] = X[i];
108 }
109
110 /* Convert a multiple precision number *X into a double precision
111 number *Y, normalized case (|x| >= 2**(-1022))). */
112 static void
113 norm (const mp_no *x, double *y, int p)
114 {
115 #define R RADIXI
116 long i;
117 double a, c, u, v, z[5];
118 if (p < 5)
119 {
120 if (p == 1)
121 c = X[1];
122 else if (p == 2)
123 c = X[1] + R * X[2];
124 else if (p == 3)
125 c = X[1] + R * (X[2] + R * X[3]);
126 else if (p == 4)
127 c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
128 }
129 else
130 {
131 for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
132 {
133 a *= TWO;
134 z[1] *= TWO;
135 }
136
137 for (i = 2; i < 5; i++)
138 {
139 z[i] = X[i] * a;
140 u = (z[i] + CUTTER) - CUTTER;
141 if (u > z[i])
142 u -= RADIX;
143 z[i] -= u;
144 z[i - 1] += u * RADIXI;
145 }
146
147 u = (z[3] + TWO71) - TWO71;
148 if (u > z[3])
149 u -= TWO19;
150 v = z[3] - u;
151
152 if (v == TWO18)
153 {
154 if (z[4] == ZERO)
155 {
156 for (i = 5; i <= p; i++)
157 {
158 if (X[i] == ZERO)
159 continue;
160 else
161 {
162 z[3] += ONE;
163 break;
164 }
165 }
166 }
167 else
168 z[3] += ONE;
169 }
170
171 c = (z[1] + R * (z[2] + R * z[3])) / a;
172 }
173
174 c *= X[0];
175
176 for (i = 1; i < EX; i++)
177 c *= RADIX;
178 for (i = 1; i > EX; i--)
179 c *= RADIXI;
180
181 *y = c;
182 #undef R
183 }
184
185 /* Convert a multiple precision number *X into a double precision
186 number *Y, Denormal case (|x| < 2**(-1022))). */
187 static void
188 denorm (const mp_no *x, double *y, int p)
189 {
190 long i, k;
191 long p2 = p;
192 double c, u, z[5];
193
194 #define R RADIXI
195 if (EX < -44 || (EX == -44 && X[1] < TWO5))
196 {
197 *y = ZERO;
198 return;
199 }
200
201 if (p2 == 1)
202 {
203 if (EX == -42)
204 {
205 z[1] = X[1] + TWO10;
206 z[2] = ZERO;
207 z[3] = ZERO;
208 k = 3;
209 }
210 else if (EX == -43)
211 {
212 z[1] = TWO10;
213 z[2] = X[1];
214 z[3] = ZERO;
215 k = 2;
216 }
217 else
218 {
219 z[1] = TWO10;
220 z[2] = ZERO;
221 z[3] = X[1];
222 k = 1;
223 }
224 }
225 else if (p2 == 2)
226 {
227 if (EX == -42)
228 {
229 z[1] = X[1] + TWO10;
230 z[2] = X[2];
231 z[3] = ZERO;
232 k = 3;
233 }
234 else if (EX == -43)
235 {
236 z[1] = TWO10;
237 z[2] = X[1];
238 z[3] = X[2];
239 k = 2;
240 }
241 else
242 {
243 z[1] = TWO10;
244 z[2] = ZERO;
245 z[3] = X[1];
246 k = 1;
247 }
248 }
249 else
250 {
251 if (EX == -42)
252 {
253 z[1] = X[1] + TWO10;
254 z[2] = X[2];
255 k = 3;
256 }
257 else if (EX == -43)
258 {
259 z[1] = TWO10;
260 z[2] = X[1];
261 k = 2;
262 }
263 else
264 {
265 z[1] = TWO10;
266 z[2] = ZERO;
267 k = 1;
268 }
269 z[3] = X[k];
270 }
271
272 u = (z[3] + TWO57) - TWO57;
273 if (u > z[3])
274 u -= TWO5;
275
276 if (u == z[3])
277 {
278 for (i = k + 1; i <= p2; i++)
279 {
280 if (X[i] == ZERO)
281 continue;
282 else
283 {
284 z[3] += ONE;
285 break;
286 }
287 }
288 }
289
290 c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
291
292 *y = c * TWOM1032;
293 #undef R
294 }
295
296 /* Convert multiple precision number *X into double precision number *Y. The
297 result is correctly rounded to the nearest/even. */
298 void
299 __mp_dbl (const mp_no *x, double *y, int p)
300 {
301 if (X[0] == ZERO)
302 {
303 *y = ZERO;
304 return;
305 }
306
307 if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
308 norm (x, y, p);
309 else
310 denorm (x, y, p);
311 }
312
313 /* Get the multiple precision equivalent of X into *Y. If the precision is too
314 small, the result is truncated. */
315 void
316 __dbl_mp (double x, mp_no *y, int p)
317 {
318 long i, n;
319 long p2 = p;
320 double u;
321
322 /* Sign. */
323 if (x == ZERO)
324 {
325 Y[0] = ZERO;
326 return;
327 }
328 else if (x > ZERO)
329 Y[0] = ONE;
330 else
331 {
332 Y[0] = MONE;
333 x = -x;
334 }
335
336 /* Exponent. */
337 for (EY = ONE; x >= RADIX; EY += ONE)
338 x *= RADIXI;
339 for (; x < ONE; EY -= ONE)
340 x *= RADIX;
341
342 /* Digits. */
343 n = MIN (p2, 4);
344 for (i = 1; i <= n; i++)
345 {
346 u = (x + TWO52) - TWO52;
347 if (u > x)
348 u -= ONE;
349 Y[i] = u;
350 x -= u;
351 x *= RADIX;
352 }
353 for (; i <= p2; i++)
354 Y[i] = ZERO;
355 }
356
357 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0. The
358 sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
359 Y and Z. No guard digit is used. The result equals the exact sum,
360 truncated. */
361 static void
362 add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
363 {
364 long i, j, k;
365 long p2 = p;
366
367 EZ = EX;
368
369 i = p2;
370 j = p2 + EY - EX;
371 k = p2 + 1;
372
373 if (j < 1)
374 {
375 __cpy (x, z, p);
376 return;
377 }
378 else
379 Z[k] = ZERO;
380
381 for (; j > 0; i--, j--)
382 {
383 Z[k] += X[i] + Y[j];
384 if (Z[k] >= RADIX)
385 {
386 Z[k] -= RADIX;
387 Z[--k] = ONE;
388 }
389 else
390 Z[--k] = ZERO;
391 }
392
393 for (; i > 0; i--)
394 {
395 Z[k] += X[i];
396 if (Z[k] >= RADIX)
397 {
398 Z[k] -= RADIX;
399 Z[--k] = ONE;
400 }
401 else
402 Z[--k] = ZERO;
403 }
404
405 if (Z[1] == ZERO)
406 {
407 for (i = 1; i <= p2; i++)
408 Z[i] = Z[i + 1];
409 }
410 else
411 EZ += ONE;
412 }
413
414 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
415 The sign of the difference *Z is not changed. X and Y may overlap but not X
416 and Z or Y and Z. One guard digit is used. The error is less than one
417 ULP. */
418 static void
419 sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
420 {
421 long i, j, k;
422 long p2 = p;
423
424 EZ = EX;
425
426 if (EX == EY)
427 {
428 i = j = k = p2;
429 Z[k] = Z[k + 1] = ZERO;
430 }
431 else
432 {
433 j = EX - EY;
434 if (j > p2)
435 {
436 __cpy (x, z, p);
437 return;
438 }
439 else
440 {
441 i = p2;
442 j = p2 + 1 - j;
443 k = p2;
444 if (Y[j] > ZERO)
445 {
446 Z[k + 1] = RADIX - Y[j--];
447 Z[k] = MONE;
448 }
449 else
450 {
451 Z[k + 1] = ZERO;
452 Z[k] = ZERO;
453 j--;
454 }
455 }
456 }
457
458 for (; j > 0; i--, j--)
459 {
460 Z[k] += (X[i] - Y[j]);
461 if (Z[k] < ZERO)
462 {
463 Z[k] += RADIX;
464 Z[--k] = MONE;
465 }
466 else
467 Z[--k] = ZERO;
468 }
469
470 for (; i > 0; i--)
471 {
472 Z[k] += X[i];
473 if (Z[k] < ZERO)
474 {
475 Z[k] += RADIX;
476 Z[--k] = MONE;
477 }
478 else
479 Z[--k] = ZERO;
480 }
481
482 for (i = 1; Z[i] == ZERO; i++);
483 EZ = EZ - i + 1;
484 for (k = 1; i <= p2 + 1;)
485 Z[k++] = Z[i++];
486 for (; k <= p2;)
487 Z[k++] = ZERO;
488 }
489
490 /* Add *X and *Y and store the result in *Z. X and Y may overlap, but not X
491 and Z or Y and Z. One guard digit is used. The error is less than one
492 ULP. */
493 void
494 __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
495 {
496 int n;
497
498 if (X[0] == ZERO)
499 {
500 __cpy (y, z, p);
501 return;
502 }
503 else if (Y[0] == ZERO)
504 {
505 __cpy (x, z, p);
506 return;
507 }
508
509 if (X[0] == Y[0])
510 {
511 if (__acr (x, y, p) > 0)
512 {
513 add_magnitudes (x, y, z, p);
514 Z[0] = X[0];
515 }
516 else
517 {
518 add_magnitudes (y, x, z, p);
519 Z[0] = Y[0];
520 }
521 }
522 else
523 {
524 if ((n = __acr (x, y, p)) == 1)
525 {
526 sub_magnitudes (x, y, z, p);
527 Z[0] = X[0];
528 }
529 else if (n == -1)
530 {
531 sub_magnitudes (y, x, z, p);
532 Z[0] = Y[0];
533 }
534 else
535 Z[0] = ZERO;
536 }
537 }
538
539 /* Subtract *Y from *X and return the result in *Z. X and Y may overlap but
540 not X and Z or Y and Z. One guard digit is used. The error is less than
541 one ULP. */
542 void
543 __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
544 {
545 int n;
546
547 if (X[0] == ZERO)
548 {
549 __cpy (y, z, p);
550 Z[0] = -Z[0];
551 return;
552 }
553 else if (Y[0] == ZERO)
554 {
555 __cpy (x, z, p);
556 return;
557 }
558
559 if (X[0] != Y[0])
560 {
561 if (__acr (x, y, p) > 0)
562 {
563 add_magnitudes (x, y, z, p);
564 Z[0] = X[0];
565 }
566 else
567 {
568 add_magnitudes (y, x, z, p);
569 Z[0] = -Y[0];
570 }
571 }
572 else
573 {
574 if ((n = __acr (x, y, p)) == 1)
575 {
576 sub_magnitudes (x, y, z, p);
577 Z[0] = X[0];
578 }
579 else if (n == -1)
580 {
581 sub_magnitudes (y, x, z, p);
582 Z[0] = -Y[0];
583 }
584 else
585 Z[0] = ZERO;
586 }
587 }
588
589 /* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X
590 and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
591 digits. In case P > 3 the error is bounded by 1.001 ULP. */
592 void
593 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
594 {
595 long i, i1, i2, j, k, k2;
596 long p2 = p;
597 double u, zk, zk2;
598
599 /* Is z=0? */
600 if (__glibc_unlikely (X[0] * Y[0] == ZERO))
601 {
602 Z[0] = ZERO;
603 return;
604 }
605
606 /* Multiply, add and carry */
607 k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
608 zk = Z[k2] = ZERO;
609 for (k = k2; k > 1;)
610 {
611 if (k > p2)
612 {
613 i1 = k - p2;
614 i2 = p2 + 1;
615 }
616 else
617 {
618 i1 = 1;
619 i2 = k;
620 }
621 #if 1
622 /* Rearrange this inner loop to allow the fmadd instructions to be
623 independent and execute in parallel on processors that have
624 dual symmetrical FP pipelines. */
625 if (i1 < (i2 - 1))
626 {
627 /* Make sure we have at least 2 iterations. */
628 if (((i2 - i1) & 1L) == 1L)
629 {
630 /* Handle the odd iterations case. */
631 zk2 = x->d[i2 - 1] * y->d[i1];
632 }
633 else
634 zk2 = 0.0;
635 /* Do two multiply/adds per loop iteration, using independent
636 accumulators; zk and zk2. */
637 for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
638 {
639 zk += x->d[i] * y->d[j];
640 zk2 += x->d[i + 1] * y->d[j - 1];
641 }
642 zk += zk2; /* Final sum. */
643 }
644 else
645 {
646 /* Special case when iterations is 1. */
647 zk += x->d[i1] * y->d[i1];
648 }
649 #else
650 /* The original code. */
651 for (i = i1, j = i2 - 1; i < i2; i++, j--)
652 zk += X[i] * Y[j];
653 #endif
654
655 u = (zk + CUTTER) - CUTTER;
656 if (u > zk)
657 u -= RADIX;
658 Z[k] = zk - u;
659 zk = u * RADIXI;
660 --k;
661 }
662 Z[k] = zk;
663
664 /* Is there a carry beyond the most significant digit? */
665 if (Z[1] == ZERO)
666 {
667 for (i = 1; i <= p2; i++)
668 Z[i] = Z[i + 1];
669 EZ = EX + EY - 1;
670 }
671 else
672 EZ = EX + EY;
673
674 Z[0] = X[0] * Y[0];
675 }
676
677 /* Square *X and store result in *Y. X and Y may not overlap. For P in
678 [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
679 error is bounded by 1.001 ULP. This is a faster special case of
680 multiplication. */
681 void
682 __sqr (const mp_no *x, mp_no *y, int p)
683 {
684 long i, j, k, ip;
685 double u, yk;
686
687 /* Is z=0? */
688 if (__glibc_unlikely (X[0] == ZERO))
689 {
690 Y[0] = ZERO;
691 return;
692 }
693
694 /* We need not iterate through all X's since it's pointless to
695 multiply zeroes. */
696 for (ip = p; ip > 0; ip--)
697 if (X[ip] != ZERO)
698 break;
699
700 k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
701
702 while (k > 2 * ip + 1)
703 Y[k--] = ZERO;
704
705 yk = ZERO;
706
707 while (k > p)
708 {
709 double yk2 = 0.0;
710 long lim = k / 2;
711
712 if (k % 2 == 0)
713 {
714 yk += X[lim] * X[lim];
715 lim--;
716 }
717
718 /* In __mul, this loop (and the one within the next while loop) run
719 between a range to calculate the mantissa as follows:
720
721 Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
722 + X[n] * Y[k]
723
724 For X == Y, we can get away with summing halfway and doubling the
725 result. For cases where the range size is even, the mid-point needs
726 to be added separately (above). */
727 for (i = k - p, j = p; i <= lim; i++, j--)
728 yk2 += X[i] * X[j];
729
730 yk += 2.0 * yk2;
731
732 u = (yk + CUTTER) - CUTTER;
733 if (u > yk)
734 u -= RADIX;
735 Y[k--] = yk - u;
736 yk = u * RADIXI;
737 }
738
739 while (k > 1)
740 {
741 double yk2 = 0.0;
742 long lim = k / 2;
743
744 if (k % 2 == 0)
745 {
746 yk += X[lim] * X[lim];
747 lim--;
748 }
749
750 /* Likewise for this loop. */
751 for (i = 1, j = k - 1; i <= lim; i++, j--)
752 yk2 += X[i] * X[j];
753
754 yk += 2.0 * yk2;
755
756 u = (yk + CUTTER) - CUTTER;
757 if (u > yk)
758 u -= RADIX;
759 Y[k--] = yk - u;
760 yk = u * RADIXI;
761 }
762 Y[k] = yk;
763
764 /* Squares are always positive. */
765 Y[0] = 1.0;
766
767 EY = 2 * EX;
768 /* Is there a carry beyond the most significant digit? */
769 if (__glibc_unlikely (Y[1] == ZERO))
770 {
771 for (i = 1; i <= p; i++)
772 Y[i] = Y[i + 1];
773 EY--;
774 }
775 }
776
777 /* Invert *X and store in *Y. Relative error bound:
778 - For P = 2: 1.001 * R ^ (1 - P)
779 - For P = 3: 1.063 * R ^ (1 - P)
780 - For P > 3: 2.001 * R ^ (1 - P)
781
782 *X = 0 is not permissible. */
783 static void
784 __inv (const mp_no *x, mp_no *y, int p)
785 {
786 long i;
787 double t;
788 mp_no z, w;
789 static const int np1[] =
790 { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
791 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
792 };
793
794 __cpy (x, &z, p);
795 z.e = 0;
796 __mp_dbl (&z, &t, p);
797 t = ONE / t;
798 __dbl_mp (t, y, p);
799 EY -= EX;
800
801 for (i = 0; i < np1[p]; i++)
802 {
803 __cpy (y, &w, p);
804 __mul (x, &w, y, p);
805 __sub (&mptwo, y, &z, p);
806 __mul (&w, &z, y, p);
807 }
808 }
809
810 /* Divide *X by *Y and store result in *Z. X and Y may overlap but not X and Z
811 or Y and Z. Relative error bound:
812 - For P = 2: 2.001 * R ^ (1 - P)
813 - For P = 3: 2.063 * R ^ (1 - P)
814 - For P > 3: 3.001 * R ^ (1 - P)
815
816 *X = 0 is not permissible. */
817 void
818 __dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
819 {
820 mp_no w;
821
822 if (X[0] == ZERO)
823 Z[0] = ZERO;
824 else
825 {
826 __inv (y, &w, p);
827 __mul (x, &w, z, p);
828 }
829 }