sysdeps/powerpc/powerpc32/power4/fpu/mpa.c

   1
   2 /*
   3  * IBM Accurate Mathematical Library
   4  * written by International Business Machines Corp.
   5  * Copyright (C) 2001-2013 Free Software Foundation, Inc.
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU Lesser General Public License as published by
   9  * the Free Software Foundation; either version 2.1 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public License
  18  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  19  */
  20 /************************************************************************/
  21 /*  MODULE_NAME: mpa.c                                                  */
  22 /*                                                                      */
  23 /*  FUNCTIONS:                                                          */
  24 /*               mcr                                                    */
  25 /*               acr                                                    */
  26 /*               cpy                                                    */
  27 /*               norm                                                   */
  28 /*               denorm                                                 */
  29 /*               mp_dbl                                                 */
  30 /*               dbl_mp                                                 */
  31 /*               add_magnitudes                                         */
  32 /*               sub_magnitudes                                         */
  33 /*               add                                                    */
  34 /*               sub                                                    */
  35 /*               mul                                                    */
  36 /*               inv                                                    */
  37 /*               dvd                                                    */
  38 /*                                                                      */
  39 /* Arithmetic functions for multiple precision numbers.                 */
  40 /* Relative errors are bounded                                          */
  41 /************************************************************************/
  42
  43
  44 #include "endian.h"
  45 #include "mpa.h"
  46 #include <sys/param.h>
  47
  48 const mp_no mpone = {1, {1.0, 1.0}};
  49 const mp_no mptwo = {1, {1.0, 2.0}};
  50
  51 /* Compare mantissa of two multiple precision numbers regardless of the sign
  52    and exponent of the numbers.  */
  53 static int
  54 mcr (const mp_no *x, const mp_no *y, int p)
  55 {
  56   long i;
  57   long p2 = p;
  58   for (i = 1; i <= p2; i++)
  59     {
  60       if (X[i] == Y[i])
  61         continue;
  62       else if (X[i] > Y[i])
  63         return 1;
  64       else
  65         return -1;
  66     }
  67   return 0;
  68 }
  69
  70 /* Compare the absolute values of two multiple precision numbers.  */
  71 int
  72 __acr (const mp_no *x, const mp_no *y, int p)
  73 {
  74   long i;
  75
  76   if (X[0] == ZERO)
  77     {
  78       if (Y[0] == ZERO)
  79         i = 0;
  80       else
  81         i = -1;
  82     }
  83   else if (Y[0] == ZERO)
  84     i = 1;
  85   else
  86     {
  87       if (EX > EY)
  88         i = 1;
  89       else if (EX < EY)
  90         i = -1;
  91       else
  92         i = mcr (x, y, p);
  93     }
  94
  95   return i;
  96 }
  97
  98 /* Copy multiple precision number X into Y.  They could be the same
  99    number.  */
 100 void
 101 __cpy (const mp_no *x, mp_no *y, int p)
 102 {
 103   long i;
 104
 105   EY = EX;
 106   for (i = 0; i <= p; i++)
 107     Y[i] = X[i];
 108 }
 109
 110 /* Convert a multiple precision number *X into a double precision
 111    number *Y, normalized case  (|x| >= 2**(-1022))).  */
 112 static void
 113 norm (const mp_no *x, double *y, int p)
 114 {
 115 #define R RADIXI
 116   long i;
 117   double a, c, u, v, z[5];
 118   if (p < 5)
 119     {
 120       if (p == 1)
 121         c = X[1];
 122       else if (p == 2)
 123         c = X[1] + R * X[2];
 124       else if (p == 3)
 125         c = X[1] + R * (X[2] + R * X[3]);
 126       else if (p == 4)
 127         c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
 128     }
 129   else
 130     {
 131       for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
 132         {
 133           a *= TWO;
 134           z[1] *= TWO;
 135         }
 136
 137       for (i = 2; i < 5; i++)
 138         {
 139           z[i] = X[i] * a;
 140           u = (z[i] + CUTTER) - CUTTER;
 141           if (u > z[i])
 142             u -= RADIX;
 143           z[i] -= u;
 144           z[i - 1] += u * RADIXI;
 145         }
 146
 147       u = (z[3] + TWO71) - TWO71;
 148       if (u > z[3])
 149         u -= TWO19;
 150       v = z[3] - u;
 151
 152       if (v == TWO18)
 153         {
 154           if (z[4] == ZERO)
 155             {
 156               for (i = 5; i <= p; i++)
 157                 {
 158                   if (X[i] == ZERO)
 159                     continue;
 160                   else
 161                     {
 162                       z[3] += ONE;
 163                       break;
 164                     }
 165                 }
 166             }
 167           else
 168             z[3] += ONE;
 169         }
 170
 171       c = (z[1] + R * (z[2] + R * z[3])) / a;
 172     }
 173
 174   c *= X[0];
 175
 176   for (i = 1; i < EX; i++)
 177     c *= RADIX;
 178   for (i = 1; i > EX; i--)
 179     c *= RADIXI;
 180
 181   *y = c;
 182 #undef R
 183 }
 184
 185 /* Convert a multiple precision number *X into a double precision
 186    number *Y, Denormal case  (|x| < 2**(-1022))).  */
 187 static void
 188 denorm (const mp_no *x, double *y, int p)
 189 {
 190   long i, k;
 191   long p2 = p;
 192   double c, u, z[5];
 193
 194 #define R RADIXI
 195   if (EX < -44 || (EX == -44 && X[1] < TWO5))
 196     {
 197       *y = ZERO;
 198       return;
 199     }
 200
 201   if (p2 == 1)
 202     {
 203       if (EX == -42)
 204         {
 205           z[1] = X[1] + TWO10;
 206           z[2] = ZERO;
 207           z[3] = ZERO;
 208           k = 3;
 209         }
 210       else if (EX == -43)
 211         {
 212           z[1] = TWO10;
 213           z[2] = X[1];
 214           z[3] = ZERO;
 215           k = 2;
 216         }
 217       else
 218         {
 219           z[1] = TWO10;
 220           z[2] = ZERO;
 221           z[3] = X[1];
 222           k = 1;
 223         }
 224     }
 225   else if (p2 == 2)
 226     {
 227       if (EX == -42)
 228         {
 229           z[1] = X[1] + TWO10;
 230           z[2] = X[2];
 231           z[3] = ZERO;
 232           k = 3;
 233         }
 234       else if (EX == -43)
 235         {
 236           z[1] = TWO10;
 237           z[2] = X[1];
 238           z[3] = X[2];
 239           k = 2;
 240         }
 241       else
 242         {
 243           z[1] = TWO10;
 244           z[2] = ZERO;
 245           z[3] = X[1];
 246           k = 1;
 247         }
 248     }
 249   else
 250     {
 251       if (EX == -42)
 252         {
 253           z[1] = X[1] + TWO10;
 254           z[2] = X[2];
 255           k = 3;
 256         }
 257       else if (EX == -43)
 258         {
 259           z[1] = TWO10;
 260           z[2] = X[1];
 261           k = 2;
 262         }
 263       else
 264         {
 265           z[1] = TWO10;
 266           z[2] = ZERO;
 267           k = 1;
 268         }
 269       z[3] = X[k];
 270     }
 271
 272   u = (z[3] + TWO57) - TWO57;
 273   if (u > z[3])
 274     u -= TWO5;
 275
 276   if (u == z[3])
 277     {
 278       for (i = k + 1; i <= p2; i++)
 279         {
 280           if (X[i] == ZERO)
 281             continue;
 282           else
 283             {
 284               z[3] += ONE;
 285               break;
 286             }
 287         }
 288     }
 289
 290   c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
 291
 292   *y = c * TWOM1032;
 293 #undef R
 294 }
 295
 296 /* Convert multiple precision number *X into double precision number *Y.  The
 297    result is correctly rounded to the nearest/even.  */
 298 void
 299 __mp_dbl (const mp_no *x, double *y, int p)
 300 {
 301   if (X[0] == ZERO)
 302     {
 303       *y = ZERO;
 304       return;
 305     }
 306
 307   if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
 308     norm (x, y, p);
 309   else
 310     denorm (x, y, p);
 311 }
 312
 313 /* Get the multiple precision equivalent of X into *Y.  If the precision is too
 314    small, the result is truncated.  */
 315 void
 316 __dbl_mp (double x, mp_no *y, int p)
 317 {
 318   long i, n;
 319   long p2 = p;
 320   double u;
 321
 322   /* Sign.  */
 323   if (x == ZERO)
 324     {
 325       Y[0] = ZERO;
 326       return;
 327     }
 328   else if (x > ZERO)
 329     Y[0] = ONE;
 330   else
 331     {
 332       Y[0] = MONE;
 333       x = -x;
 334     }
 335
 336   /* Exponent.  */
 337   for (EY = ONE; x >= RADIX; EY += ONE)
 338     x *= RADIXI;
 339   for (; x < ONE; EY -= ONE)
 340     x *= RADIX;
 341
 342   /* Digits.  */
 343   n = MIN (p2, 4);
 344   for (i = 1; i <= n; i++)
 345     {
 346       u = (x + TWO52) - TWO52;
 347       if (u > x)
 348         u -= ONE;
 349       Y[i] = u;
 350       x -= u;
 351       x *= RADIX;
 352     }
 353   for (; i <= p2; i++)
 354     Y[i] = ZERO;
 355 }
 356
 357 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
 358    sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
 359    Y and Z.  No guard digit is used.  The result equals the exact sum,
 360    truncated.  */
 361 static void
 362 add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 363 {
 364   long i, j, k;
 365   long p2 = p;
 366
 367   EZ = EX;
 368
 369   i = p2;
 370   j = p2 + EY - EX;
 371   k = p2 + 1;
 372
 373   if (j < 1)
 374     {
 375       __cpy (x, z, p);
 376       return;
 377     }
 378   else
 379     Z[k] = ZERO;
 380
 381   for (; j > 0; i--, j--)
 382     {
 383       Z[k] += X[i] + Y[j];
 384       if (Z[k] >= RADIX)
 385         {
 386           Z[k] -= RADIX;
 387           Z[--k] = ONE;
 388         }
 389       else
 390         Z[--k] = ZERO;
 391     }
 392
 393   for (; i > 0; i--)
 394     {
 395       Z[k] += X[i];
 396       if (Z[k] >= RADIX)
 397         {
 398           Z[k] -= RADIX;
 399           Z[--k] = ONE;
 400         }
 401       else
 402         Z[--k] = ZERO;
 403     }
 404
 405   if (Z[1] == ZERO)
 406     {
 407       for (i = 1; i <= p2; i++)
 408         Z[i] = Z[i + 1];
 409     }
 410   else
 411     EZ += ONE;
 412 }
 413
 414 /* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
 415    The sign of the difference *Z is not changed.  X and Y may overlap but not X
 416    and Z or Y and Z.  One guard digit is used.  The error is less than one
 417    ULP.  */
 418 static void
 419 sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 420 {
 421   long i, j, k;
 422   long p2 = p;
 423
 424   EZ = EX;
 425
 426   if (EX == EY)
 427     {
 428       i = j = k = p2;
 429       Z[k] = Z[k + 1] = ZERO;
 430     }
 431   else
 432     {
 433       j = EX - EY;
 434       if (j > p2)
 435         {
 436           __cpy (x, z, p);
 437           return;
 438         }
 439       else
 440         {
 441           i = p2;
 442           j = p2 + 1 - j;
 443           k = p2;
 444           if (Y[j] > ZERO)
 445             {
 446               Z[k + 1] = RADIX - Y[j--];
 447               Z[k] = MONE;
 448             }
 449           else
 450             {
 451               Z[k + 1] = ZERO;
 452               Z[k] = ZERO;
 453               j--;
 454             }
 455         }
 456     }
 457
 458   for (; j > 0; i--, j--)
 459     {
 460       Z[k] += (X[i] - Y[j]);
 461       if (Z[k] < ZERO)
 462         {
 463           Z[k] += RADIX;
 464           Z[--k] = MONE;
 465         }
 466       else
 467         Z[--k] = ZERO;
 468     }
 469
 470   for (; i > 0; i--)
 471     {
 472       Z[k] += X[i];
 473       if (Z[k] < ZERO)
 474         {
 475           Z[k] += RADIX;
 476           Z[--k] = MONE;
 477         }
 478       else
 479         Z[--k] = ZERO;
 480     }
 481
 482   for (i = 1; Z[i] == ZERO; i++);
 483   EZ = EZ - i + 1;
 484   for (k = 1; i <= p2 + 1;)
 485     Z[k++] = Z[i++];
 486   for (; k <= p2;)
 487     Z[k++] = ZERO;
 488 }
 489
 490 /* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
 491    and Z or Y and Z.  One guard digit is used.  The error is less than one
 492    ULP.  */
 493 void
 494 __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
 495 {
 496   int n;
 497
 498   if (X[0] == ZERO)
 499     {
 500       __cpy (y, z, p);
 501       return;
 502     }
 503   else if (Y[0] == ZERO)
 504     {
 505       __cpy (x, z, p);
 506       return;
 507     }
 508
 509   if (X[0] == Y[0])
 510     {
 511       if (__acr (x, y, p) > 0)
 512         {
 513           add_magnitudes (x, y, z, p);
 514           Z[0] = X[0];
 515         }
 516       else
 517         {
 518           add_magnitudes (y, x, z, p);
 519           Z[0] = Y[0];
 520         }
 521     }
 522   else
 523     {
 524       if ((n = __acr (x, y, p)) == 1)
 525         {
 526           sub_magnitudes (x, y, z, p);
 527           Z[0] = X[0];
 528         }
 529       else if (n == -1)
 530         {
 531           sub_magnitudes (y, x, z, p);
 532           Z[0] = Y[0];
 533         }
 534       else
 535         Z[0] = ZERO;
 536     }
 537 }
 538
 539 /* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
 540    not X and Z or Y and Z.  One guard digit is used.  The error is less than
 541    one ULP.  */
 542 void
 543 __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
 544 {
 545   int n;
 546
 547   if (X[0] == ZERO)
 548     {
 549       __cpy (y, z, p);
 550       Z[0] = -Z[0];
 551       return;
 552     }
 553   else if (Y[0] == ZERO)
 554     {
 555       __cpy (x, z, p);
 556       return;
 557     }
 558
 559   if (X[0] != Y[0])
 560     {
 561       if (__acr (x, y, p) > 0)
 562         {
 563           add_magnitudes (x, y, z, p);
 564           Z[0] = X[0];
 565         }
 566       else
 567         {
 568           add_magnitudes (y, x, z, p);
 569           Z[0] = -Y[0];
 570         }
 571     }
 572   else
 573     {
 574       if ((n = __acr (x, y, p)) == 1)
 575         {
 576           sub_magnitudes (x, y, z, p);
 577           Z[0] = X[0];
 578         }
 579       else if (n == -1)
 580         {
 581           sub_magnitudes (y, x, z, p);
 582           Z[0] = -Y[0];
 583         }
 584       else
 585         Z[0] = ZERO;
 586     }
 587 }
 588
 589 /* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
 590    and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
 591    digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
 592 void
 593 __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 594 {
 595   long i, i1, i2, j, k, k2;
 596   long p2 = p;
 597   double u, zk, zk2;
 598
 599   /* Is z=0?  */
 600   if (__glibc_unlikely (X[0] * Y[0] == ZERO))
 601     {
 602       Z[0] = ZERO;
 603       return;
 604     }
 605
 606   /* Multiply, add and carry */
 607   k2 = (p2 < 3) ? p2 + p2 : p2 + 3;
 608   zk = Z[k2] = ZERO;
 609   for (k = k2; k > 1;)
 610     {
 611       if (k > p2)
 612         {
 613           i1 = k - p2;
 614           i2 = p2 + 1;
 615         }
 616       else
 617         {
 618           i1 = 1;
 619           i2 = k;
 620         }
 621 #if 1
 622       /* Rearrange this inner loop to allow the fmadd instructions to be
 623          independent and execute in parallel on processors that have
 624          dual symmetrical FP pipelines.  */
 625       if (i1 < (i2 - 1))
 626         {
 627           /* Make sure we have at least 2 iterations.  */
 628           if (((i2 - i1) & 1L) == 1L)
 629             {
 630               /* Handle the odd iterations case.  */
 631               zk2 = x->d[i2 - 1] * y->d[i1];
 632             }
 633           else
 634             zk2 = 0.0;
 635           /* Do two multiply/adds per loop iteration, using independent
 636              accumulators; zk and zk2.  */
 637           for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2)
 638             {
 639               zk += x->d[i] * y->d[j];
 640               zk2 += x->d[i + 1] * y->d[j - 1];
 641             }
 642           zk += zk2;            /* Final sum.  */
 643         }
 644       else
 645         {
 646           /* Special case when iterations is 1.  */
 647           zk += x->d[i1] * y->d[i1];
 648         }
 649 #else
 650       /* The original code.  */
 651       for (i = i1, j = i2 - 1; i < i2; i++, j--)
 652         zk += X[i] * Y[j];
 653 #endif
 654
 655       u = (zk + CUTTER) - CUTTER;
 656       if (u > zk)
 657         u -= RADIX;
 658       Z[k] = zk - u;
 659       zk = u * RADIXI;
 660       --k;
 661     }
 662   Z[k] = zk;
 663
 664   /* Is there a carry beyond the most significant digit?  */
 665   if (Z[1] == ZERO)
 666     {
 667       for (i = 1; i <= p2; i++)
 668         Z[i] = Z[i + 1];
 669       EZ = EX + EY - 1;
 670     }
 671   else
 672     EZ = EX + EY;
 673
 674   Z[0] = X[0] * Y[0];
 675 }
 676
 677 /* Square *X and store result in *Y.  X and Y may not overlap.  For P in
 678    [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
 679    error is bounded by 1.001 ULP.  This is a faster special case of
 680    multiplication.  */
 681 void
 682 __sqr (const mp_no *x, mp_no *y, int p)
 683 {
 684   long i, j, k, ip;
 685   double u, yk;
 686
 687   /* Is z=0?  */
 688   if (__glibc_unlikely (X[0] == ZERO))
 689     {
 690       Y[0] = ZERO;
 691       return;
 692     }
 693
 694   /* We need not iterate through all X's since it's pointless to
 695      multiply zeroes.  */
 696   for (ip = p; ip > 0; ip--)
 697     if (X[ip] != ZERO)
 698       break;
 699
 700   k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
 701
 702   while (k > 2 * ip + 1)
 703     Y[k--] = ZERO;
 704
 705   yk = ZERO;
 706
 707   while (k > p)
 708     {
 709       double yk2 = 0.0;
 710       long lim = k / 2;
 711
 712       if (k % 2 == 0)
 713         {
 714           yk += X[lim] * X[lim];
 715           lim--;
 716         }
 717
 718       /* In __mul, this loop (and the one within the next while loop) run
 719          between a range to calculate the mantissa as follows:
 720
 721          Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1]
 722                 + X[n] * Y[k]
 723
 724          For X == Y, we can get away with summing halfway and doubling the
 725          result.  For cases where the range size is even, the mid-point needs
 726          to be added separately (above).  */
 727       for (i = k - p, j = p; i <= lim; i++, j--)
 728         yk2 += X[i] * X[j];
 729
 730       yk += 2.0 * yk2;
 731
 732       u = (yk + CUTTER) - CUTTER;
 733       if (u > yk)
 734         u -= RADIX;
 735       Y[k--] = yk - u;
 736       yk = u * RADIXI;
 737     }
 738
 739   while (k > 1)
 740     {
 741       double yk2 = 0.0;
 742       long lim = k / 2;
 743
 744       if (k % 2 == 0)
 745         {
 746           yk += X[lim] * X[lim];
 747           lim--;
 748         }
 749
 750       /* Likewise for this loop.  */
 751       for (i = 1, j = k - 1; i <= lim; i++, j--)
 752         yk2 += X[i] * X[j];
 753
 754       yk += 2.0 * yk2;
 755
 756       u = (yk + CUTTER) - CUTTER;
 757       if (u > yk)
 758         u -= RADIX;
 759       Y[k--] = yk - u;
 760       yk = u * RADIXI;
 761     }
 762   Y[k] = yk;
 763
 764   /* Squares are always positive.  */
 765   Y[0] = 1.0;
 766
 767   EY = 2 * EX;
 768   /* Is there a carry beyond the most significant digit?  */
 769   if (__glibc_unlikely (Y[1] == ZERO))
 770     {
 771       for (i = 1; i <= p; i++)
 772         Y[i] = Y[i + 1];
 773       EY--;
 774     }
 775 }
 776
 777 /* Invert *X and store in *Y.  Relative error bound:
 778    - For P = 2: 1.001 * R ^ (1 - P)
 779    - For P = 3: 1.063 * R ^ (1 - P)
 780    - For P > 3: 2.001 * R ^ (1 - P)
 781
 782    *X = 0 is not permissible.  */
 783 static void
 784 __inv (const mp_no *x, mp_no *y, int p)
 785 {
 786   long i;
 787   double t;
 788   mp_no z, w;
 789   static const int np1[] =
 790     { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 791     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
 792   };
 793
 794   __cpy (x, &z, p);
 795   z.e = 0;
 796   __mp_dbl (&z, &t, p);
 797   t = ONE / t;
 798   __dbl_mp (t, y, p);
 799   EY -= EX;
 800
 801   for (i = 0; i < np1[p]; i++)
 802     {
 803       __cpy (y, &w, p);
 804       __mul (x, &w, y, p);
 805       __sub (&mptwo, y, &z, p);
 806       __mul (&w, &z, y, p);
 807     }
 808 }
 809
 810 /* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X and Z
 811    or Y and Z.  Relative error bound:
 812    - For P = 2: 2.001 * R ^ (1 - P)
 813    - For P = 3: 2.063 * R ^ (1 - P)
 814    - For P > 3: 3.001 * R ^ (1 - P)
 815
 816    *X = 0 is not permissible.  */
 817 void
 818 __dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
 819 {
 820   mp_no w;
 821
 822   if (X[0] == ZERO)
 823     Z[0] = ZERO;
 824   else
 825     {
 826       __inv (y, &w, p);
 827       __mul (x, &w, z, p);
 828     }
 829 }