[thirdparty/glibc.git] / sysdeps / ieee754 / dbl-64 / mpa.c

/*
 * IBM Accurate Mathematical Library
 * written by International Business Machines Corp.
 * Copyright (C) 2001-2013 Free Software Foundation, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
/************************************************************************/
/*  MODULE_NAME: mpa.c                                                  */
/*                                                                      */
/*  FUNCTIONS:                                                          */
/*               mcr                                                    */
/*               acr                                                    */
/*               cpy                                                    */
/*               norm                                                   */
/*               denorm                                                 */
/*               mp_dbl                                                 */
/*               dbl_mp                                                 */
/*               add_magnitudes                                         */
/*               sub_magnitudes                                         */
/*               add                                                    */
/*               sub                                                    */
/*               mul                                                    */
/*               inv                                                    */
/*               dvd                                                    */
/*                                                                      */
/* Arithmetic functions for multiple precision numbers.                 */
/* Relative errors are bounded                                          */
/************************************************************************/


#include "endian.h"
#include "mpa.h"
#include <sys/param.h>

#ifndef SECTION
# define SECTION
#endif

#ifndef NO__CONST
const mp_no mpone = {1, {1.0, 1.0}};
const mp_no mptwo = {1, {1.0, 2.0}};
#endif

#ifndef NO___ACR
/* Compare mantissa of two multiple precision numbers regardless of the sign
   and exponent of the numbers.  */
static int
mcr (const mp_no *x, const mp_no *y, int p)
{
  int i;
  for (i = 1; i <= p; i++)
    {
      if (X[i] == Y[i])
	continue;
      else if (X[i] > Y[i])
	return 1;
      else
	return -1;
    }
  return 0;
}

/* Compare the absolute values of two multiple precision numbers.  */
int
__acr (const mp_no *x, const mp_no *y, int p)
{
  int i;

  if (X[0] == ZERO)
    {
      if (Y[0] == ZERO)
	i = 0;
      else
	i = -1;
    }
  else if (Y[0] == ZERO)
    i = 1;
  else
    {
      if (EX > EY)
	i = 1;
      else if (EX < EY)
	i = -1;
      else
	i = mcr (x, y, p);
    }

  return i;
}
#endif

#ifndef NO___CPY
/* Copy multiple precision number X into Y.  They could be the same
   number.  */
void
__cpy (const mp_no *x, mp_no *y, int p)
{
  EY = EX;
  for (int i = 0; i <= p; i++)
    Y[i] = X[i];
}
#endif

#ifndef NO___MP_DBL
/* Convert a multiple precision number *X into a double precision
   number *Y, normalized case  (|x| >= 2**(-1022))).  */
static void
norm (const mp_no *x, double *y, int p)
{
#define R  RADIXI
  int i;
  double a, c, u, v, z[5];
  if (p < 5)
    {
      if (p == 1)
	c = X[1];
      else if (p == 2)
	c = X[1] + R * X[2];
      else if (p == 3)
	c = X[1] + R * (X[2] + R * X[3]);
      else if (p == 4)
	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
    }
  else
    {
      for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
	{
	  a *= TWO;
	  z[1] *= TWO;
	}

      for (i = 2; i < 5; i++)
	{
	  z[i] = X[i] * a;
	  u = (z[i] + CUTTER) - CUTTER;
	  if (u > z[i])
	    u -= RADIX;
	  z[i] -= u;
	  z[i - 1] += u * RADIXI;
	}

      u = (z[3] + TWO71) - TWO71;
      if (u > z[3])
	u -= TWO19;
      v = z[3] - u;

      if (v == TWO18)
	{
	  if (z[4] == ZERO)
	    {
	      for (i = 5; i <= p; i++)
		{
		  if (X[i] == ZERO)
		    continue;
		  else
		    {
		      z[3] += ONE;
		      break;
		    }
		}
	    }
	  else
	    z[3] += ONE;
	}

      c = (z[1] + R * (z[2] + R * z[3])) / a;
    }

  c *= X[0];

  for (i = 1; i < EX; i++)
    c *= RADIX;
  for (i = 1; i > EX; i--)
    c *= RADIXI;

  *y = c;
#undef R
}

/* Convert a multiple precision number *X into a double precision
   number *Y, Denormal case  (|x| < 2**(-1022))).  */
static void
denorm (const mp_no *x, double *y, int p)
{
  int i, k;
  double c, u, z[5];

#define R  RADIXI
  if (EX < -44 || (EX == -44 && X[1] < TWO5))
    {
      *y = ZERO;
      return;
    }

  if (p == 1)
    {
      if (EX == -42)
	{
	  z[1] = X[1] + TWO10;
	  z[2] = ZERO;
	  z[3] = ZERO;
	  k = 3;
	}
      else if (EX == -43)
	{
	  z[1] = TWO10;
	  z[2] = X[1];
	  z[3] = ZERO;
	  k = 2;
	}
      else
	{
	  z[1] = TWO10;
	  z[2] = ZERO;
	  z[3] = X[1];
	  k = 1;
	}
    }
  else if (p == 2)
    {
      if (EX == -42)
	{
	  z[1] = X[1] + TWO10;
	  z[2] = X[2];
	  z[3] = ZERO;
	  k = 3;
	}
      else if (EX == -43)
	{
	  z[1] = TWO10;
	  z[2] = X[1];
	  z[3] = X[2];
	  k = 2;
	}
      else
	{
	  z[1] = TWO10;
	  z[2] = ZERO;
	  z[3] = X[1];
	  k = 1;
	}
    }
  else
    {
      if (EX == -42)
	{
	  z[1] = X[1] + TWO10;
	  z[2] = X[2];
	  k = 3;
	}
      else if (EX == -43)
	{
	  z[1] = TWO10;
	  z[2] = X[1];
	  k = 2;
	}
      else
	{
	  z[1] = TWO10;
	  z[2] = ZERO;
	  k = 1;
	}
      z[3] = X[k];
    }

  u = (z[3] + TWO57) - TWO57;
  if (u > z[3])
    u -= TWO5;

  if (u == z[3])
    {
      for (i = k + 1; i <= p; i++)
	{
	  if (X[i] == ZERO)
	    continue;
	  else
	    {
	      z[3] += ONE;
	      break;
	    }
	}
    }

  c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);

  *y = c * TWOM1032;
#undef R
}

/* Convert multiple precision number *X into double precision number *Y.  The
   result is correctly rounded to the nearest/even.  */
void
__mp_dbl (const mp_no *x, double *y, int p)
{
  if (X[0] == ZERO)
    {
      *y = ZERO;
      return;
    }

  if (__glibc_likely (EX > -42 || (EX == -42 && X[1] >= TWO10)))
    norm (x, y, p);
  else
    denorm (x, y, p);
}
#endif

/* Get the multiple precision equivalent of X into *Y.  If the precision is too
   small, the result is truncated.  */
void
SECTION
__dbl_mp (double x, mp_no *y, int p)
{
  int i, n;
  double u;

  /* Sign.  */
  if (x == ZERO)
    {
      Y[0] = ZERO;
      return;
    }
  else if (x > ZERO)
    Y[0] = ONE;
  else
    {
      Y[0] = MONE;
      x = -x;
    }

  /* Exponent.  */
  for (EY = ONE; x >= RADIX; EY += ONE)
    x *= RADIXI;
  for (; x < ONE; EY -= ONE)
    x *= RADIX;

  /* Digits.  */
  n = MIN (p, 4);
  for (i = 1; i <= n; i++)
    {
      u = (x + TWO52) - TWO52;
      if (u > x)
	u -= ONE;
      Y[i] = u;
      x -= u;
      x *= RADIX;
    }
  for (; i <= p; i++)
    Y[i] = ZERO;
}

/* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
   sign of the sum *Z is not changed.  X and Y may overlap but not X and Z or
   Y and Z.  No guard digit is used.  The result equals the exact sum,
   truncated.  */
static void
SECTION
add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
  int i, j, k;
  double zk;

  EZ = EX;

  i = p;
  j = p + EY - EX;
  k = p + 1;

  if (__glibc_unlikely (j < 1))
    {
      __cpy (x, z, p);
      return;
    }

  zk = ZERO;

  for (; j > 0; i--, j--)
    {
      zk += X[i] + Y[j];
      if (zk >= RADIX)
	{
	  Z[k--] = zk - RADIX;
	  zk = ONE;
	}
      else
        {
	  Z[k--] = zk;
	  zk = ZERO;
	}
    }

  for (; i > 0; i--)
    {
      zk += X[i];
      if (zk >= RADIX)
	{
	  Z[k--] = zk - RADIX;
	  zk = ONE;
	}
      else
        {
	  Z[k--] = zk;
	  zk = ZERO;
	}
    }

  if (zk == ZERO)
    {
      for (i = 1; i <= p; i++)
	Z[i] = Z[i + 1];
    }
  else
    {
      Z[1] = zk;
      EZ += ONE;
    }
}

/* Subtract the magnitudes of *X and *Y assuming that abs (*x) > abs (*y) > 0.
   The sign of the difference *Z is not changed.  X and Y may overlap but not X
   and Z or Y and Z.  One guard digit is used.  The error is less than one
   ULP.  */
static void
SECTION
sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
  int i, j, k;
  double zk;

  EZ = EX;
  i = p;
  j = p + EY - EX;
  k = p;

  /* Y is too small compared to X, copy X over to the result.  */
  if (__glibc_unlikely (j < 1))
    {
      __cpy (x, z, p);
      return;
    }

  /* The relevant least significant digit in Y is non-zero, so we factor it in
     to enhance accuracy.  */
  if (j < p && Y[j + 1] > ZERO)
    {
      Z[k + 1] = RADIX - Y[j + 1];
      zk = MONE;
    }
  else
    zk = Z[k + 1] = ZERO;

  /* Subtract and borrow.  */
  for (; j > 0; i--, j--)
    {
      zk += (X[i] - Y[j]);
      if (zk < ZERO)
	{
	  Z[k--] = zk + RADIX;
	  zk = MONE;
	}
      else
        {
	  Z[k--] = zk;
	  zk = ZERO;
	}
    }

  /* We're done with digits from Y, so it's just digits in X.  */
  for (; i > 0; i--)
    {
      zk += X[i];
      if (zk < ZERO)
	{
	  Z[k--] = zk + RADIX;
	  zk = MONE;
	}
      else
        {
	  Z[k--] = zk;
	  zk = ZERO;
	}
    }

  /* Normalize.  */
  for (i = 1; Z[i] == ZERO; i++);
  EZ = EZ - i + 1;
  for (k = 1; i <= p + 1;)
    Z[k++] = Z[i++];
  for (; k <= p;)
    Z[k++] = ZERO;
}

/* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
   and Z or Y and Z.  One guard digit is used.  The error is less than one
   ULP.  */
void
SECTION
__add (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
  int n;

  if (X[0] == ZERO)
    {
      __cpy (y, z, p);
      return;
    }
  else if (Y[0] == ZERO)
    {
      __cpy (x, z, p);
      return;
    }

  if (X[0] == Y[0])
    {
      if (__acr (x, y, p) > 0)
	{
	  add_magnitudes (x, y, z, p);
	  Z[0] = X[0];
	}
      else
	{
	  add_magnitudes (y, x, z, p);
	  Z[0] = Y[0];
	}
    }
  else
    {
      if ((n = __acr (x, y, p)) == 1)
	{
	  sub_magnitudes (x, y, z, p);
	  Z[0] = X[0];
	}
      else if (n == -1)
	{
	  sub_magnitudes (y, x, z, p);
	  Z[0] = Y[0];
	}
      else
	Z[0] = ZERO;
    }
}

/* Subtract *Y from *X and return the result in *Z.  X and Y may overlap but
   not X and Z or Y and Z.  One guard digit is used.  The error is less than
   one ULP.  */
void
SECTION
__sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
  int n;

  if (X[0] == ZERO)
    {
      __cpy (y, z, p);
      Z[0] = -Z[0];
      return;
    }
  else if (Y[0] == ZERO)
    {
      __cpy (x, z, p);
      return;
    }

  if (X[0] != Y[0])
    {
      if (__acr (x, y, p) > 0)
	{
	  add_magnitudes (x, y, z, p);
	  Z[0] = X[0];
	}
      else
	{
	  add_magnitudes (y, x, z, p);
	  Z[0] = -Y[0];
	}
    }
  else
    {
      if ((n = __acr (x, y, p)) == 1)
	{
	  sub_magnitudes (x, y, z, p);
	  Z[0] = X[0];
	}
      else if (n == -1)
	{
	  sub_magnitudes (y, x, z, p);
	  Z[0] = -Y[0];
	}
      else
	Z[0] = ZERO;
    }
}

/* Multiply *X and *Y and store result in *Z.  X and Y may overlap but not X
   and Z or Y and Z.  For P in [1, 2, 3], the exact result is truncated to P
   digits.  In case P > 3 the error is bounded by 1.001 ULP.  */
void
SECTION
__mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
  int i, j, k, ip, ip2;
  double u, zk;
  const mp_no *a;

  /* Is z=0?  */
  if (__glibc_unlikely (X[0] * Y[0] == ZERO))
    {
      Z[0] = ZERO;
      return;
    }

  /* We need not iterate through all X's and Y's since it's pointless to
     multiply zeroes.  Here, both are zero...  */
  for (ip2 = p; ip2 > 0; ip2--)
    if (X[ip2] != ZERO || Y[ip2] != ZERO)
      break;

  a = X[ip2] != ZERO ? y : x;

  /* ... and here, at least one of them is still zero.  */
  for (ip = ip2; ip > 0; ip--)
    if (a->d[ip] != ZERO)
      break;

  /* The product looks like this for p = 3 (as an example):


				a1    a2    a3
		 x		b1    b2    b3
		 -----------------------------
			     a1*b3 a2*b3 a3*b3
		       a1*b2 a2*b2 a3*b2
		 a1*b1 a2*b1 a3*b1

     So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
     for P >= 3.  We compute the above digits in two parts; the last P-1
     digits and then the first P digits.  The last P-1 digits are a sum of
     products of the input digits from P to P-k where K is 0 for the least
     significant digit and increases as we go towards the left.  The product
     term is of the form X[k]*X[P-k] as can be seen in the above example.

     The first P digits are also a sum of products with the same product term,
     except that the sum is from 1 to k.  This is also evident from the above
     example.

     Another thing that becomes evident is that only the most significant
     ip+ip2 digits of the result are non-zero, where ip and ip2 are the
     'internal precision' of the input numbers, i.e. digits after ip and ip2
     are all ZERO.  */

  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;

  while (k > ip + ip2 + 1)
    Z[k--] = ZERO;

  zk = Z[k] = ZERO;

  while (k > p)
    {
      for (i = k - p, j = p; i < p + 1; i++, j--)
	zk += X[i] * Y[j];

      u = (zk + CUTTER) - CUTTER;
      if (u > zk)
	u -= RADIX;
      Z[k--] = zk - u;
      zk = u * RADIXI;
    }

  /* The real deal.  */
  while (k > 1)
    {
      for (i = 1, j = k - 1; i < k; i++, j--)
	zk += X[i] * Y[j];

      u = (zk + CUTTER) - CUTTER;
      if (u > zk)
	u -= RADIX;
      Z[k--] = zk - u;
      zk = u * RADIXI;
    }
  Z[k] = zk;

  /* Get the exponent sum into an intermediate variable.  This is a subtle
     optimization, where given enough registers, all operations on the exponent
     happen in registers and the result is written out only once into EZ.  */
  int e = EX + EY;

  /* Is there a carry beyond the most significant digit?  */
  if (__glibc_unlikely (Z[1] == ZERO))
    {
      for (i = 1; i <= p; i++)
	Z[i] = Z[i + 1];
      e--;
    }

  EZ = e;
  Z[0] = X[0] * Y[0];
}

/* Square *X and store result in *Y.  X and Y may not overlap.  For P in
   [1, 2, 3], the exact result is truncated to P digits.  In case P > 3 the
   error is bounded by 1.001 ULP.  This is a faster special case of
   multiplication.  */
void
SECTION
__sqr (const mp_no *x, mp_no *y, int p)
{
  long i, j, k, ip;
  double u, yk;

  /* Is z=0?  */
  if (__glibc_unlikely (X[0] == ZERO))
    {
      Y[0] = ZERO;
      return;
    }

  /* We need not iterate through all X's since it's pointless to
     multiply zeroes.  */
  for (ip = p; ip > 0; ip--)
    if (X[ip] != ZERO)
      break;

  k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;

  while (k > 2 * ip + 1)
    Y[k--] = ZERO;

  yk = ZERO;

  while (k > p)
    {
      double yk2 = 0.0;
      long lim = k / 2;

      if (k % 2 == 0)
        {
	  yk += X[lim] * X[lim];
	  lim--;
	}

      for (i = k - p, j = p; i <= lim; i++, j--)
	yk2 += X[i] * X[j];

      yk += 2.0 * yk2;

      u = (yk + CUTTER) - CUTTER;
      if (u > yk)
	u -= RADIX;
      Y[k--] = yk - u;
      yk = u * RADIXI;
    }

  while (k > 1)
    {
      double yk2 = 0.0;
      long lim = k / 2;

      if (k % 2 == 0)
        {
	  yk += X[lim] * X[lim];
	  lim--;
	}

      for (i = 1, j = k - 1; i <= lim; i++, j--)
	yk2 += X[i] * X[j];

      yk += 2.0 * yk2;

      u = (yk + CUTTER) - CUTTER;
      if (u > yk)
	u -= RADIX;
      Y[k--] = yk - u;
      yk = u * RADIXI;
    }
  Y[k] = yk;

  /* Squares are always positive.  */
  Y[0] = 1.0;

  /* Get the exponent sum into an intermediate variable.  This is a subtle
     optimization, where given enough registers, all operations on the exponent
     happen in registers and the result is written out only once into EZ.  */
  int e = EX * 2;

  /* Is there a carry beyond the most significant digit?  */
  if (__glibc_unlikely (Y[1] == ZERO))
    {
      for (i = 1; i <= p; i++)
	Y[i] = Y[i + 1];
      e--;
    }

  EY = e;
}

/* Invert *X and store in *Y.  Relative error bound:
   - For P = 2: 1.001 * R ^ (1 - P)
   - For P = 3: 1.063 * R ^ (1 - P)
   - For P > 3: 2.001 * R ^ (1 - P)

   *X = 0 is not permissible.  */
static void
SECTION
__inv (const mp_no *x, mp_no *y, int p)
{
  int i;
  double t;
  mp_no z, w;
  static const int np1[] =
    { 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
  };

  __cpy (x, &z, p);
  z.e = 0;
  __mp_dbl (&z, &t, p);
  t = ONE / t;
  __dbl_mp (t, y, p);
  EY -= EX;

  for (i = 0; i < np1[p]; i++)
    {
      __cpy (y, &w, p);
      __mul (x, &w, y, p);
      __sub (&mptwo, y, &z, p);
      __mul (&w, &z, y, p);
    }
}

/* Divide *X by *Y and store result in *Z.  X and Y may overlap but not X and Z
   or Y and Z.  Relative error bound:
   - For P = 2: 2.001 * R ^ (1 - P)
   - For P = 3: 2.063 * R ^ (1 - P)
   - For P > 3: 3.001 * R ^ (1 - P)

   *X = 0 is not permissible.  */
void
SECTION
__dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
{
  mp_no w;

  if (X[0] == ZERO)
    Z[0] = ZERO;
  else
    {
      __inv (y, &w, p);
      __mul (x, &w, z, p);
    }
}
Commit	Line	Data
e4d82761 UD	1	/*
e4d82761 UD	2	* IBM Accurate Mathematical Library
aeb25823	3	* written by International Business Machines Corp.
568035b7	4	* Copyright (C) 2001-2013 Free Software Foundation, Inc.
e4d82761 UD	5	*
	6	* This program is free software; you can redistribute it and/or modify
	7	* it under the terms of the GNU Lesser General Public License as published by
cc7375ce	8	* the Free Software Foundation; either version 2.1 of the License, or
e4d82761	9	* (at your option) any later version.
50944bca	10	*
e4d82761 UD	11	* This program is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
c6c6dd48	14	* GNU Lesser General Public License for more details.
e4d82761 UD	15	*
e4d82761 UD	16	* You should have received a copy of the GNU Lesser General Public License
59ba27a6	17	* along with this program; if not, see <http://www.gnu.org/licenses/>.
e4d82761 UD	18	*/
	19	/************************************************************************/
	20	/* MODULE_NAME: mpa.c */
	21	/* */
	22	/* FUNCTIONS: */
	23	/* mcr */
	24	/* acr */
e4d82761	25	/* cpy */
e4d82761 UD	26	/* norm */
	27	/* denorm */
	28	/* mp_dbl */
	29	/* dbl_mp */
	30	/* add_magnitudes */
	31	/* sub_magnitudes */
	32	/* add */
	33	/* sub */
	34	/* mul */
	35	/* inv */
	36	/* dvd */
	37	/* */
	38	/* Arithmetic functions for multiple precision numbers. */
	39	/* Relative errors are bounded */
	40	/************************************************************************/
	41
	42
	43	#include "endian.h"
	44	#include "mpa.h"
950c99ca	45	#include <sys/param.h>
31d3cc00 UD	46
	47	#ifndef SECTION
	48	# define SECTION
	49	#endif
	50
b76eb5f0 SP	51	#ifndef NO__CONST
b76eb5f0 SP	52	const mp_no mpone = {1, {1.0, 1.0}};
da08f647	53	const mp_no mptwo = {1, {1.0, 2.0}};
b76eb5f0 SP	54	#endif
b76eb5f0 SP	55
31d3cc00	56	#ifndef NO___ACR
950c99ca SP	57	/* Compare mantissa of two multiple precision numbers regardless of the sign
950c99ca SP	58	and exponent of the numbers. */
31d3cc00	59	static int
1066a534 SP	60	mcr (const mp_no x, const mp_no y, int p)
1066a534 SP	61	{
e4d82761	62	int i;
1066a534 SP	63	for (i = 1; i <= p; i++)
	64	{
	65	if (X[i] == Y[i])
	66	continue;
	67	else if (X[i] > Y[i])
	68	return 1;
	69	else
	70	return -1;
	71	}
e4d82761 UD	72	return 0;
	73	}
	74
950c99ca	75	/* Compare the absolute values of two multiple precision numbers. */
31d3cc00	76	int
1066a534 SP	77	__acr (const mp_no x, const mp_no y, int p)
1066a534 SP	78	{
e4d82761 UD	79	int i;
e4d82761 UD	80
1066a534 SP	81	if (X[0] == ZERO)
	82	{
	83	if (Y[0] == ZERO)
	84	i = 0;
	85	else
	86	i = -1;
	87	}
	88	else if (Y[0] == ZERO)
	89	i = 1;
	90	else
	91	{
	92	if (EX > EY)
	93	i = 1;
	94	else if (EX < EY)
	95	i = -1;
	96	else
	97	i = mcr (x, y, p);
	98	}
e4d82761 UD	99
	100	return i;
	101	}
31d3cc00	102	#endif
e4d82761	103
af968f62	104	#ifndef NO___CPY
950c99ca SP	105	/* Copy multiple precision number X into Y. They could be the same
950c99ca SP	106	number. */
1066a534 SP	107	void
	108	__cpy (const mp_no x, mp_no y, int p)
	109	{
e4d82761	110	EY = EX;
1066a534 SP	111	for (int i = 0; i <= p; i++)
1066a534 SP	112	Y[i] = X[i];
e4d82761	113	}
af968f62	114	#endif
e4d82761	115
af968f62	116	#ifndef NO___MP_DBL
950c99ca SP	117	/* Convert a multiple precision number *X into a double precision
950c99ca SP	118	number Y, normalized case (\|x\| >= 2(-1022))). /
1066a534 SP	119	static void
1066a534 SP	120	norm (const mp_no x, double y, int p)
50944bca	121	{
1066a534	122	#define R RADIXI
50944bca	123	int i;
1066a534 SP	124	double a, c, u, v, z[5];
	125	if (p < 5)
	126	{
	127	if (p == 1)
	128	c = X[1];
	129	else if (p == 2)
	130	c = X[1] + R * X[2];
	131	else if (p == 3)
	132	c = X[1] + R * (X[2] + R * X[3]);
	133	else if (p == 4)
	134	c = (X[1] + R * X[2]) + R * R * (X[3] + R * X[4]);
e4d82761	135	}
1066a534 SP	136	else
	137	{
	138	for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
	139	{
	140	a *= TWO;
	141	z[1] *= TWO;
	142	}
e4d82761	143
1066a534 SP	144	for (i = 2; i < 5; i++)
	145	{
	146	z[i] = X[i] * a;
	147	u = (z[i] + CUTTER) - CUTTER;
	148	if (u > z[i])
	149	u -= RADIX;
	150	z[i] -= u;
	151	z[i - 1] += u * RADIXI;
	152	}
	153
	154	u = (z[3] + TWO71) - TWO71;
	155	if (u > z[3])
	156	u -= TWO19;
	157	v = z[3] - u;
	158
	159	if (v == TWO18)
	160	{
	161	if (z[4] == ZERO)
	162	{
	163	for (i = 5; i <= p; i++)
	164	{
	165	if (X[i] == ZERO)
	166	continue;
	167	else
	168	{
	169	z[3] += ONE;
	170	break;
	171	}
	172	}
	173	}
	174	else
	175	z[3] += ONE;
	176	}
	177
	178	c = (z[1] + R * (z[2] + R * z[3])) / a;
	179	}
e4d82761 UD	180
	181	c *= X[0];
	182
1066a534 SP	183	for (i = 1; i < EX; i++)
	184	c *= RADIX;
	185	for (i = 1; i > EX; i--)
	186	c *= RADIXI;
e4d82761 UD	187
e4d82761 UD	188	*y = c;
e4d82761 UD	189	#undef R
	190	}
	191
950c99ca SP	192	/* Convert a multiple precision number *X into a double precision
950c99ca SP	193	number Y, Denormal case (\|x\| < 2(-1022))). /
1066a534 SP	194	static void
1066a534 SP	195	denorm (const mp_no x, double y, int p)
50944bca	196	{
1066a534 SP	197	int i, k;
1066a534 SP	198	double c, u, z[5];
e4d82761	199
99136f82	200	#define R RADIXI
1066a534 SP	201	if (EX < -44 \|\| (EX == -44 && X[1] < TWO5))
	202	{
	203	*y = ZERO;
	204	return;
	205	}
	206
	207	if (p == 1)
	208	{
	209	if (EX == -42)
	210	{
	211	z[1] = X[1] + TWO10;
	212	z[2] = ZERO;
	213	z[3] = ZERO;
	214	k = 3;
	215	}
	216	else if (EX == -43)
	217	{
	218	z[1] = TWO10;
	219	z[2] = X[1];
	220	z[3] = ZERO;
	221	k = 2;
	222	}
	223	else
	224	{
	225	z[1] = TWO10;
	226	z[2] = ZERO;
	227	z[3] = X[1];
	228	k = 1;
	229	}
	230	}
	231	else if (p == 2)
	232	{
	233	if (EX == -42)
	234	{
	235	z[1] = X[1] + TWO10;
	236	z[2] = X[2];
	237	z[3] = ZERO;
	238	k = 3;
	239	}
	240	else if (EX == -43)
	241	{
	242	z[1] = TWO10;
	243	z[2] = X[1];
	244	z[3] = X[2];
	245	k = 2;
	246	}
	247	else
	248	{
	249	z[1] = TWO10;
	250	z[2] = ZERO;
	251	z[3] = X[1];
	252	k = 1;
	253	}
	254	}
	255	else
	256	{
	257	if (EX == -42)
	258	{
	259	z[1] = X[1] + TWO10;
	260	z[2] = X[2];
	261	k = 3;
	262	}
	263	else if (EX == -43)
	264	{
265	z[1] = TWO10;
266	z[2] = X[1];
267	k = 2;
268	}
269	else
270	{
271	z[1] = TWO10;
272	z[2] = ZERO;
273	k = 1;
274	}
275	z[3] = X[k];
276	}
e4d82761 UD	277
e4d82761 UD	278	u = (z[3] + TWO57) - TWO57;
1066a534 SP	279	if (u > z[3])
1066a534 SP	280	u -= TWO5;
e4d82761	281
1066a534 SP	282	if (u == z[3])
	283	{
	284	for (i = k + 1; i <= p; i++)
	285	{
	286	if (X[i] == ZERO)
	287	continue;
	288	else
	289	{
	290	z[3] += ONE;
	291	break;
	292	}
	293	}
e4d82761	294	}
e4d82761	295
1066a534	296	c = X[0] * ((z[1] + R * (z[2] + R * z[3])) - TWO10);
e4d82761	297
1066a534	298	y = c TWOM1032;
e4d82761 UD	299	#undef R
	300	}
	301
950c99ca SP	302	/* Convert multiple precision number X into double precision number Y. The
950c99ca SP	303	result is correctly rounded to the nearest/even. */
1066a534 SP	304	void
	305	__mp_dbl (const mp_no x, double y, int p)
	306	{
	307	if (X[0] == ZERO)
	308	{
	309	*y = ZERO;
	310	return;
	311	}
e4d82761	312
18ea052c	313	if (__glibc_likely (EX > -42 \|\| (EX == -42 && X[1] >= TWO10)))
1066a534	314	norm (x, y, p);
18ea052c	315	else
1066a534	316	denorm (x, y, p);
e4d82761	317	}
af968f62	318	#endif
e4d82761	319
950c99ca SP	320	/* Get the multiple precision equivalent of X into *Y. If the precision is too
950c99ca SP	321	small, the result is truncated. */
31d3cc00 UD	322	void
31d3cc00 UD	323	SECTION
1066a534 SP	324	__dbl_mp (double x, mp_no *y, int p)
	325	{
	326	int i, n;
e4d82761 UD	327	double u;
e4d82761 UD	328
950c99ca	329	/* Sign. */
1066a534 SP	330	if (x == ZERO)
	331	{
	332	Y[0] = ZERO;
	333	return;
	334	}
	335	else if (x > ZERO)
	336	Y[0] = ONE;
	337	else
	338	{
	339	Y[0] = MONE;
	340	x = -x;
	341	}
e4d82761	342
950c99ca	343	/* Exponent. */
1066a534 SP	344	for (EY = ONE; x >= RADIX; EY += ONE)
	345	x *= RADIXI;
	346	for (; x < ONE; EY -= ONE)
	347	x *= RADIX;
e4d82761	348
950c99ca	349	/* Digits. */
1066a534 SP	350	n = MIN (p, 4);
	351	for (i = 1; i <= n; i++)
	352	{
	353	u = (x + TWO52) - TWO52;
	354	if (u > x)
	355	u -= ONE;
	356	Y[i] = u;
	357	x -= u;
	358	x *= RADIX;
	359	}
	360	for (; i <= p; i++)
	361	Y[i] = ZERO;
e4d82761 UD	362	}
e4d82761 UD	363
950c99ca SP	364	/* Add magnitudes of X and Y assuming that abs (X) >= abs (Y) > 0. The
	365	sign of the sum *Z is not changed. X and Y may overlap but not X and Z or
	366	Y and Z. No guard digit is used. The result equals the exact sum,
	367	truncated. */
31d3cc00 UD	368	static void
31d3cc00 UD	369	SECTION
1066a534 SP	370	add_magnitudes (const mp_no x, const mp_no y, mp_no *z, int p)
	371	{
	372	int i, j, k;
bdf02814	373	double zk;
e4d82761 UD	374
	375	EZ = EX;
	376
1066a534 SP	377	i = p;
	378	j = p + EY - EX;
	379	k = p + 1;
	380
bdf02814	381	if (__glibc_unlikely (j < 1))
1066a534 SP	382	{
	383	__cpy (x, z, p);
	384	return;
	385	}
bdf02814 SP	386
bdf02814 SP	387	zk = ZERO;
1066a534 SP	388
	389	for (; j > 0; i--, j--)
	390	{
bdf02814 SP	391	zk += X[i] + Y[j];
bdf02814 SP	392	if (zk >= RADIX)
1066a534	393	{
bdf02814 SP	394	Z[k--] = zk - RADIX;
bdf02814 SP	395	zk = ONE;
1066a534 SP	396	}
1066a534 SP	397	else
bdf02814 SP	398	{
	399	Z[k--] = zk;
	400	zk = ZERO;
	401	}
1066a534 SP	402	}
	403
	404	for (; i > 0; i--)
	405	{
bdf02814 SP	406	zk += X[i];
bdf02814 SP	407	if (zk >= RADIX)
1066a534	408	{
bdf02814 SP	409	Z[k--] = zk - RADIX;
bdf02814 SP	410	zk = ONE;
1066a534 SP	411	}
1066a534 SP	412	else
bdf02814 SP	413	{
	414	Z[k--] = zk;
	415	zk = ZERO;
	416	}
1066a534 SP	417	}
1066a534 SP	418
bdf02814	419	if (zk == ZERO)
1066a534 SP	420	{
	421	for (i = 1; i <= p; i++)
	422	Z[i] = Z[i + 1];
	423	}
	424	else
bdf02814 SP	425	{
	426	Z[1] = zk;
	427	EZ += ONE;
	428	}
e4d82761 UD	429	}
e4d82761 UD	430
950c99ca SP	431	/* Subtract the magnitudes of X and Y assuming that abs (x) > abs (y) > 0.
	432	The sign of the difference *Z is not changed. X and Y may overlap but not X
	433	and Z or Y and Z. One guard digit is used. The error is less than one
	434	ULP. */
31d3cc00 UD	435	static void
31d3cc00 UD	436	SECTION
1066a534 SP	437	sub_magnitudes (const mp_no x, const mp_no y, mp_no *z, int p)
	438	{
	439	int i, j, k;
bdf02814	440	double zk;
e4d82761 UD	441
e4d82761 UD	442	EZ = EX;
bdf02814 SP	443	i = p;
	444	j = p + EY - EX;
	445	k = p;
e4d82761	446
bdf02814 SP	447	/* Y is too small compared to X, copy X over to the result. */
bdf02814 SP	448	if (__glibc_unlikely (j < 1))
1066a534	449	{
bdf02814 SP	450	__cpy (x, z, p);
bdf02814 SP	451	return;
1066a534	452	}
bdf02814 SP	453
	454	/* The relevant least significant digit in Y is non-zero, so we factor it in
	455	to enhance accuracy. */
	456	if (j < p && Y[j + 1] > ZERO)
1066a534	457	{
bdf02814 SP	458	Z[k + 1] = RADIX - Y[j + 1];
bdf02814 SP	459	zk = MONE;
1066a534	460	}
bdf02814 SP	461	else
bdf02814 SP	462	zk = Z[k + 1] = ZERO;
1066a534	463
bdf02814	464	/* Subtract and borrow. */
1066a534 SP	465	for (; j > 0; i--, j--)
1066a534 SP	466	{
bdf02814 SP	467	zk += (X[i] - Y[j]);
bdf02814 SP	468	if (zk < ZERO)
1066a534	469	{
bdf02814 SP	470	Z[k--] = zk + RADIX;
bdf02814 SP	471	zk = MONE;
1066a534 SP	472	}
1066a534 SP	473	else
bdf02814 SP	474	{
	475	Z[k--] = zk;
	476	zk = ZERO;
	477	}
1066a534 SP	478	}
1066a534 SP	479
bdf02814	480	/* We're done with digits from Y, so it's just digits in X. */
1066a534 SP	481	for (; i > 0; i--)
1066a534 SP	482	{
bdf02814 SP	483	zk += X[i];
bdf02814 SP	484	if (zk < ZERO)
1066a534	485	{
bdf02814 SP	486	Z[k--] = zk + RADIX;
bdf02814 SP	487	zk = MONE;
1066a534 SP	488	}
1066a534 SP	489	else
bdf02814 SP	490	{
	491	Z[k--] = zk;
	492	zk = ZERO;
	493	}
1066a534 SP	494	}
1066a534 SP	495
bdf02814	496	/* Normalize. */
1066a534	497	for (i = 1; Z[i] == ZERO; i++);
e4d82761	498	EZ = EZ - i + 1;
1066a534	499	for (k = 1; i <= p + 1;)
e4d82761	500	Z[k++] = Z[i++];
1066a534	501	for (; k <= p;)
e4d82761	502	Z[k++] = ZERO;
e4d82761 UD	503	}
e4d82761 UD	504
950c99ca SP	505	/* Add X and Y and store the result in *Z. X and Y may overlap, but not X
	506	and Z or Y and Z. One guard digit is used. The error is less than one
	507	ULP. */
31d3cc00 UD	508	void
31d3cc00 UD	509	SECTION
1066a534 SP	510	__add (const mp_no x, const mp_no y, mp_no *z, int p)
1066a534 SP	511	{
e4d82761 UD	512	int n;
e4d82761 UD	513
1066a534 SP	514	if (X[0] == ZERO)
	515	{
	516	__cpy (y, z, p);
	517	return;
	518	}
	519	else if (Y[0] == ZERO)
	520	{
	521	__cpy (x, z, p);
	522	return;
	523	}
	524
	525	if (X[0] == Y[0])
	526	{
	527	if (__acr (x, y, p) > 0)
	528	{
	529	add_magnitudes (x, y, z, p);
	530	Z[0] = X[0];
	531	}
	532	else
	533	{
	534	add_magnitudes (y, x, z, p);
	535	Z[0] = Y[0];
	536	}
	537	}
	538	else
	539	{
	540	if ((n = __acr (x, y, p)) == 1)
	541	{
	542	sub_magnitudes (x, y, z, p);
	543	Z[0] = X[0];
	544	}
	545	else if (n == -1)
	546	{
	547	sub_magnitudes (y, x, z, p);
	548	Z[0] = Y[0];
	549	}
	550	else
	551	Z[0] = ZERO;
	552	}
e4d82761 UD	553	}
e4d82761 UD	554
950c99ca SP	555	/* Subtract Y from X and return the result in *Z. X and Y may overlap but
	556	not X and Z or Y and Z. One guard digit is used. The error is less than
	557	one ULP. */
31d3cc00 UD	558	void
31d3cc00 UD	559	SECTION
1066a534 SP	560	__sub (const mp_no x, const mp_no y, mp_no *z, int p)
1066a534 SP	561	{
e4d82761 UD	562	int n;
e4d82761 UD	563
1066a534 SP	564	if (X[0] == ZERO)
	565	{
	566	__cpy (y, z, p);
	567	Z[0] = -Z[0];
	568	return;
	569	}
	570	else if (Y[0] == ZERO)
	571	{
	572	__cpy (x, z, p);
	573	return;
	574	}
	575
	576	if (X[0] != Y[0])
	577	{
	578	if (__acr (x, y, p) > 0)
	579	{
	580	add_magnitudes (x, y, z, p);
	581	Z[0] = X[0];
	582	}
	583	else
	584	{
	585	add_magnitudes (y, x, z, p);
	586	Z[0] = -Y[0];
	587	}
	588	}
	589	else
	590	{
	591	if ((n = __acr (x, y, p)) == 1)
	592	{
	593	sub_magnitudes (x, y, z, p);
	594	Z[0] = X[0];
	595	}
	596	else if (n == -1)
	597	{
	598	sub_magnitudes (y, x, z, p);
	599	Z[0] = -Y[0];
	600	}
	601	else
	602	Z[0] = ZERO;
	603	}
e4d82761 UD	604	}
e4d82761 UD	605
950c99ca SP	606	/* Multiply X and Y and store result in *Z. X and Y may overlap but not X
	607	and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P
	608	digits. In case P > 3 the error is bounded by 1.001 ULP. */
31d3cc00 UD	609	void
31d3cc00 UD	610	SECTION
1066a534 SP	611	__mul (const mp_no x, const mp_no y, mp_no *z, int p)
1066a534 SP	612	{
909279a5	613	int i, j, k, ip, ip2;
2a91b573	614	double u, zk;
2d0e0f29	615	const mp_no *a;
e4d82761	616
44e0d4c2 SP	617	/* Is z=0? */
	618	if (__glibc_unlikely (X[0] * Y[0] == ZERO))
	619	{
1066a534	620	Z[0] = ZERO;
44e0d4c2 SP	621	return;
44e0d4c2 SP	622	}
e4d82761	623
909279a5 SP	624	/* We need not iterate through all X's and Y's since it's pointless to
	625	multiply zeroes. Here, both are zero... */
	626	for (ip2 = p; ip2 > 0; ip2--)
	627	if (X[ip2] != ZERO \|\| Y[ip2] != ZERO)
	628	break;
44e0d4c2	629
2d0e0f29 SP	630	a = X[ip2] != ZERO ? y : x;
2d0e0f29 SP	631
909279a5 SP	632	/* ... and here, at least one of them is still zero. */
909279a5 SP	633	for (ip = ip2; ip > 0; ip--)
2d0e0f29	634	if (a->d[ip] != ZERO)
909279a5 SP	635	break;
	636
	637	/* The product looks like this for p = 3 (as an example):
	638
	639
	640	a1 a2 a3
	641	x b1 b2 b3
	642	-----------------------------
	643	a1b3 a2b3 a3*b3
	644	a1b2 a2b2 a3*b2
	645	a1b1 a2b1 a3*b1
	646
	647	So our K needs to ideally be P*2, but we're limiting ourselves to P + 3
	648	for P >= 3. We compute the above digits in two parts; the last P-1
	649	digits and then the first P digits. The last P-1 digits are a sum of
	650	products of the input digits from P to P-k where K is 0 for the least
	651	significant digit and increases as we go towards the left. The product
	652	term is of the form X[k]*X[P-k] as can be seen in the above example.
	653
	654	The first P digits are also a sum of products with the same product term,
	655	except that the sum is from 1 to k. This is also evident from the above
	656	example.
	657
	658	Another thing that becomes evident is that only the most significant
	659	ip+ip2 digits of the result are non-zero, where ip and ip2 are the
	660	'internal precision' of the input numbers, i.e. digits after ip and ip2
	661	are all ZERO. */
	662
	663	k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
	664
	665	while (k > ip + ip2 + 1)
	666	Z[k--] = ZERO;
	667
	668	zk = Z[k] = ZERO;
	669
	670	while (k > p)
44e0d4c2 SP	671	{
44e0d4c2 SP	672	for (i = k - p, j = p; i < p + 1; i++, j--)
2a91b573	673	zk += X[i] * Y[j];
44e0d4c2	674
2a91b573 SP	675	u = (zk + CUTTER) - CUTTER;
2a91b573 SP	676	if (u > zk)
44e0d4c2	677	u -= RADIX;
909279a5	678	Z[k--] = zk - u;
2a91b573	679	zk = u * RADIXI;
44e0d4c2 SP	680	}
44e0d4c2 SP	681
909279a5	682	/* The real deal. */
44e0d4c2 SP	683	while (k > 1)
44e0d4c2 SP	684	{
1066a534	685	for (i = 1, j = k - 1; i < k; i++, j--)
2a91b573	686	zk += X[i] * Y[j];
44e0d4c2	687
2a91b573 SP	688	u = (zk + CUTTER) - CUTTER;
2a91b573 SP	689	if (u > zk)
44e0d4c2	690	u -= RADIX;
909279a5	691	Z[k--] = zk - u;
2a91b573	692	zk = u * RADIXI;
44e0d4c2	693	}
2a91b573	694	Z[k] = zk;
44e0d4c2	695
4709fe76 SP	696	/* Get the exponent sum into an intermediate variable. This is a subtle
	697	optimization, where given enough registers, all operations on the exponent
	698	happen in registers and the result is written out only once into EZ. */
	699	int e = EX + EY;
	700
950c99ca	701	/* Is there a carry beyond the most significant digit? */
44e0d4c2 SP	702	if (__glibc_unlikely (Z[1] == ZERO))
	703	{
	704	for (i = 1; i <= p; i++)
1066a534	705	Z[i] = Z[i + 1];
4709fe76	706	e--;
44e0d4c2	707	}
e4d82761	708
4709fe76	709	EZ = e;
e4d82761	710	Z[0] = X[0] * Y[0];
e4d82761 UD	711	}
e4d82761 UD	712
d6752ccd SP	713	/* Square X and store result in Y. X and Y may not overlap. For P in
	714	[1, 2, 3], the exact result is truncated to P digits. In case P > 3 the
	715	error is bounded by 1.001 ULP. This is a faster special case of
	716	multiplication. */
	717	void
	718	SECTION
	719	__sqr (const mp_no x, mp_no y, int p)
	720	{
	721	long i, j, k, ip;
	722	double u, yk;
	723
	724	/* Is z=0? */
	725	if (__glibc_unlikely (X[0] == ZERO))
	726	{
	727	Y[0] = ZERO;
	728	return;
	729	}
	730
	731	/* We need not iterate through all X's since it's pointless to
	732	multiply zeroes. */
	733	for (ip = p; ip > 0; ip--)
	734	if (X[ip] != ZERO)
	735	break;
	736
	737	k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
	738
	739	while (k > 2 * ip + 1)
	740	Y[k--] = ZERO;
	741
	742	yk = ZERO;
	743
	744	while (k > p)
	745	{
	746	double yk2 = 0.0;
	747	long lim = k / 2;
	748
	749	if (k % 2 == 0)
	750	{
	751	yk += X[lim] * X[lim];
	752	lim--;
	753	}
	754
	755	for (i = k - p, j = p; i <= lim; i++, j--)
	756	yk2 += X[i] * X[j];
	757
	758	yk += 2.0 * yk2;
	759
	760	u = (yk + CUTTER) - CUTTER;
	761	if (u > yk)
	762	u -= RADIX;
	763	Y[k--] = yk - u;
	764	yk = u * RADIXI;
	765	}
	766
	767	while (k > 1)
	768	{
	769	double yk2 = 0.0;
	770	long lim = k / 2;
	771
	772	if (k % 2 == 0)
	773	{
	774	yk += X[lim] * X[lim];
	775	lim--;
	776	}
777
778	for (i = 1, j = k - 1; i <= lim; i++, j--)
779	yk2 += X[i] * X[j];
780
781	yk += 2.0 * yk2;
782
783	u = (yk + CUTTER) - CUTTER;
784	if (u > yk)
785	u -= RADIX;
786	Y[k--] = yk - u;
787	yk = u * RADIXI;
788	}
789	Y[k] = yk;
790
791	/* Squares are always positive. */
792	Y[0] = 1.0;
793
4709fe76 SP	794	/* Get the exponent sum into an intermediate variable. This is a subtle
	795	optimization, where given enough registers, all operations on the exponent
	796	happen in registers and the result is written out only once into EZ. */
	797	int e = EX * 2;
	798
d6752ccd SP	799	/* Is there a carry beyond the most significant digit? */
	800	if (__glibc_unlikely (Y[1] == ZERO))
	801	{
	802	for (i = 1; i <= p; i++)
	803	Y[i] = Y[i + 1];
4709fe76	804	e--;
d6752ccd	805	}
4709fe76 SP	806
4709fe76 SP	807	EY = e;
d6752ccd SP	808	}
d6752ccd SP	809
950c99ca SP	810	/* Invert X and store in Y. Relative error bound:
	811	- For P = 2: 1.001 * R ^ (1 - P)
	812	- For P = 3: 1.063 * R ^ (1 - P)
	813	- For P > 3: 2.001 * R ^ (1 - P)
e4d82761	814
950c99ca	815	X = 0 is not permissible. /
1066a534	816	static void
31d3cc00	817	SECTION
1066a534 SP	818	__inv (const mp_no x, mp_no y, int p)
1066a534 SP	819	{
50944bca	820	int i;
e4d82761	821	double t;
1066a534 SP	822	mp_no z, w;
	823	static const int np1[] =
	824	{ 0, 0, 0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	825	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
	826	};
	827
	828	__cpy (x, &z, p);
	829	z.e = 0;
	830	__mp_dbl (&z, &t, p);
	831	t = ONE / t;
	832	__dbl_mp (t, y, p);
	833	EY -= EX;
	834
	835	for (i = 0; i < np1[p]; i++)
	836	{
	837	__cpy (y, &w, p);
	838	__mul (x, &w, y, p);
	839	__sub (&mptwo, y, &z, p);
	840	__mul (&w, &z, y, p);
	841	}
e4d82761 UD	842	}
e4d82761 UD	843
950c99ca SP	844	/* Divide X by Y and store result in *Z. X and Y may overlap but not X and Z
	845	or Y and Z. Relative error bound:
	846	- For P = 2: 2.001 * R ^ (1 - P)
	847	- For P = 3: 2.063 * R ^ (1 - P)
	848	- For P > 3: 3.001 * R ^ (1 - P)
e4d82761	849
950c99ca	850	X = 0 is not permissible. /
31d3cc00 UD	851	void
31d3cc00 UD	852	SECTION
1066a534 SP	853	__dvd (const mp_no x, const mp_no y, mp_no *z, int p)
1066a534 SP	854	{
e4d82761 UD	855	mp_no w;
e4d82761 UD	856
1066a534 SP	857	if (X[0] == ZERO)
	858	Z[0] = ZERO;
	859	else
	860	{
	861	__inv (y, &w, p);
	862	__mul (x, &w, z, p);
	863	}
e4d82761	864	}