Revert git commit ec11dbe1aabbb45d41f5e9c21e315e2496f3b473

author Ryan S. Arnold <rsa@us.ibm.com>

Fri, 9 Sep 2011 16:58:57 +0000 (11:58 -0500)

committer Ryan S. Arnold <rsa@us.ibm.com>

Fri, 9 Sep 2011 16:58:57 +0000 (11:58 -0500)
author Ryan S. Arnold <rsa@us.ibm.com>
Fri, 9 Sep 2011 16:58:57 +0000 (11:58 -0500)
committer Ryan S. Arnold <rsa@us.ibm.com>
Fri, 9 Sep 2011 16:58:57 +0000 (11:58 -0500)
diff --git a/ChangeLog b/ChangeLog

index 5ec2eb32720885adc34a63e9d9aab81c769bd8f2..debb8846ab44f45d4d47fe37287e953333c06fcb 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -15,38 +15,6 @@
         freeing static tls block.  Add debug print.
         * elf/fl-tls.c (_dl_determine_tlsoffset): Add debug print.
  
-2011-06-28  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
-
-       * sysdeps/powerpc/fpu/e_hypot.c: optimizations for POWER5 and POWER6.
-       * sysdeps/powerpc/fpu/e_hypotf.c: likewise.
-
-2011-06-28  Adhemerval Zanella <azanella@linux.vnet.ibm.com>
-
-       * sysdeps/powerpc/fpu/e_rem_pio2f.c: splitting the code on k_rem_pio2f.c
-       * sysdeps/powerpc/fpu/k_rem_pio2f.c: some code from e_rem_pio2f.c.
-
-2011-06-15  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
-
-       * sysdeps/powerpc/fpu/Makefile: Added new objects.
-       * sysdeps/powerpc/fpu/e_hypot.c: New file: hypot optimized for POWER.
-       * sysdeps/powerpc/fpu/e_hypotf.c: New file: hypotf optimized for
-       POWER.
-       * sysdeps/powerpc/fpu/e_rem_pio2f.c: New file: optimized for POWER.
-       * sysdeps/powerpc/fpu/k_cosf.c: New file: optimized for POWER.
-       * sysdeps/powerpc/fpu/k_sinf.c: New file: optimized for POWER.
-       * sysdeps/powerpc/fpu/s_cosf.c: New file: sinf optimized for POWER.
-       * sysdeps/powerpc/fpu/s_sinf.c: New file: cosf optimized for POWER.
-       * sysdeps/powerpc/fpu/s_float_bitwise.h: New file:  bitwise operation
-       over floats.
-       * sysdeps/powerpc/powerpc32/fpu/s_float_bitwise.S: New file: bitwise
-       operation over float, PPC32 implementation.
-       * sysdeps/powerpc/powerpc32/power7/fpu/s_float_bitwise.S: New file:
-       bitwise operation over floats, PPC32 implementation using VSX.
-       * sysdeps/powerpc/powerpc64/fpu/s_float_bitwise.S: New file: bitwise
-       operation over float, PPC64 implementation.
-       * sysdeps/powerpc/powerpc64/power7/fpu/s_float_bitwise.S: New file:
-       bitwise operation over floats, PPC64 implementation using VSX.
-
  2011-05-25  Ryan S. Arnold  <rsa@us.ibm.com>
  
         * sysdeps/unix/sysv/linux/powerpc/dl-librecon.h
diff --git a/sysdeps/powerpc/fpu/e_hypot.c b/sysdeps/powerpc/fpu/e_hypot.c

index 22bd56371e0d88a0d6dfcc8f12f8b21242b78aca..c09c0c8b4694b233723eb48e6e9a33d292062c97 100644 (file)
--- a/sysdeps/powerpc/fpu/e_hypot.c
+++ b/sysdeps/powerpc/fpu/e_hypot.c
@@ -19,7 +19,6 @@
     Boston, MA 02111-1307, USA.  */
  
  #include "math.h"
-#include "math_private.h"
  
  static const double two60   = 1.152921504606847e+18;
  static const double two500  = 3.2733906078961419e+150;
@@ -37,56 +36,26 @@ static const double pdnum   = 2.225073858507201e-308;
   * is needed.
   */
  
-#ifdef _ARCH_PWR7
-/* POWER7 isinf and isnan optimization are fast. */
-# define TEST_INF_NAN(x, y)                                       \
-   if (isinf(x) || isinf(y))                                      \
-       return INFINITY;                                           \
-   if (isnan(x) || isnan(y))                                      \
-       return NAN;
-# else
-/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are
- * costly (especially for POWER6). */
-# define GET_TW0_HIGH_WORD(d1,d2,i1,i2)                           \
- do {                                                             \
-   ieee_double_shape_type gh_u1;                                  \
-   ieee_double_shape_type gh_u2;                                  \
-   gh_u1.value = (d1);                                            \
-   gh_u2.value = (d2);                                            \
-   (i1) = gh_u1.parts.msw;                                        \
-   (i2) = gh_u2.parts.msw;                                        \
- } while (0)
-
-# define TEST_INF_NAN(x, y)                                      \
- do {                                                            \
-   int32_t hx, hy;                                               \
-   GET_TW0_HIGH_WORD(x, y, hx, hy);                              \
-   if (hy > hx) {                                                \
-     uint32_t ht = hx; hx = hy; hy = ht;                         \
-   }                                                             \
-   if (hx >= 0x7ff00000) {                                       \
-     if (hx == 0x7ff00000 || hy == 0x7ff00000)                   \
-       return INFINITY;                                          \
-     return NAN;                                                 \
-   }                                                             \
- } while (0)
-
-#endif
-
-
  double
  __ieee754_hypot (double x, double y)
  {
-  x = fabs (x);
-  y = fabs (y);
-
-  TEST_INF_NAN (x, y);
+  double j;
  
+  if (isinf(x) || isinf(y))
+    {
+      return INFINITY;
+    }
+  if (isnan(x) || isnan(y))
+    {
+      return NAN;
+    }
+  x = __builtin_fabs (x);
+  y = __builtin_fabs (y);
    if (y > x)
      {
-      double t = x;
+      j = x;
        x = y;
-      y = t;
+      y = j;
      }
    if (y == 0.0 || (x / y) > two60)
      {
@@ -96,7 +65,7 @@ __ieee754_hypot (double x, double y)
      {
        x *= twoM600;
        y *= twoM600;
-      return sqrt (x * x + y * y) / twoM600;
+      return __builtin_sqrt (x * x + y * y) / twoM600;
      }
    if (y < twoM500)
      {
@@ -104,14 +73,14 @@ __ieee754_hypot (double x, double y)
         {
           x *= two1022;
           y *= two1022;
-         return sqrt (x * x + y * y) / two1022;
+         return __builtin_sqrt (x * x + y * y) / two1022;
         }
        else
         {
           x *= two600;
           y *= two600;
-         return sqrt (x * x + y * y) / two600;
+         return __builtin_sqrt (x * x + y * y) / two600;
         }
      }
-  return sqrt (x * x + y * y);
+  return __builtin_sqrt (x * x + y * y);
  }
diff --git a/sysdeps/powerpc/fpu/e_hypotf.c b/sysdeps/powerpc/fpu/e_hypotf.c

index 3e6c597cc90ec67391598608186fbb06ca033c12..55eb2cb0c8abd8b647e4ea4a97d1d0c3d808ef24 100644 (file)
--- a/sysdeps/powerpc/fpu/e_hypotf.c
+++ b/sysdeps/powerpc/fpu/e_hypotf.c
@@ -19,17 +19,6 @@
     Boston, MA 02111-1307, USA.  */
  
  #include "math.h"
-#include "math_private.h"
-
-
-static const float two30  = 1.0737418e09;
-static const float two50  = 1.1259000e15;
-static const float two60  = 1.1529221e18;
-static const float two126 = 8.5070592e+37;
-static const float twoM50 = 8.8817842e-16;
-static const float twoM60 = 6.7762644e-21;
-static const float pdnum  = 1.1754939e-38;
-
  
  /* __ieee754_hypotf(x,y)
   *
@@ -39,55 +28,34 @@ static const float pdnum  = 1.1754939e-38;
   * is needed.
   */
  
-#ifdef _ARCH_PWR7
-/* POWER7 isinf and isnan optimizations are fast. */
-# define TEST_INF_NAN(x, y)                                      \
-   if (isinff(x) || isinff(y))                                   \
-     return INFINITY;                                            \
-   if (isnanf(x) || isnanf(y))                                   \
-     return NAN;
-# else
-/* For POWER6 and below isinf/isnan triggers LHS and PLT calls are
- * costly (especially for POWER6). */
-# define GET_TWO_FLOAT_WORD(f1,f2,i1,i2)                         \
- do {                                                            \
-   ieee_float_shape_type gf_u1;                                  \
-   ieee_float_shape_type gf_u2;                                  \
-   gf_u1.value = (f1);                                           \
-   gf_u2.value = (f2);                                           \
-   (i1) = gf_u1.word;                                            \
-   (i2) = gf_u2.word;                                            \
- } while (0)
-
-# define TEST_INF_NAN(x, y)                                      \
- do {                                                            \
-   int32_t hx, hy;                                               \
-   GET_TWO_FLOAT_WORD(x, y, hx, hy);                             \
-   if (hy > hx) {                                                \
-     uint32_t ht = hx; hx = hy; hy = ht;                         \
-   }                                                             \
-   if (hx >= 0x7f800000) {                                       \
-     if (hx == 0x7f800000 || hy == 0x7f800000)                   \
-       return INFINITY;                                          \
-     return NAN;                                                 \
-   }                                                             \
- } while (0)
-#endif
-
+static const float two30  = 1.0737418e09;
+static const float two50  = 1.1259000e15;
+static const float two60  = 1.1529221e18;
+static const float two126 = 8.5070592e+37;
+static const float twoM50 = 8.8817842e-16;
+static const float twoM60 = 6.7762644e-21;
+static const float pdnum  = 1.1754939e-38;
  
  float
  __ieee754_hypotf (float x, float y)
  {
-  x = fabsf (x);
-  y = fabsf (y);
-
-  TEST_INF_NAN (x, y);
+  float j;
  
+  if (isinff(x) || isinff(y))
+    {
+      return INFINITY;
+    }
+  if (isnanf(x) || isnanf(y))
+    {
+      return NAN;
+    }
+  x = __builtin_fabsf (x);
+  y = __builtin_fabsf (y);
    if (y > x)
      {
-      float t = y;
-      y = x;
-      x = t;
+      j = x;
+      x = y;
+      y = j;
      }
    if (y == 0.0 || (x / y) > two30)
      {
@@ -97,7 +65,7 @@ __ieee754_hypotf (float x, float y)
      {
        x *= twoM60;
        y *= twoM60;
-      return sqrtf (x * x + y * y) / twoM60;
+      return __builtin_sqrtf (x * x + y * y) / twoM60;
      }
    if (y < twoM50)
      {
@@ -105,14 +73,14 @@ __ieee754_hypotf (float x, float y)
         {
           x *= two126;
           y *= two126;
-         return sqrtf (x * x + y * y) / two126;
+         return __builtin_sqrtf (x * x + y * y) / two126;
         }
        else
         {
           x *= two60;
           y *= two60;
-         return sqrtf (x * x + y * y) / two60;
+         return __builtin_sqrtf (x * x + y * y) / two60;
         }
      }
-  return sqrtf (x * x + y * y);
+  return __builtin_sqrtf (x * x + y * y);
  }
diff --git a/sysdeps/powerpc/fpu/e_rem_pio2f.c b/sysdeps/powerpc/fpu/e_rem_pio2f.c

index 9325eeb27dd9055eeca1e76fa333f0fc2644b957..5b6e7b60ef172dbb22e4c33545e249debdc99347 100644 (file)
--- a/sysdeps/powerpc/fpu/e_rem_pio2f.c
+++ b/sysdeps/powerpc/fpu/e_rem_pio2f.c
@@ -19,18 +19,71 @@
     Boston, MA 02111-1307, USA.  */
  
  #include <math.h>
+#include <stdint.h>
  
-#include "math_private.h"
  #include "s_float_bitwise.h"
  
-/* defined in sysdeps/powerpc/fpu/k_rem_pio2f.c */
-int __fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx);
+static int32_t __fp_kernel_rem_pio2f (float *, float *, float, int32_t);
  
  /* __ieee754_rem_pio2f(x,y)
   *
   * return the remainder of x rem pi/2 in y[0]+y[1]
   */
  
+static const float two_over_pi[] = {
+  1.62000000e+02, 2.49000000e+02, 1.31000000e+02, 1.10000000e+02,
+  7.80000000e+01, 6.80000000e+01, 2.10000000e+01, 4.10000000e+01,
+  2.52000000e+02, 3.90000000e+01, 8.70000000e+01, 2.09000000e+02,
+  2.45000000e+02, 5.20000000e+01, 2.21000000e+02, 1.92000000e+02,
+  2.19000000e+02, 9.80000000e+01, 1.49000000e+02, 1.53000000e+02,
+  6.00000000e+01, 6.70000000e+01, 1.44000000e+02, 6.50000000e+01,
+  2.54000000e+02, 8.10000000e+01, 9.90000000e+01, 1.71000000e+02,
+  2.22000000e+02, 1.87000000e+02, 1.97000000e+02, 9.70000000e+01,
+  1.83000000e+02, 3.60000000e+01, 1.10000000e+02, 5.80000000e+01,
+  6.60000000e+01, 7.70000000e+01, 2.10000000e+02, 2.24000000e+02,
+  6.00000000e+00, 7.30000000e+01, 4.60000000e+01, 2.34000000e+02,
+  9.00000000e+00, 2.09000000e+02, 1.46000000e+02, 2.80000000e+01,
+  2.54000000e+02, 2.90000000e+01, 2.35000000e+02, 2.80000000e+01,
+  1.77000000e+02, 4.10000000e+01, 1.67000000e+02, 6.20000000e+01,
+  2.32000000e+02, 1.30000000e+02, 5.30000000e+01, 2.45000000e+02,
+  4.60000000e+01, 1.87000000e+02, 6.80000000e+01, 1.32000000e+02,
+  2.33000000e+02, 1.56000000e+02, 1.12000000e+02, 3.80000000e+01,
+  1.80000000e+02, 9.50000000e+01, 1.26000000e+02, 6.50000000e+01,
+  5.70000000e+01, 1.45000000e+02, 2.14000000e+02, 5.70000000e+01,
+  1.31000000e+02, 8.30000000e+01, 5.70000000e+01, 2.44000000e+02,
+  1.56000000e+02, 1.32000000e+02, 9.50000000e+01, 1.39000000e+02,
+  1.89000000e+02, 2.49000000e+02, 4.00000000e+01, 5.90000000e+01,
+  3.10000000e+01, 2.48000000e+02, 1.51000000e+02, 2.55000000e+02,
+  2.22000000e+02, 5.00000000e+00, 1.52000000e+02, 1.50000000e+01,
+  2.39000000e+02, 4.70000000e+01, 1.70000000e+01, 1.39000000e+02,
+  9.00000000e+01, 1.00000000e+01, 1.09000000e+02, 3.10000000e+01,
+  1.09000000e+02, 5.40000000e+01, 1.26000000e+02, 2.07000000e+02,
+  3.90000000e+01, 2.03000000e+02, 9.00000000e+00, 1.83000000e+02,
+  7.90000000e+01, 7.00000000e+01, 6.30000000e+01, 1.02000000e+02,
+  1.58000000e+02, 9.50000000e+01, 2.34000000e+02, 4.50000000e+01,
+  1.17000000e+02, 3.90000000e+01, 1.86000000e+02, 1.99000000e+02,
+  2.35000000e+02, 2.29000000e+02, 2.41000000e+02, 1.23000000e+02,
+  6.10000000e+01, 7.00000000e+00, 5.70000000e+01, 2.47000000e+02,
+  1.38000000e+02, 8.20000000e+01, 1.46000000e+02, 2.34000000e+02,
+  1.07000000e+02, 2.51000000e+02, 9.50000000e+01, 1.77000000e+02,
+  3.10000000e+01, 1.41000000e+02, 9.30000000e+01, 8.00000000e+00,
+  8.60000000e+01, 3.00000000e+00, 4.80000000e+01, 7.00000000e+01,
+  2.52000000e+02, 1.23000000e+02, 1.07000000e+02, 1.71000000e+02,
+  2.40000000e+02, 2.07000000e+02, 1.88000000e+02, 3.20000000e+01,
+  1.54000000e+02, 2.44000000e+02, 5.40000000e+01, 2.90000000e+01,
+  1.69000000e+02, 2.27000000e+02, 1.45000000e+02, 9.70000000e+01,
+  9.40000000e+01, 2.30000000e+02, 2.70000000e+01, 8.00000000e+00,
+  1.01000000e+02, 1.53000000e+02, 1.33000000e+02, 9.50000000e+01,
+  2.00000000e+01, 1.60000000e+02, 1.04000000e+02, 6.40000000e+01,
+  1.41000000e+02, 2.55000000e+02, 2.16000000e+02, 1.28000000e+02,
+  7.70000000e+01, 1.15000000e+02, 3.90000000e+01, 4.90000000e+01,
+  6.00000000e+00, 6.00000000e+00, 2.10000000e+01, 8.60000000e+01,
+  2.02000000e+02, 1.15000000e+02, 1.68000000e+02, 2.01000000e+02,
+  9.60000000e+01, 2.26000000e+02, 1.23000000e+02, 1.92000000e+02,
+  1.40000000e+02, 1.07000000e+02
+};
+
+
  static const float npio2_hw[] = {
    1.57077026e+00, 3.14154053e+00, 4.71228027e+00, 6.28308105e+00,
    7.85388184e+00, 9.42456055e+00, 1.09953613e+01, 1.25661621e+01,
@@ -42,9 +95,24 @@ static const float npio2_hw[] = {
    4.55527344e+01, 4.71230469e+01, 4.86943359e+01, 5.02646484e+01
  };
  
+static const float PIo2[] = {
+  1.5703125000e+00,            /* 0x3fc90000 */
+  4.5776367188e-04,            /* 0x39f00000 */
+  2.5987625122e-05,            /* 0x37da0000 */
+  7.5437128544e-08,            /* 0x33a20000 */
+  6.0026650317e-11,            /* 0x2e840000 */
+  7.3896444519e-13,            /* 0x2b500000 */
+  5.3845816694e-15,            /* 0x27c20000 */
+  5.6378512969e-18,            /* 0x22d00000 */
+  8.3009228831e-20,            /* 0x1fc40000 */
+  3.2756352257e-22,            /* 0x1bc60000 */
+  6.3331015649e-25,            /* 0x17440000 */
+};
  
  static const float zero  = 0.0000000000e+00;
+static const float one   = 1.0000000000;
  static const float two8  = 2.5600000000e+02;
+static const float twon8 = 3.9062500000e-03;
  
  static const float half    = 5.0000000000e-01;
  static const float invpio2 = 6.3661980629e-01;
@@ -187,3 +255,177 @@ __ieee754_rem_pio2f (float x, float *y)
      }
    return i;
  }
+
+static int32_t
+__fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx)
+{
+  int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih, exp;
+  float z, fw, f[20], fq[20], q[20];
+
+  /* initialize jk */
+  jp = jk = 9;
+
+  /* determine jx,jv,q0, note that 3>q0 */
+  jx = nx - 1;
+  exp = __float_get_exp (e0) - 127;
+  jv = (exp - 3) / 8;
+  if (jv < 0)
+    jv = 0;
+  q0 = exp - 8 * (jv + 1);
+
+  /* set up f[0] to f[jx+jk] where f[jx+jk] = two_over_pi[jv+jk] */
+  j = jv - jx;
+  m = jx + jk;
+  for (i = 0; i <= m; i++, j++)
+    f[i] = (j < 0) ? zero : two_over_pi[j];
+
+  /* compute q[0],q[1],...q[jk] */
+  for (i = 0; i <= jk; i++)
+    {
+      for (j = 0, fw = 0.0; j <= jx; j++)
+       fw += x[j] * f[jx + i - j];
+      q[i] = fw;
+    }
+
+  jz = jk;
+recompute:
+  /* distill q[] into iq[] reversingly */
+  for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
+    {
+      fw = truncf (twon8 * z);
+      iq[i] = (int32_t) (z - two8 * fw);
+      z = q[j - 1] + fw;
+    }
+
+  /* compute n */
+  z = __scalbnf (z, q0);       /* actual value of z */
+  z -= 8.0 * floorf (z * 0.125);       /* trim off integer >= 8 */
+  n = (int32_t) z;
+  z -= truncf (z);
+  ih = 0;
+  if (q0 > 0)
+    {                          /* need iq[jz-1] to determine n */
+      i = (iq[jz - 1] >> (8 - q0));
+      n += i;
+      iq[jz - 1] -= i << (8 - q0);
+      ih = iq[jz - 1] >> (7 - q0);
+    }
+  else if (q0 == 0)
+    ih = iq[jz - 1] >> 8;
+  else if (z >= 0.5)
+    ih = 2;
+
+  if (ih > 0)
+    {                          /* q > 0.5 */
+      n += 1;
+      carry = 0;
+      for (i = 0; i < jz; i++)
+       {                       /* compute 1-q */
+         j = iq[i];
+         if (carry == 0)
+           {
+             if (j != 0)
+               {
+                 carry = 1;
+                 iq[i] = 0x100 - j;
+               }
+           }
+         else
+           iq[i] = 0xff - j;
+       }
+      if (q0 > 0)
+       {                       /* rare case: chance is 1 in 12 */
+         switch (q0)
+           {
+           case 1:
+             iq[jz - 1] &= 0x7f;
+             break;
+           case 2:
+             iq[jz - 1] &= 0x3f;
+             break;
+           }
+       }
+      if (ih == 2)
+       {
+         z = one - z;
+         if (carry != 0)
+           z -= __scalbnf (one, q0);
+       }
+    }
+
+  /* check if recomputation is needed */
+  if (z == zero)
+    {
+      j = 0;
+      for (i = jz - 1; i >= jk; i--)
+       j |= iq[i];
+      if (j == 0)
+       {                       /* need recomputation */
+         for (k = 1; iq[jk - k] == 0; k++);    /* k = no. of terms needed */
+
+         for (i = jz + 1; i <= jz + k; i++)
+           {                   /* add q[jz+1] to q[jz+k] */
+             f[jx + i] = two_over_pi[jv + i];
+             for (j = 0, fw = 0.0; j <= jx; j++)
+               fw += x[j] * f[jx + i - j];
+             q[i] = fw;
+           }
+         jz += k;
+         goto recompute;
+       }
+    }
+
+  /* chop off zero terms */
+  if (z == 0.0)
+    {
+      jz -= 1;
+      q0 -= 8;
+      while (iq[jz] == 0)
+       {
+         jz--;
+         q0 -= 8;
+       }
+    }
+  else
+    {                          /* break z into 8-bit if necessary */
+      z = __scalbnf (z, -q0);
+      if (z >= two8)
+       {
+         fw = truncf (twon8 * z);
+         iq[jz] = (int32_t) (z - two8 * fw);
+         jz += 1;
+         q0 += 8;
+         iq[jz] = (int32_t) fw;
+       }
+      else
+       iq[jz] = (int32_t) z;
+    }
+
+  /* convert integer "bit" chunk to floating-point value */
+  fw = __scalbnf (one, q0);
+  for (i = jz; i >= 0; i--)
+    {
+      q[i] = fw * (float) iq[i];
+      fw *= twon8;
+    }
+
+  /* compute PIo2[0,...,jp]*q[jz,...,0] */
+  for (i = jz; i >= 0; i--)
+    {
+      for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
+       fw += PIo2[k] * q[i + k];
+      fq[jz - i] = fw;
+    }
+
+  /* compress fq[] into y[] */
+  fw = 0.0;
+  for (i = jz; i >= 0; i--)
+    fw += fq[i];
+  y[0] = (ih == 0) ? fw : -fw;
+  fw = fq[0] - fw;
+  for (i = 1; i <= jz; i++)
+    fw += fq[i];
+  y[1] = (ih == 0) ? fw : -fw;
+
+  return n & 7;
+}
diff --git a/sysdeps/powerpc/fpu/k_rem_pio2f.c b/sysdeps/powerpc/fpu/k_rem_pio2f.c

deleted file mode 100644 (file)

index 9635340..0000000
--- a/sysdeps/powerpc/fpu/k_rem_pio2f.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/* k_rem_pio2f.c -- float version of e_rem_pio2.c
-   Copyright (C) 2011 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Adhemerval Zanella <azanella@br.ibm.com>, 2011
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-#include <math.h>
-
-#include "math_private.h"
-#include "s_float_bitwise.h"
-
-
-static const float two_over_pi[] = {
-  1.62000000e+02, 2.49000000e+02, 1.31000000e+02, 1.10000000e+02,
-  7.80000000e+01, 6.80000000e+01, 2.10000000e+01, 4.10000000e+01,
-  2.52000000e+02, 3.90000000e+01, 8.70000000e+01, 2.09000000e+02,
-  2.45000000e+02, 5.20000000e+01, 2.21000000e+02, 1.92000000e+02,
-  2.19000000e+02, 9.80000000e+01, 1.49000000e+02, 1.53000000e+02,
-  6.00000000e+01, 6.70000000e+01, 1.44000000e+02, 6.50000000e+01,
-  2.54000000e+02, 8.10000000e+01, 9.90000000e+01, 1.71000000e+02,
-  2.22000000e+02, 1.87000000e+02, 1.97000000e+02, 9.70000000e+01,
-  1.83000000e+02, 3.60000000e+01, 1.10000000e+02, 5.80000000e+01,
-  6.60000000e+01, 7.70000000e+01, 2.10000000e+02, 2.24000000e+02,
-  6.00000000e+00, 7.30000000e+01, 4.60000000e+01, 2.34000000e+02,
-  9.00000000e+00, 2.09000000e+02, 1.46000000e+02, 2.80000000e+01,
-  2.54000000e+02, 2.90000000e+01, 2.35000000e+02, 2.80000000e+01,
-  1.77000000e+02, 4.10000000e+01, 1.67000000e+02, 6.20000000e+01,
-  2.32000000e+02, 1.30000000e+02, 5.30000000e+01, 2.45000000e+02,
-  4.60000000e+01, 1.87000000e+02, 6.80000000e+01, 1.32000000e+02,
-  2.33000000e+02, 1.56000000e+02, 1.12000000e+02, 3.80000000e+01,
-  1.80000000e+02, 9.50000000e+01, 1.26000000e+02, 6.50000000e+01,
-  5.70000000e+01, 1.45000000e+02, 2.14000000e+02, 5.70000000e+01,
-  1.31000000e+02, 8.30000000e+01, 5.70000000e+01, 2.44000000e+02,
-  1.56000000e+02, 1.32000000e+02, 9.50000000e+01, 1.39000000e+02,
-  1.89000000e+02, 2.49000000e+02, 4.00000000e+01, 5.90000000e+01,
-  3.10000000e+01, 2.48000000e+02, 1.51000000e+02, 2.55000000e+02,
-  2.22000000e+02, 5.00000000e+00, 1.52000000e+02, 1.50000000e+01,
-  2.39000000e+02, 4.70000000e+01, 1.70000000e+01, 1.39000000e+02,
-  9.00000000e+01, 1.00000000e+01, 1.09000000e+02, 3.10000000e+01,
-  1.09000000e+02, 5.40000000e+01, 1.26000000e+02, 2.07000000e+02,
-  3.90000000e+01, 2.03000000e+02, 9.00000000e+00, 1.83000000e+02,
-  7.90000000e+01, 7.00000000e+01, 6.30000000e+01, 1.02000000e+02,
-  1.58000000e+02, 9.50000000e+01, 2.34000000e+02, 4.50000000e+01,
-  1.17000000e+02, 3.90000000e+01, 1.86000000e+02, 1.99000000e+02,
-  2.35000000e+02, 2.29000000e+02, 2.41000000e+02, 1.23000000e+02,
-  6.10000000e+01, 7.00000000e+00, 5.70000000e+01, 2.47000000e+02,
-  1.38000000e+02, 8.20000000e+01, 1.46000000e+02, 2.34000000e+02,
-  1.07000000e+02, 2.51000000e+02, 9.50000000e+01, 1.77000000e+02,
-  3.10000000e+01, 1.41000000e+02, 9.30000000e+01, 8.00000000e+00,
-  8.60000000e+01, 3.00000000e+00, 4.80000000e+01, 7.00000000e+01,
-  2.52000000e+02, 1.23000000e+02, 1.07000000e+02, 1.71000000e+02,
-  2.40000000e+02, 2.07000000e+02, 1.88000000e+02, 3.20000000e+01,
-  1.54000000e+02, 2.44000000e+02, 5.40000000e+01, 2.90000000e+01,
-  1.69000000e+02, 2.27000000e+02, 1.45000000e+02, 9.70000000e+01,
-  9.40000000e+01, 2.30000000e+02, 2.70000000e+01, 8.00000000e+00,
-  1.01000000e+02, 1.53000000e+02, 1.33000000e+02, 9.50000000e+01,
-  2.00000000e+01, 1.60000000e+02, 1.04000000e+02, 6.40000000e+01,
-  1.41000000e+02, 2.55000000e+02, 2.16000000e+02, 1.28000000e+02,
-  7.70000000e+01, 1.15000000e+02, 3.90000000e+01, 4.90000000e+01,
-  6.00000000e+00, 6.00000000e+00, 2.10000000e+01, 8.60000000e+01,
-  2.02000000e+02, 1.15000000e+02, 1.68000000e+02, 2.01000000e+02,
-  9.60000000e+01, 2.26000000e+02, 1.23000000e+02, 1.92000000e+02,
-  1.40000000e+02, 1.07000000e+02
-};
-
-
-static const float PIo2[] = {
-  1.5703125000e+00,            /* 0x3fc90000 */
-  4.5776367188e-04,            /* 0x39f00000 */
-  2.5987625122e-05,            /* 0x37da0000 */
-  7.5437128544e-08,            /* 0x33a20000 */
-  6.0026650317e-11,            /* 0x2e840000 */
-  7.3896444519e-13,            /* 0x2b500000 */
-  5.3845816694e-15,            /* 0x27c20000 */
-  5.6378512969e-18,            /* 0x22d00000 */
-  8.3009228831e-20,            /* 0x1fc40000 */
-  3.2756352257e-22,            /* 0x1bc60000 */
-  6.3331015649e-25,            /* 0x17440000 */
-};
-
-
-static const float zero  = 0.0000000000e+00;
-static const float one   = 1.0000000000;
-static const float twon8 = 3.9062500000e-03;
-static const float two8  = 2.5600000000e+02;
-
-
-int32_t
-__fp_kernel_rem_pio2f (float *x, float *y, float e0, int32_t nx)
-{
-  int32_t jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih, exp;
-  float z, fw, f[20], fq[20], q[20];
-
-  /* initialize jk */
-  jp = jk = 9;
-
-  /* determine jx,jv,q0, note that 3>q0 */
-  jx = nx - 1;
-  exp = __float_get_exp (e0) - 127;
-  jv = (exp - 3) / 8;
-  if (jv < 0)
-    jv = 0;
-  q0 = exp - 8 * (jv + 1);
-
-  /* set up f[0] to f[jx+jk] where f[jx+jk] = two_over_pi[jv+jk] */
-  j = jv - jx;
-  m = jx + jk;
-  for (i = 0; i <= m; i++, j++)
-    f[i] = (j < 0) ? zero : two_over_pi[j];
-
-  /* compute q[0],q[1],...q[jk] */
-  for (i = 0; i <= jk; i++)
-    {
-      for (j = 0, fw = 0.0; j <= jx; j++)
-       fw += x[j] * f[jx + i - j];
-      q[i] = fw;
-    }
-
-  jz = jk;
-recompute:
-  /* distill q[] into iq[] reversingly */
-  for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
-    {
-      fw = truncf (twon8 * z);
-      iq[i] = (int32_t) (z - two8 * fw);
-      z = q[j - 1] + fw;
-    }
-
-  /* compute n */
-  z = __scalbnf (z, q0);       /* actual value of z */
-  z -= 8.0 * floorf (z * 0.125);       /* trim off integer >= 8 */
-  n = (int32_t) z;
-  z -= truncf (z);
-  ih = 0;
-  if (q0 > 0)
-    {                          /* need iq[jz-1] to determine n */
-      i = (iq[jz - 1] >> (8 - q0));
-      n += i;
-      iq[jz - 1] -= i << (8 - q0);
-      ih = iq[jz - 1] >> (7 - q0);
-    }
-  else if (q0 == 0)
-    ih = iq[jz - 1] >> 8;
-  else if (z >= 0.5)
-    ih = 2;
-
-  if (ih > 0)
-    {                          /* q > 0.5 */
-      n += 1;
-      carry = 0;
-      for (i = 0; i < jz; i++)
-       {                       /* compute 1-q */
-         j = iq[i];
-         if (carry == 0)
-           {
-             if (j != 0)
-               {
-                 carry = 1;
-                 iq[i] = 0x100 - j;
-               }
-           }
-         else
-           iq[i] = 0xff - j;
-       }
-      if (q0 > 0)
-       {                       /* rare case: chance is 1 in 12 */
-         switch (q0)
-           {
-           case 1:
-             iq[jz - 1] &= 0x7f;
-             break;
-           case 2:
-             iq[jz - 1] &= 0x3f;
-             break;
-           }
-       }
-      if (ih == 2)
-       {
-         z = one - z;
-         if (carry != 0)
-           z -= __scalbnf (one, q0);
-       }
-    }
-
-  /* check if recomputation is needed */
-  if (z == zero)
-    {
-      j = 0;
-      for (i = jz - 1; i >= jk; i--)
-       j |= iq[i];
-      if (j == 0)
-       {                       /* need recomputation */
-         for (k = 1; iq[jk - k] == 0; k++);    /* k = no. of terms needed */
-
-         for (i = jz + 1; i <= jz + k; i++)
-           {                   /* add q[jz+1] to q[jz+k] */
-             f[jx + i] = two_over_pi[jv + i];
-             for (j = 0, fw = 0.0; j <= jx; j++)
-               fw += x[j] * f[jx + i - j];
-             q[i] = fw;
-           }
-         jz += k;
-         goto recompute;
-       }
-    }
-
-  /* chop off zero terms */
-  if (z == 0.0)
-    {
-      jz -= 1;
-      q0 -= 8;
-      while (iq[jz] == 0)
-       {
-         jz--;
-         q0 -= 8;
-       }
-    }
-  else
-    {                          /* break z into 8-bit if necessary */
-      z = __scalbnf (z, -q0);
-      if (z >= two8)
-       {
-         fw = truncf (twon8 * z);
-         iq[jz] = (int32_t) (z - two8 * fw);
-         jz += 1;
-         q0 += 8;
-         iq[jz] = (int32_t) fw;
-       }
-      else
-       iq[jz] = (int32_t) z;
-    }
-
-  /* convert integer "bit" chunk to floating-point value */
-  fw = __scalbnf (one, q0);
-  for (i = jz; i >= 0; i--)
-    {
-      q[i] = fw * (float) iq[i];
-      fw *= twon8;
-    }
-
-  /* compute PIo2[0,...,jp]*q[jz,...,0] */
-  for (i = jz; i >= 0; i--)
-    {
-      for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
-       fw += PIo2[k] * q[i + k];
-      fq[jz - i] = fw;
-    }
-
-  /* compress fq[] into y[] */
-  fw = 0.0;
-  for (i = jz; i >= 0; i--)
-    fw += fq[i];
-  y[0] = (ih == 0) ? fw : -fw;
-  fw = fq[0] - fw;
-  for (i = 1; i <= jz; i++)
-    fw += fq[i];
-  y[1] = (ih == 0) ? fw : -fw;
-
-  return n & 7;
-}
author	Ryan S. Arnold <rsa@us.ibm.com>
	Fri, 9 Sep 2011 16:58:57 +0000 (11:58 -0500)
committer	Ryan S. Arnold <rsa@us.ibm.com>
	Fri, 9 Sep 2011 16:58:57 +0000 (11:58 -0500)
ChangeLog		patch \| blob \| blame \| history
sysdeps/powerpc/fpu/e_hypot.c		patch \| blob \| blame \| history
sysdeps/powerpc/fpu/e_hypotf.c		patch \| blob \| blame \| history
sysdeps/powerpc/fpu/e_rem_pio2f.c		patch \| blob \| blame \| history
sysdeps/powerpc/fpu/k_rem_pio2f.c	[deleted file]	patch \| blob \| blame \| history