git.ipfire.org Git - thirdparty/glibc.git/commitdiff
PowerPC: Arithmetic function optimizations for POWER
author Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Fri, 30 Dec 2011 00:37:51 +0000 (19:37 -0500)
committer Adhemerval Zanella <azanella@linux.vnet.ibm.com>
Fri, 30 Dec 2011 00:37:51 +0000 (19:37 -0500)
This patch creates inline assembly functions that use intrinsic PPC
floating point instructions when the platform supports them but rely on
the internal GLIBC functions when the instructions are not implemented
(for instance, on POWER4).

ChangeLog
sysdeps/powerpc/fpu/e_sqrt.c
sysdeps/powerpc/fpu/e_sqrtf.c
sysdeps/powerpc/fpu/math_private.h
sysdeps/powerpc/powerpc64/fpu/e_sqrt.c
sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c

index 91397b51593c3965b559d8ccb003f31b54226b77..e7bb0fc14cf2500a20c1ab5d410f8da363613b0f 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2011-11-07  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
+
+       * sysdeps/powerpc/fpu/math_private.h: Use inline assembly versions
+       of math functions ceil, trunc, floor, round, and sqrt when
+       available on the platform.
+       * sysdeps/powerpc/fpu/e_sqrt.c: Undefine __ieee754_sqrt to avoid
+       name clash.
+       * sysdeps/powerpc/fpu/e_sqrtf.c: Likewise.
+       * sysdeps/powerpc/powerpc64/fpu/e_sqrt.c: Likewise.
+       * sysdeps/powerpc/powerpc64/fpu/e_sqrtf.c: Likewise.
+
 2011-10-12  Ulrich Drepper  <drepper@gmail.com>
 
        Add integration with gcc's -ffinite-math-only and optimize wrapper
index f9ded25717553085fb2eb250c331ca25848d59a9..d59bd08d5c1ad1375ed42311d5c5d1fcd52440e4 100644 (file)
@@ -154,6 +154,7 @@ __slow_ieee754_sqrt (double x)
   return f_wash (x);
 }
 
+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
index 965faee8429ddb8e5eab79886e9429d9165c0e4a..9c6b860c96779a9808779119b36e4a2892b575e4 100644 (file)
@@ -130,7 +130,7 @@ __slow_ieee754_sqrtf (float x)
   return f_washf (x);
 }
 
-
+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {
index 90021c6d3cb6463cf562617d6f34580698a91328..c4dd217d1d1206c1c7afea8dac36f15a3ddc6b2c 100644 (file)
@@ -1,5 +1,5 @@
 /* Private inline math functions for powerpc.
-   Copyright (C) 2006
+   Copyright (C) 2006, 2011
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
 #include <ldsodefs.h>
 #include <dl-procinfo.h>
 
+#include <math/math_private.h>
+
 # if __WORDSIZE == 64 || defined _ARCH_PWR4
 #  define __CPU_HAS_FSQRT 1
+
+#ifndef __ieee754_sqrt
+# define __ieee754_sqrt(x)             \
+  ({ double __z;                       \
+     __asm __volatile (                        \
+       "       fsqrt %0,%1\n"          \
+               : "=f" (__z)            \
+               : "f"(x));              \
+     __z; })
+#endif
+#ifndef __ieee754_sqrtf
+# define __ieee754_sqrtf(x)            \
+  ({ float __z;                                \
+     __asm __volatile (                        \
+       "       fsqrts %0,%1\n"         \
+               : "=f" (__z)            \
+               : "f"(x));              \
+     __z; })
+#endif
+
 # else
 #  define __CPU_HAS_FSQRT ((GLRO(dl_hwcap) & PPC_FEATURE_64) != 0)
+# endif        /* __WORDSIZE == 64 || defined _ARCH_PWR4 */
+
+
+#if defined _ARCH_PWR5X
+
+# ifndef __round
+#  define __round(x)                   \
+    ({ double __z;                     \
+      __asm __volatile (               \
+       "       frin %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __roundf
+#  define __roundf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frin %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+
+# ifndef __trunc
+#  define __trunc(x)                   \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       friz %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __truncf
+#  define __truncf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       friz %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+
+# ifndef __ceil
+#  define __ceil(x)                    \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       frip %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __ceilf
+#  define __ceilf(x)                   \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frip %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
 # endif
 
+# ifndef __floor
+#  define __floor(x)                   \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       frim %0,%1\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+# ifndef __floorf
+#  define __floorf(x)                  \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       frim %0,%1\n"           \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (x));             \
+     __z; })
+# endif
+
+#endif /* defined _ARCH_PWR5X */
+
+
+#if defined _ARCH_PWR6
+
+# ifndef __copysign
+#  define __copysign(x, y)             \
+    ({ double __z;                     \
+     __asm __volatile (                        \
+       "       fcpsgn %0,%1,%2\n"      \
+               : "=f" (__z)            \
+               : "f" (y), "f" (x));    \
+     __z; })
+# endif
+# ifndef __copysignf
+#  define __copysignf(x, y)            \
+    ({ float __z;                      \
+     __asm __volatile (                        \
+       "       fcpsgn %0,%1,%2\n"      \
+       "       frsp %0,%0\n"           \
+               : "=f" (__z)            \
+               : "f" (y), "f" (x));    \
+     __z; })
+# endif
+
+#endif /* defined _ARCH_PWR6 */
+
+
 # ifndef __LIBC_INTERNAL_MATH_INLINES
 extern double __slow_ieee754_sqrt (double);
 __inline double
@@ -78,6 +211,4 @@ __ieee754_sqrtf (float __x)
 }
 #endif /* __LIBC_INTERNAL_MATH_INLINES */
 
-#include <math/math_private.h>
-
 #endif /* _PPC_MATH_PRIVATE_H_ */
index 314abba3b2174db01410d60abe3f9de101e96aff..66d04ceb77161caebf8543d9f9dee6e686d80373 100644 (file)
@@ -20,6 +20,7 @@
 #include <math.h>
 #include <math_private.h>
 
+#undef __ieee754_sqrt
 double
 __ieee754_sqrt (double x)
 {
index 71572143941e53de19299b2c312830cbcc9fc20a..847a2e4ad6b40ea1fd775dadda57dfce81599331 100644 (file)
@@ -20,6 +20,7 @@
 #include <math.h>
 #include <math_private.h>
 
+#undef __ieee754_sqrtf
 float
 __ieee754_sqrtf (float x)
 {