From e20d2ea1260ef1ecd086879548ef8ac2c573d785 Mon Sep 17 00:00:00 2001 From: =?utf8?q?P=C3=A1draig=20Brady?= Date: Fri, 29 Nov 2013 04:13:11 +0000 Subject: [PATCH] build: fix potential factor build failure on arm and powerpc * src/longlong.h: Sync with the latest longlong.h from libgmp to: - avoid arm asm when being compiled for the thumb instruction [sub]set - avoid old powerpc assembly that is incompatible with newer GCC - add arm64 optimized count_trailing_zeros() - add sparc64 optimized add_ssaaaa() and umul_ppmm() --- src/longlong.h | 253 ++++++++++++++++++++++++++----------------------- 1 file changed, 137 insertions(+), 116 deletions(-) diff --git a/src/longlong.h b/src/longlong.h index eba241762b..069cef7775 100644 --- a/src/longlong.h +++ b/src/longlong.h @@ -138,30 +138,30 @@ along with this file. If not, see http://www.gnu.org/licenses/. */ or want. */ #ifdef _LONG_LONG_LIMB -#define count_leading_zeros_gcc_clz(count,x) \ - do { \ - ASSERT ((x) != 0); \ - (count) = __builtin_clzll (x); \ +#define count_leading_zeros_gcc_clz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_clzll (x); \ } while (0) #else -#define count_leading_zeros_gcc_clz(count,x) \ - do { \ - ASSERT ((x) != 0); \ - (count) = __builtin_clzl (x); \ +#define count_leading_zeros_gcc_clz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_clzl (x); \ } while (0) #endif #ifdef _LONG_LONG_LIMB -#define count_trailing_zeros_gcc_ctz(count,x) \ - do { \ - ASSERT ((x) != 0); \ - (count) = __builtin_ctzll (x); \ +#define count_trailing_zeros_gcc_ctz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_ctzll (x); \ } while (0) #else -#define count_trailing_zeros_gcc_ctz(count,x) \ - do { \ - ASSERT ((x) != 0); \ - (count) = __builtin_ctzl (x); \ +#define count_trailing_zeros_gcc_ctz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_ctzl (x); \ } while (0) #endif @@ -222,27 +222,27 @@ along with this file. If not, see http://www.gnu.org/licenses/. */ __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X)) #endif /* clz/ctz using cix */ -#if ! defined (count_leading_zeros) \ +#if ! defined (count_leading_zeros) \ && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE) /* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0. "$31" is written explicitly in the asm, since an "r" constraint won't select reg 31. There seems no need to worry about "r31" syntax for cray, - since gcc itself (pre-release 3.4) emits just $31 in various places. */ -#define ALPHA_CMPBGE_0(dst, src) \ + since gcc itself (pre-release 3.4) emits just $31 in various places. */ +#define ALPHA_CMPBGE_0(dst, src) \ do { asm ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0) /* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts them, locating the highest non-zero byte. A second __clz_tab lookup counts the leading zero bits in that byte, giving the result. */ -#define count_leading_zeros(count, x) \ - do { \ - UWtype __clz__b, __clz__c, __clz__x = (x); \ - ALPHA_CMPBGE_0 (__clz__b, __clz__x); /* zero bytes */ \ - __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F]; /* 8 to 1 byte */ \ - __clz__b = __clz__b * 8 - 7; /* 57 to 1 shift */ \ - __clz__x >>= __clz__b; \ - __clz__c = __clz_tab [__clz__x]; /* 8 to 1 bit */ \ - __clz__b = 65 - __clz__b; \ - (count) = __clz__b - __clz__c; \ +#define count_leading_zeros(count, x) \ + do { \ + UWtype __clz__b, __clz__c, __clz__x = (x); \ + ALPHA_CMPBGE_0 (__clz__b, __clz__x); /* zero bytes */ \ + __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F]; /* 8 to 1 byte */ \ + __clz__b = __clz__b * 8 - 7; /* 57 to 1 shift */ \ + __clz__x >>= __clz__b; \ + __clz__c = __clz_tab [__clz__x]; /* 8 to 1 bit */ \ + __clz__b = 65 - __clz__b; \ + (count) = __clz__b - __clz__c; \ } while (0) #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB #endif /* clz using cmpbge */ @@ -298,14 +298,14 @@ long __MPN(count_leading_zeros) (UDItype); code using "al -0x8000 && bl <= 0x8000) { \ - if (__builtin_constant_p (ah) && (ah) == 0) \ - __asm__ ("{ai|addic} %1,%3,%4\n\t{sfze|subfze} %0,%2" \ + do { \ + if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \ - else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ - __asm__ ("{ai|addic} %1,%3,%4\n\t{sfme|subfme} %0,%2" \ + else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ + __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \ - else if (__builtin_constant_p (bh) && (bh) == 0) \ - __asm__ ("{ai|addic} %1,%3,%4\n\t{ame|addme} %0,%2" \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("addic %1,%3,%4\n\taddme %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \ - else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ - __asm__ ("{ai|addic} %1,%3,%4\n\t{aze|addze} %0,%2" \ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("addic %1,%3,%4\n\taddze %0,%2" \ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \ - else \ - __asm__ ("{ai|addic} %1,%4,%5\n\t{sfe|subfe} %0,%3,%2" \ - : "=r" (sh), "=&r" (sl) \ - : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl)); \ - } else { \ - if (__builtin_constant_p (ah) && (ah) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ - : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \ - else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ - : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \ - else if (__builtin_constant_p (bh) && (bh) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ - : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \ - else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ - : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \ - else \ - __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ - : "=r" (sh), "=&r" (sl) \ - : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ - } \ + else \ + __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl)); \ + } else { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \ + else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \ + else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \ + else \ + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ + } \ } while (0) #endif /* ! _LONG_LONG_LIMB */ #define count_leading_zeros(count, x) \ @@ -1753,12 +1755,31 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); " subccc %r6,%7,%%g0\n" \ " subc %r2,%3,%0" \ : "=r" (sh), "=&r" (sl) \ - : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl), \ + : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl), \ "rJ" ((al) >> 32), "rI" ((bl) >> 32) \ __CLOBBER_CC) +#if __VIS__ >= 0x300 +#undef add_ssaaaa +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ( \ + "addcc %r4, %5, %1\n" \ + " addxc %r2, %r3, %0" \ + : "=r" (sh), "=&r" (sl) \ + : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rI" (bl) __CLOBBER_CC) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (pl) = __m0 * __m1; \ + __asm__ ("umulxhi\t%2, %1, %0" \ + : "=r" (ph) \ + : "%r" (__m0), "r" (__m1)); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("lzd\t%1,%0" : "=r" (count) : "r" (x)) +#endif #endif -#if defined (__vax__) && W_TYPE_SIZE == 32 +#if (defined (__vax) || defined (__vax__)) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ : "=g" (sh), "=&g" (sl) \ @@ -1798,7 +1819,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); : "g" ((USItype) (x))); \ } while (0) #endif -#endif /* __vax__ */ +#endif /* vax */ #if defined (__z8000__) && W_TYPE_SIZE == 16 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ @@ -1859,11 +1880,11 @@ extern UWtype mpn_umul_ppmm (UWtype *, UWtype, UWtype); #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm \ && ! defined (LONGLONG_STANDALONE) -#define umul_ppmm(wh, wl, u, v) \ - do { \ - UWtype __umul_ppmm__p0; \ - (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v)); \ - (wl) = __umul_ppmm__p0; \ +#define umul_ppmm(wh, wl, u, v) \ + do { \ + UWtype __umul_ppmm__p0; \ + (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v));\ + (wl) = __umul_ppmm__p0; \ } while (0) #endif @@ -1872,11 +1893,11 @@ extern UWtype mpn_umul_ppmm_r (UWtype, UWtype, UWtype *); #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r \ && ! defined (LONGLONG_STANDALONE) -#define umul_ppmm(wh, wl, u, v) \ - do { \ - UWtype __umul_ppmm__p0; \ - (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_ppmm__p0); \ - (wl) = __umul_ppmm__p0; \ +#define umul_ppmm(wh, wl, u, v) \ + do { \ + UWtype __umul_p0; \ + (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_p0); \ + (wl) = __umul_p0; \ } while (0) #endif @@ -1887,10 +1908,10 @@ extern UWtype mpn_udiv_qrnnd (UWtype *, UWtype, UWtype, UWtype); && ! defined (LONGLONG_STANDALONE) #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ - UWtype __udiv_qrnnd__r; \ - (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \ + UWtype __udiv_qrnnd_r; \ + (q) = mpn_udiv_qrnnd (&__udiv_qrnnd_r, \ (UWtype) (n1), (UWtype) (n0), (UWtype) d); \ - (r) = __udiv_qrnnd__r; \ + (r) = __udiv_qrnnd_r; \ } while (0) #endif @@ -1901,10 +1922,10 @@ extern UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *); && ! defined (LONGLONG_STANDALONE) #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ - UWtype __udiv_qrnnd__r; \ + UWtype __udiv_qrnnd_r; \ (q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) d, \ - &__udiv_qrnnd__r); \ - (r) = __udiv_qrnnd__r; \ + &__udiv_qrnnd_r); \ + (r) = __udiv_qrnnd_r; \ } while (0) #endif @@ -1926,7 +1947,7 @@ extern UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *); do { \ UWtype __x; \ __x = (al) - (bl); \ - (sh) = (ah) - (bh) - ((al) < (bl)); \ + (sh) = (ah) - (bh) - ((al) < (bl)); \ (sl) = __x; \ } while (0) #endif -- 2.47.2