/* More subroutines needed by GCC output code on some machines. */
/* Compile this one with gcc. */
-/* Copyright (C) 1989-2016 Free Software Foundation, Inc.
+/* Copyright (C) 1989-2024 Free Software Foundation, Inc.
This file is part of GCC.
Wtype
__addvSI3 (Wtype a, Wtype b)
{
- const Wtype w = (UWtype) a + (UWtype) b;
+ Wtype w;
- if (b >= 0 ? w < a : w > a)
+ if (__builtin_add_overflow (a, b, &w))
abort ();
return w;
SItype
__addvsi3 (SItype a, SItype b)
{
- const SItype w = (USItype) a + (USItype) b;
+ SItype w;
- if (b >= 0 ? w < a : w > a)
+ if (__builtin_add_overflow (a, b, &w))
abort ();
return w;
DWtype
__addvDI3 (DWtype a, DWtype b)
{
- const DWtype w = (UDWtype) a + (UDWtype) b;
+ DWtype w;
- if (b >= 0 ? w < a : w > a)
+ if (__builtin_add_overflow (a, b, &w))
abort ();
return w;
Wtype
__subvSI3 (Wtype a, Wtype b)
{
- const Wtype w = (UWtype) a - (UWtype) b;
+ Wtype w;
- if (b >= 0 ? w > a : w < a)
+ if (__builtin_sub_overflow (a, b, &w))
abort ();
return w;
SItype
__subvsi3 (SItype a, SItype b)
{
- const SItype w = (USItype) a - (USItype) b;
+ SItype w;
- if (b >= 0 ? w > a : w < a)
+ if (__builtin_sub_overflow (a, b, &w))
abort ();
return w;
DWtype
__subvDI3 (DWtype a, DWtype b)
{
- const DWtype w = (UDWtype) a - (UDWtype) b;
+ DWtype w;
- if (b >= 0 ? w > a : w < a)
+ if (__builtin_sub_overflow (a, b, &w))
abort ();
return w;
Wtype
__mulvSI3 (Wtype a, Wtype b)
{
- const DWtype w = (DWtype) a * (DWtype) b;
+ Wtype w;
- if ((Wtype) (w >> W_TYPE_SIZE) != (Wtype) w >> (W_TYPE_SIZE - 1))
+ if (__builtin_mul_overflow (a, b, &w))
abort ();
return w;
}
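The rewritten trapping routines all follow the same pattern, built on GCC's checked-arithmetic builtins. A minimal standalone sketch of that pattern (editorial illustration, not part of libgcc; trapping_add is a hypothetical name):

#include <stdlib.h>

/* Sketch of the builtin-based pattern used by __addvSI3 above:
   __builtin_add_overflow computes a + b in infinite precision,
   stores the wrapped result in *w, and returns nonzero iff the
   exact result does not fit the destination type.  */
static int
trapping_add (int a, int b)
{
  int w;
  if (__builtin_add_overflow (a, b, &w))
    abort ();			/* signed overflow traps */
  return w;
}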
#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
-#undef WORD_SIZE
-#define WORD_SIZE (sizeof (SItype) * __CHAR_BIT__)
SItype
__mulvsi3 (SItype a, SItype b)
{
- const DItype w = (DItype) a * (DItype) b;
+ SItype w;
- if ((SItype) (w >> WORD_SIZE) != (SItype) w >> (WORD_SIZE-1))
+ if (__builtin_mul_overflow (a, b, &w))
abort ();
return w;
Wtype
__negvSI2 (Wtype a)
{
- const Wtype w = -(UWtype) a;
+ Wtype w;
- if (a >= 0 ? w > 0 : w < 0)
+ if (__builtin_sub_overflow (0, a, &w))
abort ();
- return w;
+ return w;
}
#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
SItype
__negvsi2 (SItype a)
{
- const SItype w = -(USItype) a;
+ SItype w;
- if (a >= 0 ? w > 0 : w < 0)
+ if (__builtin_sub_overflow (0, a, &w))
abort ();
- return w;
+ return w;
}
#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
#endif
DWtype
__negvDI2 (DWtype a)
{
- const DWtype w = -(UDWtype) a;
+ DWtype w;
- if (a >= 0 ? w > 0 : w < 0)
+ if (__builtin_sub_overflow (0, a, &w))
abort ();
return w;
Wtype
__absvSI2 (Wtype a)
{
- Wtype w = a;
-
- if (a < 0)
-#ifdef L_negvsi2
- w = __negvSI2 (a);
-#else
- w = -(UWtype) a;
+ const Wtype v = 0 - (a < 0);
+ Wtype w;
- if (w < 0)
+ if (__builtin_add_overflow (a, v, &w))
abort ();
-#endif
- return w;
+ return v ^ w;
}
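The branchless __absv* rewrite works because v is 0 for non-negative a and all-ones for negative a, so (a + v) ^ v yields a unchanged or ~(a - 1) == -a in two's complement; the overflow check fires exactly for the most negative value, whose absolute value is unrepresentable. A small editorial sketch (overflow check omitted):

#include <assert.h>

/* Branchless absolute value in the style of __absvSI2 above:
   v == 0 keeps a as-is; v == -1 gives (a - 1) ^ -1 == ~(a - 1) == -a.  */
static int
abs_sketch (int a)
{
  const int v = 0 - (a < 0);	/* 0 or -1 */
  return (a + v) ^ v;		/* no trap on INT_MIN in this sketch */
}

int
main (void)
{
  assert (abs_sketch (5) == 5 && abs_sketch (-5) == 5 && abs_sketch (0) == 0);
  return 0;
}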
#ifdef COMPAT_SIMODE_TRAPPING_ARITHMETIC
SItype
__absvsi2 (SItype a)
{
- SItype w = a;
-
- if (a < 0)
-#ifdef L_negvsi2
- w = __negvsi2 (a);
-#else
- w = -(USItype) a;
+ const SItype v = 0 - (a < 0);
+ SItype w;
- if (w < 0)
+ if (__builtin_add_overflow (a, v, &w))
abort ();
-#endif
- return w;
+ return v ^ w;
}
#endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
#endif
DWtype
__absvDI2 (DWtype a)
{
- DWtype w = a;
-
- if (a < 0)
-#ifdef L_negvdi2
- w = __negvDI2 (a);
-#else
- w = -(UDWtype) a;
+ const DWtype v = 0 - (a < 0);
+ DWtype w;
- if (w < 0)
+ if (__builtin_add_overflow (a, v, &w))
abort ();
-#endif
- return w;
+ return v ^ w;
}
#endif
\f
}
else
{
- if (uu.s.high == (Wtype) -1 && vv.s.high == (Wtype) - 1)
+ if ((uu.s.high & vv.s.high) == (Wtype) -1
+ && (uu.s.low | vv.s.low) != 0)
{
DWunion ww = {.ll = (UDWtype) (UWtype) uu.s.low
* (UDWtype) (UWtype) vv.s.low};
SItype
__bswapsi2 (SItype u)
{
- return ((((u) & 0xff000000) >> 24)
- | (((u) & 0x00ff0000) >> 8)
- | (((u) & 0x0000ff00) << 8)
- | (((u) & 0x000000ff) << 24));
+ return ((((u) & 0xff000000u) >> 24)
+ | (((u) & 0x00ff0000u) >> 8)
+ | (((u) & 0x0000ff00u) << 8)
+ | (((u) & 0x000000ffu) << 24));
}
#endif
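The added u suffixes matter for the low byte: without them, (u & 0x000000ff) << 24 shifts a signed int into the sign bit, which is undefined behavior. A hedged standalone equivalent using fixed-width types:

#include <stdint.h>

/* Same byte swap with explicitly unsigned arithmetic; (u & 0xffu) << 24
   may set bit 31 without the UB of a signed left shift.  */
static uint32_t
bswap32_sketch (uint32_t u)
{
  return ((u & 0xff000000u) >> 24)
	 | ((u & 0x00ff0000u) >> 8)
	 | ((u & 0x0000ff00u) << 8)
	 | ((u & 0x000000ffu) << 24);
}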
#ifdef L_bswapdi2
#endif
\f
#if (defined (L_udivdi3) || defined (L_divdi3) || \
- defined (L_umoddi3) || defined (L_moddi3))
+ defined (L_umoddi3) || defined (L_moddi3) || \
+ defined (L_divmoddi4))
#define L_udivmoddi4
#endif
#ifdef TARGET_HAS_NO_HW_DIVIDE
#if (defined (L_udivdi3) || defined (L_divdi3) || \
- defined (L_umoddi3) || defined (L_moddi3))
+ defined (L_umoddi3) || defined (L_moddi3) || \
+ defined (L_divmoddi4))
static inline __attribute__ ((__always_inline__))
#endif
UDWtype
aligns the divisor under the dividend and then performs a number of
test-subtract iterations which shift the dividend left. The number of
iterations is k + 1, where k is the number of bit positions the
- divisor must be shifted left to align it under the dividend.
+ divisor must be shifted left to align it under the dividend.
The quotient bits can be saved in the rightmost positions of the dividend
as it shifts left on each test-subtract iteration. */
k = lz1 - lz2;
y = (y << k);
- /* Dividend can exceed 2 ^ (width − 1) − 1 but still be less than the
+ /* Dividend can exceed 2 ^ (width - 1) - 1 but still be less than the
aligned divisor. A normal iteration can drop the high-order bit
of the dividend. Therefore, the first test-subtract iteration is a
special case, saving its quotient bit in a separate location and
#else
#if (defined (L_udivdi3) || defined (L_divdi3) || \
- defined (L_umoddi3) || defined (L_moddi3))
+ defined (L_umoddi3) || defined (L_moddi3) || \
+ defined (L_divmoddi4))
static inline __attribute__ ((__always_inline__))
#endif
UDWtype
}
#endif
+#ifdef L_divmoddi4
+DWtype
+__divmoddi4 (DWtype u, DWtype v, DWtype *rp)
+{
+ Wtype c1 = 0, c2 = 0;
+ DWunion uu = {.ll = u};
+ DWunion vv = {.ll = v};
+ DWtype w;
+ DWtype r;
+
+ if (uu.s.high < 0)
+ c1 = ~c1, c2 = ~c2,
+ uu.ll = -uu.ll;
+ if (vv.s.high < 0)
+ c1 = ~c1,
+ vv.ll = -vv.ll;
+
+ w = __udivmoddi4 (uu.ll, vv.ll, (UDWtype*)&r);
+ if (c1)
+ w = -w;
+ if (c2)
+ r = -r;
+
+ *rp = r;
+ return w;
+}
+#endif
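__divmoddi4 follows C's truncating division, so the remainder carries the sign of the dividend: for u = 7, v = -2 the quotient is -3 and the remainder 1 (7 == -3 * -2 + 1). An editorial sketch of the contract the sign fix-ups above implement:

/* Equivalent specification of __divmoddi4 in terms of C's / and %
   (illustrative only; the real routine avoids the double division).  */
long long
divmoddi4_spec (long long u, long long v, long long *rp)
{
  *rp = u % v;			/* remainder has the sign of u */
  return u / v;			/* quotient truncates toward zero */
}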
+
#ifdef L_umoddi3
UDWtype
__umoddi3 (UDWtype u, UDWtype v)
}
#endif
\f
+#if (defined(__BITINT_MAXWIDTH__) \
+ && (defined(L_mulbitint3) || defined(L_divmodbitint4)))
+/* _BitInt support. */
+
+/* If *P is zero extended (or, for PREC < 0, sign extended) from some
+ narrower _BitInt value, reduce the precision accordingly. */
+
+static inline __attribute__((__always_inline__)) SItype
+bitint_reduce_prec (const UWtype **p, SItype prec)
+{
+ UWtype mslimb;
+ SItype i;
+ if (prec < 0)
+ {
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ i = 0;
+#else
+ i = ((USItype) -1 - prec) / W_TYPE_SIZE;
+#endif
+ mslimb = (*p)[i];
+ if (mslimb & ((UWtype) 1 << (((USItype) -1 - prec) % W_TYPE_SIZE)))
+ {
+ SItype n = ((USItype) -prec) % W_TYPE_SIZE;
+ if (n)
+ {
+ mslimb |= ((UWtype) -1 << (((USItype) -1 - prec) % W_TYPE_SIZE));
+ if (mslimb == (UWtype) -1)
+ {
+ prec += n;
+ if (prec >= -1)
+ return -2;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ ++p;
+#else
+ --i;
+#endif
+ mslimb = (*p)[i];
+ n = 0;
+ }
+ }
+ while (mslimb == (UWtype) -1)
+ {
+ prec += W_TYPE_SIZE;
+ if (prec >= -1)
+ return -2;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ ++p;
+#else
+ --i;
+#endif
+ mslimb = (*p)[i];
+ }
+ if (n == 0)
+ {
+ if ((Wtype) mslimb >= 0)
+ {
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ --p;
+#endif
+ return prec - 1;
+ }
+ }
+ return prec;
+ }
+ else
+ prec = -prec;
+ }
+ else
+ {
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ i = 0;
+#else
+ i = ((USItype) prec - 1) / W_TYPE_SIZE;
+#endif
+ mslimb = (*p)[i];
+ }
+ SItype n = ((USItype) prec) % W_TYPE_SIZE;
+ if (n)
+ {
+ mslimb &= ((UWtype) 1 << (((USItype) prec) % W_TYPE_SIZE)) - 1;
+ if (mslimb == 0)
+ {
+ prec -= n;
+ if (prec == 0)
+ return 1;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ ++p;
+#else
+ --i;
+#endif
+ mslimb = (*p)[i];
+ }
+ }
+ while (mslimb == 0)
+ {
+ prec -= W_TYPE_SIZE;
+ if (prec == 0)
+ return 1;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ ++p;
+#else
+ --i;
+#endif
+ mslimb = (*p)[i];
+ }
+ return prec;
+}
+
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+# define BITINT_INC -1
+# define BITINT_END(be, le) (be)
+#else
+# define BITINT_INC 1
+# define BITINT_END(be, le) (le)
+#endif
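The two macros abstract the limb order: BITINT_INC steps from the least significant limb toward the most significant one, and BITINT_END picks the big-endian or little-endian variant of an index. A tiny editorial illustration:

/* For an n-limb buffer p, the least significant limb is p[n - 1] when
   limbs are stored most-significant-first and p[0] otherwise.  */
static UWtype
least_significant_limb (const UWtype *p, USItype n)
{
  return p[BITINT_END (n - 1, 0)];
}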
+
+#ifdef L_mulbitint3
+/* D = S * L. */
+
+static UWtype
+bitint_mul_1 (UWtype *d, const UWtype *s, UWtype l, SItype n)
+{
+ UWtype sv, hi, lo, c = 0;
+ do
+ {
+ sv = *s;
+ s += BITINT_INC;
+ umul_ppmm (hi, lo, sv, l);
+ c = __builtin_add_overflow (lo, c, &lo) + hi;
+ *d = lo;
+ d += BITINT_INC;
+ }
+ while (--n);
+ return c;
+}
+
+/* D += S * L. */
+
+static UWtype
+bitint_addmul_1 (UWtype *d, const UWtype *s, UWtype l, SItype n)
+{
+ UWtype sv, hi, lo, c = 0;
+ do
+ {
+ sv = *s;
+ s += BITINT_INC;
+ umul_ppmm (hi, lo, sv, l);
+ hi += __builtin_add_overflow (lo, *d, &lo);
+ c = __builtin_add_overflow (lo, c, &lo) + hi;
+ *d = lo;
+ d += BITINT_INC;
+ }
+ while (--n);
+ return c;
+}
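Together the two helpers implement the schoolbook multiply: one bitint_mul_1 for the first limb of V, then one bitint_addmul_1 per further limb, each shifted one limb up. An editorial sketch assuming little-endian limb order and n, m >= 1 (the real __mulbitint3 below additionally handles sign/zero extension and truncation to the result precision):

/* d must have room for n + m limbs; illustrative only.  */
static void
mul_basecase_sketch (UWtype *d, const UWtype *u, SItype n,
		     const UWtype *v, SItype m)
{
  d[n] = bitint_mul_1 (d, u, v[0], n);		/* d = u * v[0] */
  for (SItype j = 1; j < m; ++j)		/* d += (u * v[j]) << j limbs */
    d[j + n] = bitint_addmul_1 (d + j, u, v[j], n);
}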
+
+/* If XPREC is positive, it is the precision in bits
+ of an unsigned _BitInt operand (which has XPREC/W_TYPE_SIZE
+ full limbs and, if XPREC%W_TYPE_SIZE is non-zero, one partial limb).
+ If XPREC is negative, -XPREC is the precision in bits
+ of a signed _BitInt operand. RETPREC should always be
+ positive. */
+
+void
+__mulbitint3 (UWtype *ret, SItype retprec,
+ const UWtype *u, SItype uprec,
+ const UWtype *v, SItype vprec)
+{
+ uprec = bitint_reduce_prec (&u, uprec);
+ vprec = bitint_reduce_prec (&v, vprec);
+ USItype auprec = uprec < 0 ? -uprec : uprec;
+ USItype avprec = vprec < 0 ? -vprec : vprec;
+
+ /* Prefer non-negative U.
+ Otherwise make sure V doesn't have higher precision than U. */
+ if ((uprec < 0 && vprec >= 0)
+ || (avprec > auprec && !(uprec >= 0 && vprec < 0)))
+ {
+ SItype p;
+ const UWtype *t;
+ p = uprec; uprec = vprec; vprec = p;
+ p = auprec; auprec = avprec; avprec = p;
+ t = u; u = v; v = t;
+ }
+
+ USItype un = auprec / W_TYPE_SIZE;
+ USItype un2 = (auprec + W_TYPE_SIZE - 1) / W_TYPE_SIZE;
+ USItype vn = avprec / W_TYPE_SIZE;
+ USItype vn2 = (avprec + W_TYPE_SIZE - 1) / W_TYPE_SIZE;
+ USItype retn = ((USItype) retprec + W_TYPE_SIZE - 1) / W_TYPE_SIZE;
+ USItype retidx, uidx, vidx;
+ UWtype vv;
+ /* Indexes of least significant limb. */
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ retidx = retn - 1;
+ uidx = un2 - 1;
+ vidx = vn2 - 1;
+#else
+ retidx = 0;
+ uidx = 0;
+ vidx = 0;
+#endif
+ if (__builtin_expect (auprec <= W_TYPE_SIZE, 0) && vprec < 0)
+ {
+ UWtype uu = u[uidx];
+ if (__builtin_expect (auprec < W_TYPE_SIZE, 0))
+ uu &= ((UWtype) 1 << (auprec % W_TYPE_SIZE)) - 1;
+ if (uu == 0)
+ {
+ /* 0 * negative would otherwise be mishandled below, so
+ handle it specially. */
+ __builtin_memset (ret, 0, retn * sizeof (UWtype));
+ return;
+ }
+ }
+ vv = v[vidx];
+ if (__builtin_expect (avprec < W_TYPE_SIZE, 0))
+ {
+ if (vprec > 0)
+ vv &= ((UWtype) 1 << (avprec % W_TYPE_SIZE)) - 1;
+ else
+ vv |= (UWtype) -1 << (avprec % W_TYPE_SIZE);
+ }
+
+ USItype n = un > retn ? retn : un;
+ USItype n2 = n;
+ USItype retidx2 = retidx + n * BITINT_INC;
+ UWtype c = 0, uv = 0;
+ if (n)
+ c = bitint_mul_1 (ret + retidx, u + uidx, vv, n);
+ if (retn > un && un2 != un)
+ {
+ UWtype hi, lo;
+ uv = u[uidx + n * BITINT_INC];
+ if (uprec > 0)
+ uv &= ((UWtype) 1 << (auprec % W_TYPE_SIZE)) - 1;
+ else
+ uv |= (UWtype) -1 << (auprec % W_TYPE_SIZE);
+ umul_ppmm (hi, lo, uv, vv);
+ c = __builtin_add_overflow (lo, c, &lo) + hi;
+ ret[retidx2] = lo;
+ retidx2 += BITINT_INC;
+ ++n2;
+ }
+ if (retn > un2)
+ {
+ if (uprec < 0)
+ {
+ while (n2 < retn)
+ {
+ if (n2 >= un2 + vn2)
+ break;
+ UWtype hi, lo;
+ umul_ppmm (hi, lo, (UWtype) -1, vv);
+ c = __builtin_add_overflow (lo, c, &lo) + hi;
+ ret[retidx2] = lo;
+ retidx2 += BITINT_INC;
+ ++n2;
+ }
+ }
+ else
+ {
+ ret[retidx2] = c;
+ retidx2 += BITINT_INC;
+ ++n2;
+ }
+ /* If RET has more limbs than U after precision reduction,
+ fill in the remaining limbs. */
+ while (n2 < retn)
+ {
+ if (n2 < un2 + vn2 || (uprec ^ vprec) >= 0)
+ c = 0;
+ else
+ c = (UWtype) -1;
+ ret[retidx2] = c;
+ retidx2 += BITINT_INC;
+ ++n2;
+ }
+ }
+ /* N is now the number of possibly non-zero limbs in RET (ignoring
+ limbs above UN2 + VN2, which, if any, have been finalized already). */
+ USItype end = vprec < 0 ? un2 + vn2 : vn2;
+ if (retn > un2 + vn2)
+ retn = un2 + vn2;
+ if (end > retn)
+ end = retn;
+ for (USItype m = 1; m < end; ++m)
+ {
+ retidx += BITINT_INC;
+ vidx += BITINT_INC;
+ if (m < vn2)
+ {
+ vv = v[vidx];
+ if (__builtin_expect (m == vn, 0))
+ {
+ if (vprec > 0)
+ vv &= ((UWtype) 1 << (avprec % W_TYPE_SIZE)) - 1;
+ else
+ vv |= (UWtype) -1 << (avprec % W_TYPE_SIZE);
+ }
+ }
+ else
+ vv = (UWtype) -1;
+ if (m + n > retn)
+ n = retn - m;
+ c = 0;
+ if (n)
+ c = bitint_addmul_1 (ret + retidx, u + uidx, vv, n);
+ n2 = m + n;
+ retidx2 = retidx + n * BITINT_INC;
+ if (n2 < retn && un2 != un)
+ {
+ UWtype hi, lo;
+ umul_ppmm (hi, lo, uv, vv);
+ hi += __builtin_add_overflow (lo, ret[retidx2], &lo);
+ c = __builtin_add_overflow (lo, c, &lo) + hi;
+ ret[retidx2] = lo;
+ retidx2 += BITINT_INC;
+ ++n2;
+ }
+ if (uprec < 0)
+ while (n2 < retn)
+ {
+ UWtype hi, lo;
+ umul_ppmm (hi, lo, (UWtype) -1, vv);
+ hi += __builtin_add_overflow (lo, ret[retidx2], &lo);
+ c = __builtin_add_overflow (lo, c, &lo) + hi;
+ ret[retidx2] = lo;
+ retidx2 += BITINT_INC;
+ ++n2;
+ }
+ else if (n2 < retn)
+ {
+ ret[retidx2] = c;
+ retidx2 += BITINT_INC;
+ }
+ }
+}
+#endif
+
+#ifdef L_divmodbitint4
+/* D = -S. */
+
+static void
+bitint_negate (UWtype *d, const UWtype *s, SItype n)
+{
+ UWtype c = 1;
+ do
+ {
+ UWtype sv = *s, lo;
+ s += BITINT_INC;
+ c = __builtin_add_overflow (~sv, c, &lo);
+ *d = lo;
+ d += BITINT_INC;
+ }
+ while (--n);
+}
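bitint_negate is plain two's complement across limbs: -S == ~S + 1, with the +1 carried upward for as long as limbs of ~S are all-ones. A two-limb editorial check with 32-bit limbs:

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  uint32_t s[2] = { 0, 1 };	/* value 2^32, little-endian limbs */
  uint32_t d[2], c = 1;
  for (int i = 0; i < 2; ++i)	/* d = ~s + 1, limb by limb */
    c = __builtin_add_overflow ((uint32_t) ~s[i], c, &d[i]);
  assert (d[0] == 0 && d[1] == 0xffffffffu);	/* 2^64 - 2^32 */
  return 0;
}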
+
+/* D -= S * L. */
+
+static UWtype
+bitint_submul_1 (UWtype *d, const UWtype *s, UWtype l, SItype n)
+{
+ UWtype sv, hi, lo, c = 0;
+ do
+ {
+ sv = *s;
+ s += BITINT_INC;
+ umul_ppmm (hi, lo, sv, l);
+ hi += __builtin_sub_overflow (*d, lo, &lo);
+ c = __builtin_sub_overflow (lo, c, &lo) + hi;
+ *d = lo;
+ d += BITINT_INC;
+ }
+ while (--n);
+ return c;
+}
+
+/* If XPREC is positive, it is the precision in bits
+ of an unsigned _BitInt operand (which has XPREC/W_TYPE_SIZE
+ full limbs and, if XPREC%W_TYPE_SIZE is non-zero, one partial limb).
+ If XPREC is negative, -XPREC is the precision in bits
+ of a signed _BitInt operand. QPREC and RPREC should
+ always be non-negative. If either Q or R is NULL (at least
+ one should be non-NULL), then the corresponding QPREC or RPREC
+ should be 0. */
+
+void
+__divmodbitint4 (UWtype *q, SItype qprec,
+ UWtype *r, SItype rprec,
+ const UWtype *u, SItype uprec,
+ const UWtype *v, SItype vprec)
+{
+ uprec = bitint_reduce_prec (&u, uprec);
+ vprec = bitint_reduce_prec (&v, vprec);
+ USItype auprec = uprec < 0 ? -uprec : uprec;
+ USItype avprec = vprec < 0 ? -vprec : vprec;
+ USItype un = (auprec + W_TYPE_SIZE - 1) / W_TYPE_SIZE;
+ USItype vn = (avprec + W_TYPE_SIZE - 1) / W_TYPE_SIZE;
+ USItype qn = ((USItype) qprec + W_TYPE_SIZE - 1) / W_TYPE_SIZE;
+ USItype rn = ((USItype) rprec + W_TYPE_SIZE - 1) / W_TYPE_SIZE;
+ USItype up = auprec % W_TYPE_SIZE;
+ USItype vp = avprec % W_TYPE_SIZE;
+ if (__builtin_expect (un < vn, 0))
+ {
+ /* If abs(v) > abs(u), then q is 0 and r is u. */
+ if (q)
+ __builtin_memset (q, 0, qn * sizeof (UWtype));
+ if (r == NULL)
+ return;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ r += rn - 1;
+ u += un - 1;
+#endif
+ if (up)
+ --un;
+ if (rn < un)
+ un = rn;
+ for (rn -= un; un; --un)
+ {
+ *r = *u;
+ r += BITINT_INC;
+ u += BITINT_INC;
+ }
+ if (!rn)
+ return;
+ if (up)
+ {
+ if (uprec > 0)
+ *r = *u & (((UWtype) 1 << up) - 1);
+ else
+ *r = *u | ((UWtype) -1 << up);
+ r += BITINT_INC;
+ if (!--rn)
+ return;
+ }
+ UWtype c = uprec < 0 ? (UWtype) -1 : (UWtype) 0;
+ for (; rn; --rn)
+ {
+ *r = c;
+ r += BITINT_INC;
+ }
+ return;
+ }
+ USItype qn2 = un - vn + 1;
+ if (qn >= qn2)
+ qn2 = 0;
+ USItype sz = un + 1 + vn + qn2;
+ UWtype *buf = __builtin_alloca (sz * sizeof (UWtype));
+ USItype uidx, vidx;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ uidx = un - 1;
+ vidx = vn - 1;
+#else
+ uidx = 0;
+ vidx = 0;
+#endif
+ if (uprec < 0)
+ bitint_negate (buf + BITINT_END (uidx + 1, 0), u + uidx, un);
+ else
+ __builtin_memcpy (buf + BITINT_END (1, 0), u, un * sizeof (UWtype));
+ if (up)
+ buf[BITINT_END (1, un - 1)] &= (((UWtype) 1 << up) - 1);
+ if (vprec < 0)
+ bitint_negate (buf + un + 1 + vidx, v + vidx, vn);
+ else
+ __builtin_memcpy (buf + un + 1, v, vn * sizeof (UWtype));
+ if (vp)
+ buf[un + 1 + BITINT_END (0, vn - 1)] &= (((UWtype) 1 << vp) - 1);
+ UWtype *u2 = buf;
+ UWtype *v2 = u2 + un + 1;
+ UWtype *q2 = v2 + vn;
+ if (!qn2)
+ q2 = q + BITINT_END (qn - (un - vn + 1), 0);
+
+ /* Knuth's algorithm. See also ../gcc/wide-int.cc (divmod_internal_2). */
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+ /* Handle single limb divisor first. */
+ if (vn == 1)
+ {
+ UWtype vv = v2[0];
+ if (vv == 0)
+ vv = 1 / vv; /* Divide intentionally by zero. */
+ UWtype k = 0;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (SItype i = 0; i <= un - 1; ++i)
+#else
+ for (SItype i = un - 1; i >= 0; --i)
+#endif
+ udiv_qrnnd (q2[i], k, k, u2[BITINT_END (i + 1, i)], vv);
+ if (r != NULL)
+ r[BITINT_END (rn - 1, 0)] = k;
+ }
+ else
+#endif
+ {
+ SItype s;
+#ifdef UDIV_NEEDS_NORMALIZATION
+ if (vn == 1 && v2[0] == 0)
+ s = 0;
+ else
+#endif
+ if (sizeof (0U) == sizeof (UWtype))
+ s = __builtin_clz (v2[BITINT_END (0, vn - 1)]);
+ else if (sizeof (0UL) == sizeof (UWtype))
+ s = __builtin_clzl (v2[BITINT_END (0, vn - 1)]);
+ else
+ s = __builtin_clzll (v2[BITINT_END (0, vn - 1)]);
+ if (s)
+ {
+ /* Normalize by shifting v2 left so that it has msb set. */
+ const SItype n = sizeof (UWtype) * __CHAR_BIT__;
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (SItype i = 0; i < vn - 1; ++i)
+#else
+ for (SItype i = vn - 1; i > 0; --i)
+#endif
+ v2[i] = (v2[i] << s) | (v2[i - BITINT_INC] >> (n - s));
+ v2[vidx] = v2[vidx] << s;
+ /* And shift u2 left by the same amount. */
+ u2[BITINT_END (0, un)] = u2[BITINT_END (1, un - 1)] >> (n - s);
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (SItype i = 1; i < un; ++i)
+#else
+ for (SItype i = un - 1; i > 0; --i)
+#endif
+ u2[i] = (u2[i] << s) | (u2[i - BITINT_INC] >> (n - s));
+ u2[BITINT_END (un, 0)] = u2[BITINT_END (un, 0)] << s;
+ }
+ else
+ u2[BITINT_END (0, un)] = 0;
+#ifdef UDIV_NEEDS_NORMALIZATION
+ /* Handle single limb divisor first. */
+ if (vn == 1)
+ {
+ UWtype vv = v2[0];
+ if (vv == 0)
+ vv = 1 / vv; /* Divide intentionally by zero. */
+ UWtype k = u2[BITINT_END (0, un)];
+#if __LIBGCC_BITINT_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (SItype i = 0; i <= un - 1; ++i)
+#else
+ for (SItype i = un - 1; i >= 0; --i)
+#endif
+ udiv_qrnnd (q2[i], k, k, u2[BITINT_END (i + 1, i)], vv);
+ if (r != NULL)
+ r[BITINT_END (rn - 1, 0)] = k >> s;
+ }
+ else
+#endif
+ {
+ UWtype vv1 = v2[BITINT_END (0, vn - 1)];
+ UWtype vv0 = v2[BITINT_END (1, vn - 2)];
+ /* Main loop. */
+ for (SItype j = un - vn; j >= 0; --j)
+ {
+ /* Compute estimate in qhat. */
+ UWtype uv1 = u2[BITINT_END (un - j - vn, j + vn)];
+ UWtype uv0 = u2[BITINT_END (un - j - vn + 1, j + vn - 1)];
+ UWtype qhat, rhat, hi, lo, c;
+ if (uv1 >= vv1)
+ {
+ /* udiv_qrnnd doesn't support quotients which don't
+ fit into UWtype, so subtract vv1 from uv1:uv0
+ first. */
+ uv1 -= vv1 + __builtin_sub_overflow (uv0, vv1, &uv0);
+ udiv_qrnnd (qhat, rhat, uv1, uv0, vv1);
+ if (!__builtin_add_overflow (rhat, vv1, &rhat))
+ goto again;
+ }
+ else
+ {
+ udiv_qrnnd (qhat, rhat, uv1, uv0, vv1);
+ again:
+ umul_ppmm (hi, lo, qhat, vv0);
+ if (hi > rhat
+ || (hi == rhat
+ && lo > u2[BITINT_END (un - j - vn + 2,
+ j + vn - 2)]))
+ {
+ --qhat;
+ if (!__builtin_add_overflow (rhat, vv1, &rhat))
+ goto again;
+ }
+ }
+
+ c = bitint_submul_1 (u2 + BITINT_END (un - j, j),
+ v2 + BITINT_END (vn - 1, 0), qhat, vn);
+ u2[BITINT_END (un - j - vn, j + vn)] -= c;
+ /* If we've subtracted too much, decrease qhat and
+ add back. */
+ if ((Wtype) u2[BITINT_END (un - j - vn, j + vn)] < 0)
+ {
+ --qhat;
+ c = 0;
+ for (USItype i = 0; i < vn; ++i)
+ {
+ UWtype s = v2[BITINT_END (vn - 1 - i, i)];
+ UWtype d = u2[BITINT_END (un - i - j, i + j)];
+ UWtype c1 = __builtin_add_overflow (d, s, &d);
+ UWtype c2 = __builtin_add_overflow (d, c, &d);
+ c = c1 + c2;
+ u2[BITINT_END (un - i - j, i + j)] = d;
+ }
+ u2[BITINT_END (un - j - vn, j + vn)] += c;
+ }
+ q2[BITINT_END (un - vn - j, j)] = qhat;
+ }
+ if (r != NULL)
+ {
+ if (s)
+ {
+ const SItype n = sizeof (UWtype) * __CHAR_BIT__;
+ /* Unnormalize remainder. */
+ USItype i;
+ for (i = 0; i < vn && i < rn; ++i)
+ r[BITINT_END (rn - 1 - i, i)]
+ = ((u2[BITINT_END (un - i, i)] >> s)
+ | (u2[BITINT_END (un - i - 1, i + 1)] << (n - s)));
+ if (i < rn)
+ r[BITINT_END (rn - vn, vn - 1)]
+ = u2[BITINT_END (un - vn + 1, vn - 1)] >> s;
+ }
+ else if (rn > vn)
+ __builtin_memcpy (&r[BITINT_END (rn - vn, 0)],
+ &u2[BITINT_END (un + 1 - vn, 0)],
+ vn * sizeof (UWtype));
+ else
+ __builtin_memcpy (&r[0], &u2[BITINT_END (un + 1 - rn, 0)],
+ rn * sizeof (UWtype));
+ }
+ }
+ }
+ if (q != NULL)
+ {
+ if ((uprec < 0) ^ (vprec < 0))
+ {
+ /* Negative quotient. */
+ USItype n;
+ if (un - vn + 1 > qn)
+ n = qn;
+ else
+ n = un - vn + 1;
+ bitint_negate (q + BITINT_END (qn - 1, 0),
+ q2 + BITINT_END (un - vn, 0), n);
+ if (qn > n)
+ __builtin_memset (q + BITINT_END (0, n), -1,
+ (qn - n) * sizeof (UWtype));
+ }
+ else
+ {
+ /* Positive quotient. */
+ if (qn2)
+ __builtin_memcpy (q, q2 + BITINT_END (un - vn + 1 - qn, 0),
+ qn * sizeof (UWtype));
+ else if (qn > un - vn + 1)
+ __builtin_memset (q + BITINT_END (0, un - vn + 1), 0,
+ (qn - (un - vn + 1)) * sizeof (UWtype));
+ }
+ }
+ if (r != NULL)
+ {
+ if (uprec < 0)
+ {
+ /* Negative remainder. */
+ bitint_negate (r + BITINT_END (rn - 1, 0),
+ r + BITINT_END (rn - 1, 0),
+ rn > vn ? vn : rn);
+ if (rn > vn)
+ __builtin_memset (r + BITINT_END (0, vn), -1,
+ (rn - vn) * sizeof (UWtype));
+ }
+ else
+ {
+ /* Positive remainder. */
+ if (rn > vn)
+ __builtin_memset (r + BITINT_END (0, vn), 0,
+ (rn - vn) * sizeof (UWtype));
+ }
+ }
+}
+#endif
+#endif
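For reference, a hedged sketch of how a caller would use the interface documented above: dividing two unsigned 128-bit operands on a 64-bit target (W_TYPE_SIZE == 64 assumed), where each operand occupies two limbs and positive precision arguments mean "unsigned, this many bits". Illustrative only; in practice the compiler emits these calls when lowering _BitInt arithmetic.

static void
divmod_u128_sketch (UWtype q[2], UWtype r[2],
		    const UWtype u[2], const UWtype v[2])
{
  /* Quotient and remainder both requested at full 128-bit precision.  */
  __divmodbitint4 (q, 128, r, 128, u, 128, v, 128);
}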
+\f
#ifdef L_cmpdi2
cmp_return_type
__cmpdi2 (DWtype a, DWtype b)
{
- const DWunion au = {.ll = a};
- const DWunion bu = {.ll = b};
-
- if (au.s.high < bu.s.high)
- return 0;
- else if (au.s.high > bu.s.high)
- return 2;
- if ((UWtype) au.s.low < (UWtype) bu.s.low)
- return 0;
- else if ((UWtype) au.s.low > (UWtype) bu.s.low)
- return 2;
- return 1;
+ return (a > b) - (a < b) + 1;
}
#endif
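The simplified __cmpdi2 relies on the standard three-way idiom: (a > b) - (a < b) is -1, 0, or 1, and adding 1 yields the 0/1/2 encoding the old limb-by-limb code produced. A quick editorial check:

#include <assert.h>

static int
cmp3 (long long a, long long b)
{
  return (a > b) - (a < b) + 1;	/* 0: a < b, 1: a == b, 2: a > b */
}

int
main (void)
{
  assert (cmp3 (-1, 0) == 0 && cmp3 (42, 42) == 1 && cmp3 (1, -1) == 2);
  return 0;
}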
#ifdef L_ucmpdi2
cmp_return_type
-__ucmpdi2 (DWtype a, DWtype b)
+__ucmpdi2 (UDWtype a, UDWtype b)
{
- const DWunion au = {.ll = a};
- const DWunion bu = {.ll = b};
-
- if ((UWtype) au.s.high < (UWtype) bu.s.high)
- return 0;
- else if ((UWtype) au.s.high > (UWtype) bu.s.high)
- return 2;
- if ((UWtype) au.s.low < (UWtype) bu.s.low)
- return 0;
- else if ((UWtype) au.s.low > (UWtype) bu.s.low)
- return 2;
- return 1;
+ return (a > b) - (a < b) + 1;
}
#endif
\f
hi = -(UWtype) hi;
UWtype count, shift;
+#if !defined (COUNT_LEADING_ZEROS_0) || COUNT_LEADING_ZEROS_0 != W_TYPE_SIZE
+ if (hi == 0)
+ count = W_TYPE_SIZE;
+ else
+#endif
count_leading_zeros (count, hi);
/* No leading bits means u == minimum. */
if (count == 0)
- return -(Wtype_MAXp1_F * (Wtype_MAXp1_F / 2));
+ return Wtype_MAXp1_F * (FSTYPE) (hi | ((UWtype) u != 0));
shift = 1 + W_TYPE_SIZE - count;
TYPE
NAME (TYPE x, int m)
{
- unsigned int n = m < 0 ? -m : m;
+ unsigned int n = m < 0 ? -(unsigned int) m : (unsigned int) m;
TYPE y = n % 2 ? x : 1;
while (n >>= 1)
{
#endif
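The __powi* template above is exponentiation by squaring: it consumes the exponent bit by bit, squaring x at each step and multiplying the accumulator when the current bit is set, then takes a reciprocal for negative exponents. Written out for double as an editorial sketch:

static double
powi_sketch (double x, int m)
{
  unsigned int n = m < 0 ? -(unsigned int) m : (unsigned int) m;
  double y = n % 2 ? x : 1;	/* low bit of the exponent */
  while (n >>= 1)
    {
      x = x * x;		/* x^(2^k) for the next bit */
      if (n % 2)
	y = y * x;
    }
  return m < 0 ? 1 / y : y;	/* negative exponent: reciprocal */
}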
\f
-#if ((defined(L_mulsc3) || defined(L_divsc3)) && LIBGCC2_HAS_SF_MODE) \
+#if ((defined(L_mulhc3) || defined(L_divhc3)) && LIBGCC2_HAS_HF_MODE) \
+ || ((defined(L_mulsc3) || defined(L_divsc3)) && LIBGCC2_HAS_SF_MODE) \
|| ((defined(L_muldc3) || defined(L_divdc3)) && LIBGCC2_HAS_DF_MODE) \
|| ((defined(L_mulxc3) || defined(L_divxc3)) && LIBGCC2_HAS_XF_MODE) \
|| ((defined(L_multc3) || defined(L_divtc3)) && LIBGCC2_HAS_TF_MODE)
#undef double
#undef long
-#if defined(L_mulsc3) || defined(L_divsc3)
+#if defined(L_mulhc3) || defined(L_divhc3)
+# define MTYPE HFtype
+# define CTYPE HCtype
+# define AMTYPE SFtype
+# define MODE hc
+# define CEXT __LIBGCC_HF_FUNC_EXT__
+# define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__)
+#elif defined(L_mulsc3) || defined(L_divsc3)
# define MTYPE SFtype
# define CTYPE SCtype
+# define AMTYPE DFtype
# define MODE sc
# define CEXT __LIBGCC_SF_FUNC_EXT__
-# define NOTRUNC __LIBGCC_SF_EXCESS_PRECISION__
+# define NOTRUNC (!__LIBGCC_SF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_SF_MAX__ / 2)
+# define RMIN (__LIBGCC_SF_MIN__)
+# define RMIN2 (__LIBGCC_SF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_SF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_muldc3) || defined(L_divdc3)
# define MTYPE DFtype
# define CTYPE DCtype
# define MODE dc
# define CEXT __LIBGCC_DF_FUNC_EXT__
-# define NOTRUNC __LIBGCC_DF_EXCESS_PRECISION__
+# define NOTRUNC (!__LIBGCC_DF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_DF_MAX__ / 2)
+# define RMIN (__LIBGCC_DF_MIN__)
+# define RMIN2 (__LIBGCC_DF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_mulxc3) || defined(L_divxc3)
# define MTYPE XFtype
# define CTYPE XCtype
# define MODE xc
# define CEXT __LIBGCC_XF_FUNC_EXT__
-# define NOTRUNC __LIBGCC_XF_EXCESS_PRECISION__
+# define NOTRUNC (!__LIBGCC_XF_EXCESS_PRECISION__)
+# define RBIG (__LIBGCC_XF_MAX__ / 2)
+# define RMIN (__LIBGCC_XF_MIN__)
+# define RMIN2 (__LIBGCC_XF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_XF_EPSILON__)
+# define RMAX2 (RBIG * RMIN2)
#elif defined(L_multc3) || defined(L_divtc3)
# define MTYPE TFtype
# define CTYPE TCtype
# define MODE tc
# define CEXT __LIBGCC_TF_FUNC_EXT__
-# define NOTRUNC __LIBGCC_TF_EXCESS_PRECISION__
+# define NOTRUNC (!__LIBGCC_TF_EXCESS_PRECISION__)
+# if __LIBGCC_TF_MANT_DIG__ == 106
+# define RBIG (__LIBGCC_DF_MAX__ / 2)
+# define RMIN (__LIBGCC_DF_MIN__)
+# define RMIN2 (__LIBGCC_DF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_DF_EPSILON__)
+# else
+# define RBIG (__LIBGCC_TF_MAX__ / 2)
+# define RMIN (__LIBGCC_TF_MIN__)
+# define RMIN2 (__LIBGCC_TF_EPSILON__)
+# define RMINSCAL (1 / __LIBGCC_TF_EPSILON__)
+# endif
+# define RMAX2 (RBIG * RMIN2)
#else
# error
#endif
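The new RBIG/RMIN/RMIN2/RMINSCAL/RMAX2 thresholds drive the scaling in the divide code below: a textbook complex divide squares c and d, which overflows long before the quotient itself does. A double-precision editorial illustration of the failure the ratio form avoids:

#include <math.h>

/* c*c + d*d overflows for |c|, |d| ~ 1e200 although the quotient is
   representable; the ratio-based denominator stays near |c| + |d|.  */
int
main (void)
{
  double c = 1e200, d = 1e200;
  double naive = c * c + d * d;		/* 2e400 -> inf */
  double ratio = c / d;			/* 1.0 */
  double denom = (c * ratio) + d;	/* 2e200, representable */
  return !(isinf (naive) && !isinf (denom));
}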
#define CONCAT2(A,B) _CONCAT2(A,B)
#define _CONCAT2(A,B) A##B
-/* All of these would be present in a full C99 implementation of <math.h>
- and <complex.h>. Our problem is that only a few systems have such full
- implementations. Further, libgcc_s.so isn't currently linked against
- libm.so, and even for systems that do provide full C99, the extra overhead
- of all programs using libgcc having to link against libm. So avoid it. */
-
-#define isnan(x) __builtin_expect ((x) != (x), 0)
-#define isfinite(x) __builtin_expect (!isnan((x) - (x)), 1)
-#define isinf(x) __builtin_expect (!isnan(x) & !isfinite(x), 0)
+#define isnan(x) __builtin_isnan (x)
+#define isfinite(x) __builtin_isfinite (x)
+#define isinf(x) __builtin_isinf (x)
#define INFINITY CONCAT2(__builtin_huge_val, CEXT) ()
#define I 1i
# define TRUNC(x) __asm__ ("" : "=m"(x) : "m"(x))
#endif
-#if defined(L_mulsc3) || defined(L_muldc3) \
+#if defined(L_mulhc3) || defined(L_mulsc3) || defined(L_muldc3) \
|| defined(L_mulxc3) || defined(L_multc3)
CTYPE
}
#endif /* complex multiply */
-#if defined(L_divsc3) || defined(L_divdc3) \
+#if defined(L_divhc3) || defined(L_divsc3) || defined(L_divdc3) \
|| defined(L_divxc3) || defined(L_divtc3)
CTYPE
CONCAT3(__div,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
{
+#if defined(L_divhc3) \
+ || (defined(L_divsc3) && defined(__LIBGCC_HAVE_HWDBL__))
+
+ /* Half precision is handled with float precision.
+ float is handled with double precision when double-precision
+ hardware is available.
+ Due to the additional precision, the simple complex-divide
+ method (without Smith's method) is sufficient to get accurate
+ answers and runs slightly faster than Smith's method. */
+
+ AMTYPE aa, bb, cc, dd;
+ AMTYPE denom;
+ MTYPE x, y;
+ CTYPE res;
+ aa = a;
+ bb = b;
+ cc = c;
+ dd = d;
+
+ denom = (cc * cc) + (dd * dd);
+ x = ((aa * cc) + (bb * dd)) / denom;
+ y = ((bb * cc) - (aa * dd)) / denom;
+
+#else
MTYPE denom, ratio, x, y;
CTYPE res;
- /* ??? We can get better behavior from logarithmic scaling instead of
- the division. But that would mean starting to link libgcc against
- libm. We could implement something akin to ldexp/frexp as gcc builtins
- fairly easily... */
+ /* double, extended and long double have significant potential
+ underflow/overflow errors that can be greatly reduced with
+ a limited number of tests and adjustments. float is handled
+ the same way when no HW double is available. */
+
+ /* Scale by max(c,d) to reduce chances of denominator overflowing. */
if (FABS (c) < FABS (d))
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (d) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow is possible since c and d are less than RMIN2. */
+ if (FABS (d) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (d) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (d) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = c / d;
denom = (c * ratio) + d;
- x = ((a * ratio) + b) / denom;
- y = ((b * ratio) - a) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((a * ratio) + b) / denom;
+ y = ((b * ratio) - a) / denom;
+ }
+ else
+ {
+ x = ((c * (a / d)) + b) / denom;
+ y = ((c * (b / d)) - a) / denom;
+ }
}
else
{
+ /* Prevent underflow when denominator is near max representable. */
+ if (FABS (c) >= RBIG)
+ {
+ a = a / 2;
+ b = b / 2;
+ c = c / 2;
+ d = d / 2;
+ }
+ /* Avoid overflow/underflow issues when both c and d are small.
+ Scaling up helps avoid some underflows.
+ No new overflow is possible since both c and d are less than RMIN2. */
+ if (FABS (c) < RMIN2)
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ else
+ {
+ if (((FABS (a) < RMIN) && (FABS (b) < RMAX2) && (FABS (c) < RMAX2))
+ || ((FABS (b) < RMIN) && (FABS (a) < RMAX2)
+ && (FABS (c) < RMAX2)))
+ {
+ a = a * RMINSCAL;
+ b = b * RMINSCAL;
+ c = c * RMINSCAL;
+ d = d * RMINSCAL;
+ }
+ }
ratio = d / c;
denom = (d * ratio) + c;
- x = ((b * ratio) + a) / denom;
- y = (b - (a * ratio)) / denom;
+ /* Choose alternate order of computation if ratio is subnormal. */
+ if (FABS (ratio) > RMIN)
+ {
+ x = ((b * ratio) + a) / denom;
+ y = (b - (a * ratio)) / denom;
+ }
+ else
+ {
+ x = (a + (d * (b / c))) / denom;
+ y = (b - (d * (a / c))) / denom;
+ }
}
+#endif
- /* Recover infinities and zeros that computed as NaN+iNaN; the only cases
- are nonzero/zero, infinite/finite, and finite/infinite. */
+ /* Recover infinities and zeros that computed as NaN+iNaN; the only
+ cases are nonzero/zero, infinite/finite, and finite/infinite. */
if (isnan (x) && isnan (y))
{
if (c == 0.0 && d == 0.0 && (!isnan (a) || !isnan (b)))
/* Clear part of an instruction cache. */
void
-__clear_cache (char *beg __attribute__((__unused__)),
- char *end __attribute__((__unused__)))
+__clear_cache (void *beg __attribute__((__unused__)),
+ void *end __attribute__((__unused__)))
{
#ifdef CLEAR_INSN_CACHE
- CLEAR_INSN_CACHE (beg, end);
+ /* Cast the void* pointers to char* as some implementations
+ of the macro assume the pointers can be subtracted from
+ one another. */
+ CLEAR_INSN_CACHE ((char *) beg, (char *) end);
#endif /* CLEAR_INSN_CACHE */
}
/* Jump to a trampoline, loading the static chain address. */
#if defined(WINNT) && ! defined(__CYGWIN__)
+#define WIN32_LEAN_AND_MEAN
#include <windows.h>
int getpagesize (void);
int mprotect (char *,int, int);