]> git.ipfire.org Git - thirdparty/glibc.git/blame - stdlib/longlong.h
support: Add capability to fork an sgid child
[thirdparty/glibc.git] / stdlib / longlong.h
CommitLineData
28f540f4 1/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
2b778ceb 2 Copyright (C) 1991-2021 Free Software Foundation, Inc.
41b0afab 3
41bdb6e2 4 This file is part of the GNU C Library.
28f540f4 5
41bdb6e2
AJ
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
1da2d51a 8 License as published by the Free Software Foundation; either
41bdb6e2 9 version 2.1 of the License, or (at your option) any later version.
28f540f4 10
def7fbd6
AS
11 In addition to the permissions in the GNU Lesser General Public
12 License, the Free Software Foundation gives you unlimited
13 permission to link the compiled version of this file into
14 combinations with other programs, and to distribute those
15 combinations without any restriction coming from the use of this
16 file. (The Lesser General Public License restrictions do apply in
17 other respects; for example, they cover modification of the file,
18 and distribution when not linked into a combine executable.)
19
41bdb6e2
AJ
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
28f540f4 24
41bdb6e2 25 You should have received a copy of the GNU Lesser General Public
59ba27a6 26 License along with the GNU C Library; if not, see
5a82c748 27 <https://www.gnu.org/licenses/>. */
28f540f4 28
e9b3e3c5
UD
29/* You have to define the following before including this file:
30
31 UWtype -- An unsigned type, default type for operations (typically a "word")
32 UHWtype -- An unsigned type, at least half the size of UWtype.
33 UDWtype -- An unsigned type, at least twice as large as UWtype
34 W_TYPE_SIZE -- size in bits of UWtype
35
36 UQItype -- Unsigned 8 bit type.
37 SItype, USItype -- Signed and unsigned 32 bit types.
38 DItype, UDItype -- Signed and unsigned 64 bit types.
39
40 On a 32 bit machine UWtype should typically be USItype;
f30070ae 41 on a 64 bit machine, UWtype should typically be UDItype. */
b928942e 42
e9b3e3c5
UD
43#define __BITS4 (W_TYPE_SIZE / 4)
44#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
45#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
46#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
47
48#ifndef W_TYPE_SIZE
49#define W_TYPE_SIZE 32
50#define UWtype USItype
51#define UHWtype USItype
52#define UDWtype UDItype
53#endif
04fbd653 54
def7fbd6
AS
55/* Used in glibc only. */
56#ifndef attribute_hidden
57#define attribute_hidden
58#endif
59
6f8a7dff 60extern const UQItype __clz_tab[256] attribute_hidden;
f30070ae 61
28f540f4
RM
62/* Define auxiliary asm macros.
63
f30070ae
RM
64 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
65 UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
e9b3e3c5 66 word product in HIGH_PROD and LOW_PROD.
28f540f4 67
e9b3e3c5
UD
68 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
69 UDWtype product. This is just a variant of umul_ppmm.
28f540f4
RM
70
71 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
e9b3e3c5
UD
72 denominator) divides a UDWtype, composed by the UWtype integers
73 HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
74 in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
75 than DENOMINATOR for correct operation. If the implementation also
76 requires the most significant bit of DENOMINATOR to be 1, the
77 pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
28f540f4
RM
78
79 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
e9b3e3c5
UD
80 denominator). Like udiv_qrnnd but the numbers are signed. The quotient
81 is rounded towards 0.
1da2d51a 82
e9b3e3c5 83 5) count_leading_zeros(count, x) counts the number of zero-bits from the
41b0afab 84 msb to the first nonzero bit in the UWtype X. This is the number of
e9b3e3c5
UD
85 steps X needs to be shifted left to set the msb. Undefined for X == 0,
86 unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
1da2d51a 87
62818cfd
UD
88 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
89 from the least significant end.
90
91 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
e9b3e3c5
UD
92 high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
93 HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
94 respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
95 (i.e. carry out) is not stored anywhere, and is lost.
96
97 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
98 high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
99 composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
100 LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
101 and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
28f540f4
RM
102 and is lost.
103
104 If any of these macros are left undefined for a particular CPU,
105 C macros are used. */
106
107/* The CPUs come in alphabetical order below.
108
109 Please add support for more CPUs here, or improve the current support
1da2d51a
UD
110 for the CPUs below!
111 (E.g. WE32100, IBM360.) */
28f540f4
RM
112
113#if defined (__GNUC__) && !defined (NO_ASM)
114
115/* We sometimes need to clobber "cc" with gcc2, but that would not be
116 understood by gcc1. Use cpp to avoid major code duplication. */
117#if __GNUC__ < 2
118#define __CLOBBER_CC
119#define __AND_CLOBBER_CC
120#else /* __GNUC__ >= 2 */
121#define __CLOBBER_CC : "cc"
122#define __AND_CLOBBER_CC , "cc"
123#endif /* __GNUC__ < 2 */
124
ea6c92f3
YZ
125#if defined (__aarch64__)
126
127#if W_TYPE_SIZE == 32
128#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
129#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
130#define COUNT_LEADING_ZEROS_0 32
131#endif /* W_TYPE_SIZE == 32 */
132
133#if W_TYPE_SIZE == 64
134#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X))
135#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X))
136#define COUNT_LEADING_ZEROS_0 64
137#endif /* W_TYPE_SIZE == 64 */
138
139#endif /* __aarch64__ */
140
e9b3e3c5 141#if defined (__alpha) && W_TYPE_SIZE == 64
7f49b7c0
RH
142/* There is a bug in g++ before version 5 that
143 errors on __builtin_alpha_umulh. */
144#if !defined(__cplusplus) || __GNUC__ >= 5
e9b3e3c5
UD
145#define umul_ppmm(ph, pl, m0, m1) \
146 do { \
147 UDItype __m0 = (m0), __m1 = (m1); \
f30070ae 148 (ph) = __builtin_alpha_umulh (__m0, __m1); \
e9b3e3c5
UD
149 (pl) = __m0 * __m1; \
150 } while (0)
151#define UMUL_TIME 46
7f49b7c0 152#endif /* !c++ */
e9b3e3c5
UD
153#ifndef LONGLONG_STANDALONE
154#define udiv_qrnnd(q, r, n1, n0, d) \
155 do { UDItype __r; \
156 (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
157 (r) = __r; \
158 } while (0)
f2672ddd 159extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
e9b3e3c5
UD
160#define UDIV_TIME 220
161#endif /* LONGLONG_STANDALONE */
41b0afab 162#ifdef __alpha_cix__
f30070ae
RM
163#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
164#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
41b0afab
RM
165#define COUNT_LEADING_ZEROS_0 64
166#else
41b0afab
RM
167#define count_leading_zeros(COUNT,X) \
168 do { \
169 UDItype __xr = (X), __t, __a; \
f30070ae 170 __t = __builtin_alpha_cmpbge (0, __xr); \
41b0afab 171 __a = __clz_tab[__t ^ 0xff] - 1; \
f30070ae 172 __t = __builtin_alpha_extbl (__xr, __a); \
41b0afab
RM
173 (COUNT) = 64 - (__clz_tab[__t] + __a*8); \
174 } while (0)
175#define count_trailing_zeros(COUNT,X) \
176 do { \
177 UDItype __xr = (X), __t, __a; \
f30070ae 178 __t = __builtin_alpha_cmpbge (0, __xr); \
41b0afab
RM
179 __t = ~__t & -~__t; \
180 __a = ((__t & 0xCC) != 0) * 2; \
181 __a += ((__t & 0xF0) != 0) * 4; \
182 __a += ((__t & 0xAA) != 0); \
f30070ae 183 __t = __builtin_alpha_extbl (__xr, __a); \
41b0afab
RM
184 __a <<= 3; \
185 __t &= -__t; \
186 __a += ((__t & 0xCC) != 0) * 2; \
187 __a += ((__t & 0xF0) != 0) * 4; \
188 __a += ((__t & 0xAA) != 0); \
189 (COUNT) = __a; \
190 } while (0)
191#endif /* __alpha_cix__ */
e9b3e3c5
UD
192#endif /* __alpha */
193
194#if defined (__arc__) && W_TYPE_SIZE == 32
1da2d51a 195#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 196 __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
1da2d51a
UD
197 : "=r" ((USItype) (sh)), \
198 "=&r" ((USItype) (sl)) \
199 : "%r" ((USItype) (ah)), \
5d025ea6 200 "rICal" ((USItype) (bh)), \
1da2d51a 201 "%r" ((USItype) (al)), \
a9f4703d
VG
202 "rICal" ((USItype) (bl)) \
203 : "cc")
1da2d51a 204#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 205 __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
1da2d51a
UD
206 : "=r" ((USItype) (sh)), \
207 "=&r" ((USItype) (sl)) \
208 : "r" ((USItype) (ah)), \
5d025ea6 209 "rICal" ((USItype) (bh)), \
1da2d51a 210 "r" ((USItype) (al)), \
a9f4703d
VG
211 "rICal" ((USItype) (bl)) \
212 : "cc")
ab07cea8
JM
213
/* Widening unsigned multiply written in plain C: U and V are each converted
   to USItype and the product is formed in UDItype.  Both arguments are
   parenthesized so that a compound argument (for example a + b) is
   converted as a whole instead of being split by the cast and the `*'.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
215#ifdef __ARC_NORM__
216#define count_leading_zeros(count, x) \
217 do \
218 { \
219 SItype c_; \
220 \
221 __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
222 (count) = c_ + 1; \
223 } \
224 while (0)
225#define COUNT_LEADING_ZEROS_0 32
5d025ea6
JM
226#endif /* __ARC_NORM__ */
227#endif /* __arc__ */
28f540f4 228
8115f29b
L
229#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
230 && W_TYPE_SIZE == 32
28f540f4 231#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 232 __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
1da2d51a
UD
233 : "=r" ((USItype) (sh)), \
234 "=&r" ((USItype) (sl)) \
235 : "%r" ((USItype) (ah)), \
236 "rI" ((USItype) (bh)), \
237 "%r" ((USItype) (al)), \
f30070ae 238 "rI" ((USItype) (bl)) __CLOBBER_CC)
28f540f4 239#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 240 __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
1da2d51a
UD
241 : "=r" ((USItype) (sh)), \
242 "=&r" ((USItype) (sl)) \
243 : "r" ((USItype) (ah)), \
244 "rI" ((USItype) (bh)), \
245 "r" ((USItype) (al)), \
f30070ae 246 "rI" ((USItype) (bl)) __CLOBBER_CC)
8115f29b
L
247# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
248 || defined(__ARM_ARCH_3__)
249# define umul_ppmm(xh, xl, a, b) \
250 do { \
251 register USItype __t0, __t1, __t2; \
252 __asm__ ("%@ Inlined umul_ppmm\n" \
41b0afab
RM
253 " mov %2, %5, lsr #16\n" \
254 " mov %0, %6, lsr #16\n" \
255 " bic %3, %5, %2, lsl #16\n" \
256 " bic %4, %6, %0, lsl #16\n" \
257 " mul %1, %3, %4\n" \
258 " mul %4, %2, %4\n" \
259 " mul %3, %0, %3\n" \
260 " mul %0, %2, %0\n" \
261 " adds %3, %4, %3\n" \
262 " addcs %0, %0, #65536\n" \
263 " adds %1, %1, %3, lsl #16\n" \
264 " adc %0, %0, %3, lsr #16" \
1da2d51a
UD
265 : "=&r" ((USItype) (xh)), \
266 "=r" ((USItype) (xl)), \
267 "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
268 : "r" ((USItype) (a)), \
8115f29b
L
269 "r" ((USItype) (b)) __CLOBBER_CC ); \
270 } while (0)
271# define UMUL_TIME 20
272# else
/* 32x32->64 unsigned multiply expressed in C so that the compiler picks the
   instruction (the intent is a single umull).  XH receives the upper 32 bits
   of A * B and XL the lower 32 bits; the product is fully computed before
   either output is written.  */
# define umul_ppmm(xh, xl, a, b)					\
  do {									\
    register UDItype __arm_wide_a = (USItype) (a);			\
    register UDItype __arm_prod = __arm_wide_a * (USItype) (b);	\
    (xl) = (USItype) __arm_prod;					\
    (xh) = (USItype) (__arm_prod >> 32);				\
  } while (0)
280# define UMUL_TIME 3
281# endif
282# define UDIV_TIME 100
28f540f4
RM
283#endif /* __arm__ */
284
24784465
RM
285#if defined(__arm__)
286/* Let gcc decide how best to implement count_leading_zeros. */
287#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
8115f29b 288#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
24784465
RM
289#define COUNT_LEADING_ZEROS_0 32
290#endif
291
8115f29b
L
292#if defined (__AVR__)
293
294#if W_TYPE_SIZE == 16
295#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
296#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
297#define COUNT_LEADING_ZEROS_0 16
298#endif /* W_TYPE_SIZE == 16 */
299
300#if W_TYPE_SIZE == 32
301#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
302#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
303#define COUNT_LEADING_ZEROS_0 32
304#endif /* W_TYPE_SIZE == 32 */
305
306#if W_TYPE_SIZE == 64
307#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X))
308#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
309#define COUNT_LEADING_ZEROS_0 64
310#endif /* W_TYPE_SIZE == 64 */
311
312#endif /* defined (__AVR__) */
313
ab07cea8
JM
314#if defined (__CRIS__)
315
316#if __CRIS_arch_version >= 3
24784465 317#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
ab07cea8
JM
318#define COUNT_LEADING_ZEROS_0 32
319#endif /* __CRIS_arch_version >= 3 */
320
24784465
RM
321#if __CRIS_arch_version >= 8
322#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
ab07cea8
JM
323#endif /* __CRIS_arch_version >= 8 */
324
325#if __CRIS_arch_version >= 10
326#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
327#else
328#define __umulsidi3 __umulsidi3
329extern UDItype __umulsidi3 (USItype, USItype);
330#endif /* __CRIS_arch_version >= 10 */
331
332#define umul_ppmm(w1, w0, u, v) \
333 do { \
334 UDItype __x = __umulsidi3 (u, v); \
335 (w0) = (USItype) (__x); \
336 (w1) = (USItype) (__x >> 32); \
337 } while (0)
338
339/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
340 DFmode ("double" intrinsics, avoiding two of the three insns handling
341 carry), but defining them as open-code C composing and doing the
342 operation in DImode (UDImode) shows that the DImode needs work:
343 register pressure from requiring neighboring registers and the
344 traffic to and from them come to dominate, in the 4.7 series. */
345
346#endif /* defined (__CRIS__) */
24784465 347
e9b3e3c5 348#if defined (__hppa) && W_TYPE_SIZE == 32
28f540f4 349#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 350 __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
1da2d51a
UD
351 : "=r" ((USItype) (sh)), \
352 "=&r" ((USItype) (sl)) \
353 : "%rM" ((USItype) (ah)), \
354 "rM" ((USItype) (bh)), \
355 "%rM" ((USItype) (al)), \
356 "rM" ((USItype) (bl)))
28f540f4 357#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 358 __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
1da2d51a
UD
359 : "=r" ((USItype) (sh)), \
360 "=&r" ((USItype) (sl)) \
361 : "rM" ((USItype) (ah)), \
362 "rM" ((USItype) (bh)), \
363 "rM" ((USItype) (al)), \
364 "rM" ((USItype) (bl)))
28f540f4 365#if defined (_PA_RISC1_1)
1da2d51a 366#define umul_ppmm(w1, w0, u, v) \
28f540f4 367 do { \
1da2d51a
UD
368 union \
369 { \
370 UDItype __f; \
371 struct {USItype __w1, __w0;} __w1w0; \
372 } __t; \
28f540f4 373 __asm__ ("xmpyu %1,%2,%0" \
1da2d51a
UD
374 : "=x" (__t.__f) \
375 : "x" ((USItype) (u)), \
376 "x" ((USItype) (v))); \
377 (w1) = __t.__w1w0.__w1; \
378 (w0) = __t.__w1w0.__w0; \
379 } while (0)
28f540f4 380#define UMUL_TIME 8
28f540f4 381#else
1da2d51a 382#define UMUL_TIME 30
28f540f4 383#endif
1da2d51a 384#define UDIV_TIME 40
28f540f4 385#define count_leading_zeros(count, x) \
41b0afab
RM
386 do { \
387 USItype __tmp; \
388 __asm__ ( \
389 "ldi 1,%0\n" \
390" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
391" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\
392" ldo 16(%0),%0 ; Yes. Perform add.\n" \
393" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
394" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\
395" ldo 8(%0),%0 ; Yes. Perform add.\n" \
396" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
397" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\
398" ldo 4(%0),%0 ; Yes. Perform add.\n" \
399" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
400" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\
401" ldo 2(%0),%0 ; Yes. Perform add.\n" \
402" extru %1,30,1,%1 ; Extract bit 1.\n" \
403" sub %0,%1,%0 ; Subtract it.\n" \
404 : "=r" (count), "=r" (__tmp) : "1" (x)); \
28f540f4 405 } while (0)
28f540f4
RM
406#endif
407
f30070ae 408#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
def7fbd6 409#if !defined (__zarch__)
e9b3e3c5
UD
410#define smul_ppmm(xh, xl, m0, m1) \
411 do { \
412 union {DItype __ll; \
413 struct {USItype __h, __l;} __i; \
f30070ae
RM
414 } __x; \
415 __asm__ ("lr %N0,%1\n\tmr %0,%2" \
416 : "=&r" (__x.__ll) \
417 : "r" (m0), "r" (m1)); \
418 (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
e9b3e3c5
UD
419 } while (0)
420#define sdiv_qrnnd(q, r, n1, n0, d) \
421 do { \
422 union {DItype __ll; \
423 struct {USItype __h, __l;} __i; \
f30070ae
RM
424 } __x; \
425 __x.__i.__h = n1; __x.__i.__l = n0; \
e9b3e3c5 426 __asm__ ("dr %0,%2" \
f30070ae
RM
427 : "=r" (__x.__ll) \
428 : "0" (__x.__ll), "r" (d)); \
429 (q) = __x.__i.__l; (r) = __x.__i.__h; \
e9b3e3c5 430 } while (0)
def7fbd6
AS
431#else
432#define smul_ppmm(xh, xl, m0, m1) \
433 do { \
48693bea
AK
434 register SItype __r0 __asm__ ("0"); \
435 register SItype __r1 __asm__ ("1") = (m0); \
436 \
def7fbd6 437 __asm__ ("mr\t%%r0,%3" \
48693bea
AK
438 : "=r" (__r0), "=r" (__r1) \
439 : "r" (__r1), "r" (m1)); \
440 (xh) = __r0; (xl) = __r1; \
def7fbd6 441 } while (0)
48693bea 442
def7fbd6
AS
443#define sdiv_qrnnd(q, r, n1, n0, d) \
444 do { \
48693bea
AK
445 register SItype __r0 __asm__ ("0") = (n1); \
446 register SItype __r1 __asm__ ("1") = (n0); \
447 \
448 __asm__ ("dr\t%%r0,%4" \
449 : "=r" (__r0), "=r" (__r1) \
450 : "r" (__r0), "r" (__r1), "r" (d)); \
451 (q) = __r1; (r) = __r0; \
def7fbd6
AS
452 } while (0)
453#endif /* __zarch__ */
e9b3e3c5
UD
454#endif
455
456#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
28f540f4 457#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
24784465 458 __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
1da2d51a
UD
459 : "=r" ((USItype) (sh)), \
460 "=&r" ((USItype) (sl)) \
461 : "%0" ((USItype) (ah)), \
462 "g" ((USItype) (bh)), \
463 "%1" ((USItype) (al)), \
464 "g" ((USItype) (bl)))
28f540f4 465#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
24784465 466 __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
1da2d51a
UD
467 : "=r" ((USItype) (sh)), \
468 "=&r" ((USItype) (sl)) \
469 : "0" ((USItype) (ah)), \
470 "g" ((USItype) (bh)), \
471 "1" ((USItype) (al)), \
472 "g" ((USItype) (bl)))
28f540f4 473#define umul_ppmm(w1, w0, u, v) \
24784465 474 __asm__ ("mul{l} %3" \
1da2d51a
UD
475 : "=a" ((USItype) (w0)), \
476 "=d" ((USItype) (w1)) \
477 : "%0" ((USItype) (u)), \
478 "rm" ((USItype) (v)))
41b0afab 479#define udiv_qrnnd(q, r, n1, n0, dv) \
24784465 480 __asm__ ("div{l} %4" \
1da2d51a
UD
481 : "=a" ((USItype) (q)), \
482 "=d" ((USItype) (r)) \
483 : "0" ((USItype) (n0)), \
484 "1" ((USItype) (n1)), \
41b0afab 485 "rm" ((USItype) (dv)))
24784465
RM
486#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
487#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
28f540f4
RM
488#define UMUL_TIME 40
489#define UDIV_TIME 40
490#endif /* 80x86 */
491
6426d77e 492#if defined (__x86_64__) && W_TYPE_SIZE == 64
24784465
RM
493#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
494 __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \
495 : "=r" ((UDItype) (sh)), \
496 "=&r" ((UDItype) (sl)) \
497 : "%0" ((UDItype) (ah)), \
498 "rme" ((UDItype) (bh)), \
499 "%1" ((UDItype) (al)), \
500 "rme" ((UDItype) (bl)))
501#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
502 __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \
503 : "=r" ((UDItype) (sh)), \
504 "=&r" ((UDItype) (sl)) \
505 : "0" ((UDItype) (ah)), \
506 "rme" ((UDItype) (bh)), \
507 "1" ((UDItype) (al)), \
508 "rme" ((UDItype) (bl)))
509#define umul_ppmm(w1, w0, u, v) \
510 __asm__ ("mul{q} %3" \
511 : "=a" ((UDItype) (w0)), \
512 "=d" ((UDItype) (w1)) \
513 : "%0" ((UDItype) (u)), \
514 "rm" ((UDItype) (v)))
515#define udiv_qrnnd(q, r, n1, n0, dv) \
516 __asm__ ("div{q} %4" \
517 : "=a" ((UDItype) (q)), \
518 "=d" ((UDItype) (r)) \
519 : "0" ((UDItype) (n0)), \
520 "1" ((UDItype) (n1)), \
521 "rm" ((UDItype) (dv)))
8115f29b
L
522#define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
523#define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
24784465
RM
524#define UMUL_TIME 40
525#define UDIV_TIME 40
526#endif /* x86_64 */
527
e9b3e3c5 528#if defined (__i960__) && W_TYPE_SIZE == 32
28f540f4
RM
529#define umul_ppmm(w1, w0, u, v) \
530 ({union {UDItype __ll; \
531 struct {USItype __l, __h;} __i; \
532 } __xx; \
533 __asm__ ("emul %2,%1,%0" \
534 : "=d" (__xx.__ll) \
1da2d51a
UD
535 : "%dI" ((USItype) (u)), \
536 "dI" ((USItype) (v))); \
28f540f4
RM
537 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
538#define __umulsidi3(u, v) \
539 ({UDItype __w; \
540 __asm__ ("emul %2,%1,%0" \
541 : "=d" (__w) \
1da2d51a
UD
542 : "%dI" ((USItype) (u)), \
543 "dI" ((USItype) (v))); \
62818cfd 544 __w; })
1da2d51a 545#endif /* __i960__ */
28f540f4 546
def7fbd6
AS
547#if defined (__ia64) && W_TYPE_SIZE == 64
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.

   Computes the two-word difference (AH,AL) - (BH,BL) into (SH,SL); a borrow
   out of the high word is lost.  Each input is read exactly once, and all
   inputs are read before either output is written, so arguments with side
   effects and outputs that alias inputs are both handled correctly.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
  do {									\
    UWtype __sub_al = (al), __sub_bl = (bl);				\
    UWtype __sub_ah = (ah), __sub_bh = (bh);				\
    /* Keep the if/else shape: it is what lets gcc predicate the	\
       two subtract forms instead of materializing the borrow.  */	\
    if (__sub_al < __sub_bl)						\
      (sh) = __sub_ah - __sub_bh - 1;					\
    else								\
      (sh) = __sub_ah - __sub_bh;					\
    (sl) = __sub_al - __sub_bl;						\
  } while (0)
562
563/* Do both product parts in assembly, since that gives better code with
564 all gcc versions. Some callers will just use the upper part, and in
565 that situation we waste an instruction, but not any cycles. */
566#define umul_ppmm(ph, pl, m0, m1) \
567 __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
568 : "=&f" (ph), "=f" (pl) \
569 : "f" (m0), "f" (m1))
570#define count_leading_zeros(count, x) \
571 do { \
572 UWtype _x = (x), _y, _a, _c; \
573 __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
574 __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
575 _c = (_a - 1) << 3; \
576 _x >>= _c; \
577 if (_x >= 1 << 4) \
578 _x >>= 4, _c += 4; \
579 if (_x >= 1 << 2) \
580 _x >>= 2, _c += 2; \
581 _c += _x >> 1; \
582 (count) = W_TYPE_SIZE - 1 - _c; \
583 } while (0)
584/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
585 based, and we don't need a special case for x==0 here */
586#define count_trailing_zeros(count, x) \
587 do { \
588 UWtype __ctz_x = (x); \
589 __asm__ ("popcnt %0 = %1" \
590 : "=r" (count) \
591 : "r" ((__ctz_x-1) & ~__ctz_x)); \
592 } while (0)
593#define UMUL_TIME 14
594#endif
595
e9b3e3c5 596#if defined (__M32R__) && W_TYPE_SIZE == 32
1da2d51a
UD
597#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
598 /* The cmp clears the condition bit. */ \
181742f8 599 __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
1da2d51a
UD
600 : "=r" ((USItype) (sh)), \
601 "=&r" ((USItype) (sl)) \
181742f8 602 : "0" ((USItype) (ah)), \
1da2d51a 603 "r" ((USItype) (bh)), \
181742f8 604 "1" ((USItype) (al)), \
1da2d51a
UD
605 "r" ((USItype) (bl)) \
606 : "cbit")
607#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
608 /* The cmp clears the condition bit. */ \
181742f8 609 __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
1da2d51a
UD
610 : "=r" ((USItype) (sh)), \
611 "=&r" ((USItype) (sl)) \
612 : "0" ((USItype) (ah)), \
613 "r" ((USItype) (bh)), \
614 "1" ((USItype) (al)), \
615 "r" ((USItype) (bl)) \
616 : "cbit")
617#endif /* __M32R__ */
618
e9b3e3c5 619#if defined (__mc68000__) && W_TYPE_SIZE == 32
28f540f4 620#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 621 __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
1da2d51a
UD
622 : "=d" ((USItype) (sh)), \
623 "=&d" ((USItype) (sl)) \
624 : "%0" ((USItype) (ah)), \
625 "d" ((USItype) (bh)), \
626 "%1" ((USItype) (al)), \
627 "g" ((USItype) (bl)))
28f540f4 628#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 629 __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
1da2d51a
UD
630 : "=d" ((USItype) (sh)), \
631 "=&d" ((USItype) (sl)) \
632 : "0" ((USItype) (ah)), \
633 "d" ((USItype) (bh)), \
634 "1" ((USItype) (al)), \
635 "g" ((USItype) (bl)))
636
f30070ae
RM
637/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r; the '060 is excluded by the test below. */
638#if (defined (__mc68020__) && !defined (__mc68060__))
28f540f4
RM
639#define umul_ppmm(w1, w0, u, v) \
640 __asm__ ("mulu%.l %3,%1:%0" \
1da2d51a
UD
641 : "=d" ((USItype) (w0)), \
642 "=d" ((USItype) (w1)) \
643 : "%0" ((USItype) (u)), \
644 "dmi" ((USItype) (v)))
28f540f4
RM
645#define UMUL_TIME 45
646#define udiv_qrnnd(q, r, n1, n0, d) \
647 __asm__ ("divu%.l %4,%1:%0" \
1da2d51a
UD
648 : "=d" ((USItype) (q)), \
649 "=d" ((USItype) (r)) \
650 : "0" ((USItype) (n0)), \
651 "1" ((USItype) (n1)), \
652 "dmi" ((USItype) (d)))
28f540f4
RM
653#define UDIV_TIME 90
654#define sdiv_qrnnd(q, r, n1, n0, d) \
655 __asm__ ("divs%.l %4,%1:%0" \
1da2d51a
UD
656 : "=d" ((USItype) (q)), \
657 "=d" ((USItype) (r)) \
658 : "0" ((USItype) (n0)), \
659 "1" ((USItype) (n1)), \
660 "dmi" ((USItype) (d)))
661
f30070ae
RM
662#elif defined (__mcoldfire__) /* not mc68020 */
663
664#define umul_ppmm(xh, xl, a, b) \
665 __asm__ ("| Inlined umul_ppmm\n" \
666 " move%.l %2,%/d0\n" \
667 " move%.l %3,%/d1\n" \
668 " move%.l %/d0,%/d2\n" \
669 " swap %/d0\n" \
670 " move%.l %/d1,%/d3\n" \
671 " swap %/d1\n" \
672 " move%.w %/d2,%/d4\n" \
673 " mulu %/d3,%/d4\n" \
674 " mulu %/d1,%/d2\n" \
675 " mulu %/d0,%/d3\n" \
676 " mulu %/d0,%/d1\n" \
677 " move%.l %/d4,%/d0\n" \
678 " clr%.w %/d0\n" \
679 " swap %/d0\n" \
680 " add%.l %/d0,%/d2\n" \
681 " add%.l %/d3,%/d2\n" \
682 " jcc 1f\n" \
683 " add%.l %#65536,%/d1\n" \
684 "1: swap %/d2\n" \
685 " moveq %#0,%/d0\n" \
686 " move%.w %/d2,%/d0\n" \
687 " move%.w %/d4,%/d2\n" \
688 " move%.l %/d2,%1\n" \
689 " add%.l %/d1,%/d0\n" \
690 " move%.l %/d0,%0" \
691 : "=g" ((USItype) (xh)), \
692 "=g" ((USItype) (xl)) \
693 : "g" ((USItype) (a)), \
694 "g" ((USItype) (b)) \
695 : "d0", "d1", "d2", "d3", "d4")
696#define UMUL_TIME 100
697#define UDIV_TIME 400
698#else /* not ColdFire */
1da2d51a 699/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */
ba848785 700#define umul_ppmm(xh, xl, a, b) \
772d0e1a 701 __asm__ ("| Inlined umul_ppmm\n" \
41b0afab
RM
702 " move%.l %2,%/d0\n" \
703 " move%.l %3,%/d1\n" \
704 " move%.l %/d0,%/d2\n" \
705 " swap %/d0\n" \
706 " move%.l %/d1,%/d3\n" \
707 " swap %/d1\n" \
708 " move%.w %/d2,%/d4\n" \
709 " mulu %/d3,%/d4\n" \
710 " mulu %/d1,%/d2\n" \
711 " mulu %/d0,%/d3\n" \
712 " mulu %/d0,%/d1\n" \
713 " move%.l %/d4,%/d0\n" \
714 " eor%.w %/d0,%/d0\n" \
715 " swap %/d0\n" \
716 " add%.l %/d0,%/d2\n" \
717 " add%.l %/d3,%/d2\n" \
718 " jcc 1f\n" \
719 " add%.l %#65536,%/d1\n" \
720 "1: swap %/d2\n" \
721 " moveq %#0,%/d0\n" \
722 " move%.w %/d2,%/d0\n" \
723 " move%.w %/d4,%/d2\n" \
724 " move%.l %/d2,%1\n" \
725 " add%.l %/d1,%/d0\n" \
726 " move%.l %/d0,%0" \
1da2d51a
UD
727 : "=g" ((USItype) (xh)), \
728 "=g" ((USItype) (xl)) \
729 : "g" ((USItype) (a)), \
730 "g" ((USItype) (b)) \
731 : "d0", "d1", "d2", "d3", "d4")
28f540f4
RM
732#define UMUL_TIME 100
733#define UDIV_TIME 400
f30070ae 734
28f540f4 735#endif /* not mc68020 */
1da2d51a 736
f30070ae
RM
737/* The '020, '030, '040 and '060 have bitfield insns.
738 cpu32 disguises as a 68020, but lacks them. */
739#if defined (__mc68020__) && !defined (__mcpu32__)
1da2d51a
UD
740#define count_leading_zeros(count, x) \
741 __asm__ ("bfffo %1{%b2:%b2},%0" \
742 : "=d" ((USItype) (count)) \
743 : "od" ((USItype) (x)), "n" (0))
24784465
RM
744/* Some ColdFire architectures have a ff1 instruction supported via
745 __builtin_clz. */
746#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
747#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
748#define COUNT_LEADING_ZEROS_0 32
1da2d51a 749#endif
28f540f4
RM
750#endif /* mc68000 */
751
e9b3e3c5 752#if defined (__m88000__) && W_TYPE_SIZE == 32
28f540f4 753#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 754 __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
1da2d51a
UD
755 : "=r" ((USItype) (sh)), \
756 "=&r" ((USItype) (sl)) \
757 : "%rJ" ((USItype) (ah)), \
758 "rJ" ((USItype) (bh)), \
759 "%rJ" ((USItype) (al)), \
760 "rJ" ((USItype) (bl)))
28f540f4 761#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 762 __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
1da2d51a
UD
763 : "=r" ((USItype) (sh)), \
764 "=&r" ((USItype) (sl)) \
765 : "rJ" ((USItype) (ah)), \
766 "rJ" ((USItype) (bh)), \
767 "rJ" ((USItype) (al)), \
768 "rJ" ((USItype) (bl)))
28f540f4
RM
769#define count_leading_zeros(count, x) \
770 do { \
771 USItype __cbtmp; \
772 __asm__ ("ff1 %0,%1" \
773 : "=r" (__cbtmp) \
1da2d51a 774 : "r" ((USItype) (x))); \
28f540f4
RM
775 (count) = __cbtmp ^ 31; \
776 } while (0)
e9b3e3c5 777#define COUNT_LEADING_ZEROS_0 63 /* sic */
1da2d51a 778#if defined (__mc88110__)
28f540f4
RM
779#define umul_ppmm(wh, wl, u, v) \
780 do { \
781 union {UDItype __ll; \
782 struct {USItype __h, __l;} __i; \
783 } __xx; \
784 __asm__ ("mulu.d %0,%1,%2" \
785 : "=r" (__xx.__ll) \
1da2d51a
UD
786 : "r" ((USItype) (u)), \
787 "r" ((USItype) (v))); \
28f540f4
RM
788 (wh) = __xx.__i.__h; \
789 (wl) = __xx.__i.__l; \
790 } while (0)
791#define udiv_qrnnd(q, r, n1, n0, d) \
792 ({union {UDItype __ll; \
793 struct {USItype __h, __l;} __i; \
794 } __xx; \
795 USItype __q; \
796 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
797 __asm__ ("divu.d %0,%1,%2" \
798 : "=r" (__q) \
799 : "r" (__xx.__ll), \
1da2d51a 800 "r" ((USItype) (d))); \
28f540f4
RM
801 (r) = (n0) - __q * (d); (q) = __q; })
802#define UMUL_TIME 5
803#define UDIV_TIME 25
804#else
805#define UMUL_TIME 17
806#define UDIV_TIME 150
1da2d51a 807#endif /* __mc88110__ */
28f540f4
RM
808#endif /* __m88000__ */
809
def7fbd6
AS
#if defined (__mn10300__)
/* Primitives for __mn10300__ targets.  Every asm statement is spelled
   __asm__, matching the other ports in this file, so the macros remain
   usable when the compiler runs in a strict ISO mode that does not
   recognize the plain `asm' keyword.  */
# if defined (__AM33__)
# define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
/* (W1,W0) = U * V, unsigned; all four operands are general "r" registers.  */
# define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
/* (W1,W0) = U * V, signed.  */
# define smul_ppmm(w1, w0, u, v) \
  __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
/* Two-operand multiply: U is tied to the W0 output register ("%0") and W1
   is produced in the register selected by the "z" constraint.  */
# define umul_ppmm(w1, w0, u, v) \
  __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# define smul_ppmm(w1, w0, u, v) \
  __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* Two-word add and subtract done by packing the halves into a DWunion and
   operating on its double-word member; any carry/borrow out of the high
   word is lost.  DWunion is not defined in this part of the file and is
   assumed to be supplied by the including code.  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll + __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll - __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
/* Divide the two-word value (NH,NL) by D: NL is tied to the quotient
   register and NH to the remainder register.  */
# define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif
846
e9b3e3c5 847#if defined (__mips__) && W_TYPE_SIZE == 32
24784465
RM
/* Form the full 64-bit product of the 32-bit values U and V in C and leave
   instruction selection to the compiler.  W1 receives the upper 32 bits and
   W0 the lower 32 bits; the product is complete before either is stored.  */
#define umul_ppmm(w1, w0, u, v)						\
  do {									\
    UDItype __mips_wide_u = (USItype) (u);				\
    UDItype __mips_prod = __mips_wide_u * (USItype) (v);		\
    (w1) = (USItype) (__mips_prod >> 32);				\
    (w0) = (USItype) __mips_prod;					\
  } while (0)
28f540f4
RM
854#define UMUL_TIME 10
855#define UDIV_TIME 100
24784465 856
6426d77e 857#if (__mips == 32 || __mips == 64) && ! defined (__mips16)
24784465
RM
858#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
859#define COUNT_LEADING_ZEROS_0 32
860#endif
28f540f4
RM
861#endif /* __mips__ */
862
41b0afab
RM
863/* FIXME: We should test _IBMR2 here when we add assembly support for the
864 system vendor compilers.
865 FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good
866 enough, since that hits ARM and m68k too. */
867#if (defined (_ARCH_PPC) /* AIX */ \
41b0afab
RM
868 || defined (__powerpc__) /* gcc */ \
869 || defined (__POWERPC__) /* BEOS */ \
870 || defined (__ppc__) /* Darwin */ \
24784465
RM
871 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
872 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
48693bea 873 && CPU_FAMILY == PPC) \
41b0afab 874 ) && W_TYPE_SIZE == 32
28f540f4
RM
/* add_ssaaaa (sh, sl, ah, al, bh, bl): two-word addition of (ah,al) and
   (bh,bl) into (sh,sl).  One of three asm templates is chosen at compile
   time: bh a constant 0, bh a constant ~(USItype) 0, or the general case.
   In the two special cases bh is not passed to the asm at all.  sl is an
   earlyclobber output ("=&r") in every template.  Note that bh is
   expanded more than once.  */
875#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
876 do { \
877 if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 878 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
41b0afab
RM
879 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
880 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
c3c8283c 881 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
41b0afab 882 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
28f540f4 883 else \
c3c8283c 884 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
41b0afab
RM
885 : "=r" (sh), "=&r" (sl) \
886 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
28f540f4
RM
887 } while (0)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction of (bh,bl)
   from (ah,al) into (sh,sl).  Five templates: ah a constant 0 or
   ~(USItype) 0, bh a constant 0 or ~(USItype) 0, and the general case.
   None of the operands carries the commutative "%" modifier here, unlike
   in add_ssaaaa.  ah and bh are expanded more than once.  */
888#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
889 do { \
890 if (__builtin_constant_p (ah) && (ah) == 0) \
c3c8283c 891 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
41b0afab
RM
892 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
893 else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
c3c8283c 894 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
41b0afab 895 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
28f540f4 896 else if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 897 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
41b0afab
RM
898 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
899 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
c3c8283c 900 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
41b0afab 901 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
28f540f4 902 else \
c3c8283c 903 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
41b0afab
RM
904 : "=r" (sh), "=&r" (sl) \
905 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
28f540f4
RM
906 } while (0)
/* count_leading_zeros is a single cntlzw instruction;
   COUNT_LEADING_ZEROS_0 is set to 32 alongside it.  */
907#define count_leading_zeros(count, x) \
c3c8283c 908 __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
e9b3e3c5 909#define COUNT_LEADING_ZEROS_0 32
#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
  || defined (__ppc__) \
  || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
  || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
      && CPU_FAMILY == PPC)
/* umul_ppmm (ph, pl, m0, m1): unsigned 32x32 multiply.  The high word of
   the product comes from the mulhwu instruction, the low word from an
   ordinary C multiplication.  m0 and m1 are captured once in __m0/__m1
   and only those copies are used afterwards, so arguments with side
   effects are evaluated a single time and the asm always sees USItype
   operands.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
/* smul_ppmm (ph, pl, m0, m1): signed counterpart using mulhw.  The low
   word is computed in unsigned arithmetic: its bit pattern is the same as
   for the signed product, but unsigned multiplication cannot overflow.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    SItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = (USItype) __m0 * (USItype) __m1; \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#endif
41b0afab
RM
931#endif /* 32-bit POWER architecture variants. */
932
933/* We should test _IBMR2 here when we add assembly support for the system
934 vendor compilers. */
935#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
09af82c9
RM
/* 64-bit variant of the two-word addition: the structure matches the
   32-bit PowerPC macro, except that the "all ones" test compares bh with
   ~(UDItype) 0.  bh is expanded more than once.  */
936#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
937 do { \
938 if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 939 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
41b0afab
RM
940 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
941 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
c3c8283c 942 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
41b0afab 943 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
09af82c9 944 else \
c3c8283c 945 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
41b0afab
RM
946 : "=r" (sh), "=&r" (sl) \
947 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
09af82c9
RM
948 } while (0)
/* 64-bit two-word subtraction (ah,al) - (bh,bl) -> (sh,sl), with special
   templates for a constant ah or bh equal to 0 or ~(UDItype) 0.  ah and
   bh are expanded more than once.  */
949#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
950 do { \
951 if (__builtin_constant_p (ah) && (ah) == 0) \
c3c8283c 952 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
41b0afab
RM
953 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
954 else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
c3c8283c 955 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
41b0afab 956 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
09af82c9 957 else if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 958 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
41b0afab
RM
959 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
960 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
c3c8283c 961 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
41b0afab 962 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
09af82c9 963 else \
c3c8283c 964 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
41b0afab
RM
965 : "=r" (sh), "=&r" (sl) \
966 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
09af82c9 967 } while (0)
/* count_leading_zeros is a single cntlzd instruction;
   COUNT_LEADING_ZEROS_0 is set to 64 alongside it.  */
09af82c9 968#define count_leading_zeros(count, x) \
41b0afab 969 __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
09af82c9 970#define COUNT_LEADING_ZEROS_0 64
09af82c9
RM
/* umul_ppmm (ph, pl, m0, m1): unsigned 64x64 multiply.  The high word of
   the product comes from the mulhdu instruction, the low word from an
   ordinary C multiplication.  m0 and m1 are captured once in __m0/__m1
   and only those copies are used afterwards, so arguments with side
   effects are evaluated a single time.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
/* smul_ppmm (ph, pl, m0, m1): signed counterpart using mulhd.  The low
   word is computed in unsigned arithmetic: its bit pattern is the same as
   for the signed product, but unsigned multiplication cannot overflow.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    DItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = (UDItype) __m0 * (UDItype) __m1; \
  } while (0)
#define SMUL_TIME 14 /* ??? */
#define UDIV_TIME 120 /* ??? */
986#endif /* 64-bit PowerPC. */
28f540f4 987
/* IBM RT/ROMP, 32-bit words.  add_ssaaaa and sub_ddmmss are two-instruction
   asm sequences; both tie the outputs to the ah/al inputs through the
   matching constraints "0" and "1", and both mark sl as earlyclobber.  */
e9b3e3c5 988#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
28f540f4 989#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 990 __asm__ ("a %1,%5\n\tae %0,%3" \
1da2d51a
UD
991 : "=r" ((USItype) (sh)), \
992 "=&r" ((USItype) (sl)) \
993 : "%0" ((USItype) (ah)), \
994 "r" ((USItype) (bh)), \
995 "%1" ((USItype) (al)), \
996 "r" ((USItype) (bl)))
28f540f4 997#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 998 __asm__ ("s %1,%5\n\tse %0,%3" \
1da2d51a
UD
999 : "=r" ((USItype) (sh)), \
1000 "=&r" ((USItype) (sl)) \
1001 : "0" ((USItype) (ah)), \
1002 "r" ((USItype) (bh)), \
1003 "1" ((USItype) (al)), \
1004 "r" ((USItype) (bl)))
28f540f4
RM
/* umul_ppmm: the asm clears r2, moves __m0 into r10, issues sixteen
   "m r2,%3" steps, and then copies r2 to ph and r10 to pl; r2 is listed
   as clobbered.  After the asm, ph is increased by __m1 when the top bit
   of __m0 is set and by __m0 when the top bit of __m1 is set.
   NOTE(review): this presumably converts a signed high word into the
   unsigned one -- confirm against the ROMP multiply-step definition.  */
1005#define umul_ppmm(ph, pl, m0, m1) \
1006 do { \
1007 USItype __m0 = (m0), __m1 = (m1); \
1008 __asm__ ( \
41b0afab
RM
1009 "s r2,r2\n" \
1010" mts r10,%2\n" \
1011" m r2,%3\n" \
1012" m r2,%3\n" \
1013" m r2,%3\n" \
1014" m r2,%3\n" \
1015" m r2,%3\n" \
1016" m r2,%3\n" \
1017" m r2,%3\n" \
1018" m r2,%3\n" \
1019" m r2,%3\n" \
1020" m r2,%3\n" \
1021" m r2,%3\n" \
1022" m r2,%3\n" \
1023" m r2,%3\n" \
1024" m r2,%3\n" \
1025" m r2,%3\n" \
1026" m r2,%3\n" \
1027" cas %0,r2,r0\n" \
1028" mfs r10,%1" \
1da2d51a
UD
1029 : "=r" ((USItype) (ph)), \
1030 "=r" ((USItype) (pl)) \
28f540f4
RM
1031 : "%r" (__m0), \
1032 "r" (__m1) \
1033 : "r2"); \
1034 (ph) += ((((SItype) __m0 >> 31) & __m1) \
1035 + (((SItype) __m1 >> 31) & __m0)); \
1036 } while (0)
1037#define UMUL_TIME 20
1038#define UDIV_TIME 200
/* count_leading_zeros: the clz instruction is applied to the upper 16 bits
   when x >= 0x10000, otherwise to x itself with 16 added to the result.
   x is expanded twice.  */
1039#define count_leading_zeros(count, x) \
1040 do { \
1041 if ((x) >= 0x10000) \
1042 __asm__ ("clz %0,%1" \
1da2d51a
UD
1043 : "=r" ((USItype) (count)) \
1044 : "r" ((USItype) (x) >> 16)); \
28f540f4
RM
1045 else \
1046 { \
1047 __asm__ ("clz %0,%1" \
1da2d51a
UD
1048 : "=r" ((USItype) (count)) \
1049 : "r" ((USItype) (x))); \
28f540f4
RM
1050 (count) += 16; \
1051 } \
1052 } while (0)
8f5ca04b
RM
1053#endif
1054
5d025ea6
JM
/* RISC-V.  With __riscv_mul defined, both helper macros are plain C
   multiplications.  Without it, __muluw3 places its operands in a0/a1 and
   issues a call to __mulsi3 (xlen 32) or __muldi3 (xlen 64) from inline
   asm, returning the value left in a0.  */
1055#if defined(__riscv)
1056#ifdef __riscv_mul
1057#define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v))
1058#define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b))
1059#else
1060#if __riscv_xlen == 32
1061 #define MULUW3 "call __mulsi3"
1062#elif __riscv_xlen == 64
1063 #define MULUW3 "call __muldi3"
1064#else
1065#error unsupport xlen
1066#endif /* __riscv_xlen */
1067/* We rely on the fact that MULUW3 doesn't clobber the t-registers.
1068 It can get better register allocation result. */
/* The asm below declares a0 and a1 as read-write operands and lists ra, a2
   and a3 as clobbered.  */
1069#define __muluw3(a, b) \
1070 ({ \
1071 register UWtype __op0 asm ("a0") = a; \
1072 register UWtype __op1 asm ("a1") = b; \
1073 asm volatile (MULUW3 \
1074 : "+r" (__op0), "+r" (__op1) \
1075 : \
1076 : "ra", "a2", "a3"); \
1077 __op0; \
1078 })
1079#endif /* __riscv_mul */
/* umul_ppmm: splits u and v into half words, forms the four partial
   products with __muluw3, and recombines them into (w1,w0), propagating
   the one carry that the middle sum can produce.  u and v are each
   expanded twice.  */
1080#define umul_ppmm(w1, w0, u, v) \
1081 do { \
1082 UWtype __x0, __x1, __x2, __x3; \
1083 UHWtype __ul, __vl, __uh, __vh; \
1084 \
1085 __ul = __ll_lowpart (u); \
1086 __uh = __ll_highpart (u); \
1087 __vl = __ll_lowpart (v); \
1088 __vh = __ll_highpart (v); \
1089 \
1090 __x0 = __muluw3 (__ul, __vl); \
1091 __x1 = __muluw3 (__ul, __vh); \
1092 __x2 = __muluw3 (__uh, __vl); \
1093 __x3 = __muluw3 (__uh, __vh); \
1094 \
1095 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
1096 __x1 += __x2; /* but this indeed can */ \
1097 if (__x1 < __x2) /* did we get it? */ \
1098 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
1099 \
1100 (w1) = __x3 + __ll_highpart (__x1); \
1101 (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
1102 } while (0)
1103#endif /* __riscv */
1104
/* SuperH, 32-bit words.  umul_ppmm is only provided when __sh1__ is not
   defined; it issues dmulu.l and copies macl/mach into w0/w1, listing
   both as clobbered.  */
1105#if defined(__sh__) && W_TYPE_SIZE == 32
24784465 1106#ifndef __sh1__
e9b3e3c5
UD
1107#define umul_ppmm(w1, w0, u, v) \
1108 __asm__ ( \
24784465
RM
1109 "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
1110 : "=r<" ((USItype)(w1)), \
1111 "=r<" ((USItype)(w0)) \
e9b3e3c5
UD
1112 : "r" ((USItype)(u)), \
1113 "r" ((USItype)(v)) \
1114 : "macl", "mach")
1115#define UMUL_TIME 5
1116#endif
1117
24784465
RM
1118/* This is the same algorithm as __udiv_qrnnd_c. */
1119#define UDIV_NEEDS_NORMALIZATION 1
1120
5d29eefd
AS
1121#ifdef __FDPIC__
1122/* FDPIC needs a special version of the asm fragment to extract the
1123 code address from the function descriptor. __udiv_qrnnd_16 is
1124 assumed to be local and not to use the GOT, so loading r12 is
1125 not needed. */
/* Both udiv_qrnnd variants call the hidden helper __udiv_qrnnd_16 twice
   from inline asm.  The FDPIC variant first loads the code address
   through the pointer in %5 into r2 and jumps via r2; the other variant
   jumps to %5 directly.  r is pinned to the "z" register class and tied
   to n1 by the matching constraint "1".  */
1126#define udiv_qrnnd(q, r, n1, n0, d) \
1127 do { \
1128 extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
1129 __attribute__ ((visibility ("hidden"))); \
1130 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
1131 __asm__ ( \
1132 "mov%M4 %4,r5\n" \
1133" swap.w %3,r4\n" \
1134" swap.w r5,r6\n" \
1135" mov.l @%5,r2\n" \
1136" jsr @r2\n" \
1137" shll16 r6\n" \
1138" swap.w r4,r4\n" \
1139" mov.l @%5,r2\n" \
1140" jsr @r2\n" \
1141" swap.w r1,%0\n" \
1142" or r1,%0" \
1143 : "=r" (q), "=&z" (r) \
1144 : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
1145 : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
1146 } while (0)
1147#else
24784465
RM
1148#define udiv_qrnnd(q, r, n1, n0, d) \
1149 do { \
1150 extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
48693bea 1151 __attribute__ ((visibility ("hidden"))); \
24784465
RM
1152 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
1153 __asm__ ( \
1154 "mov%M4 %4,r5\n" \
1155" swap.w %3,r4\n" \
1156" swap.w r5,r6\n" \
1157" jsr @%5\n" \
1158" shll16 r6\n" \
1159" swap.w r4,r4\n" \
1160" jsr @%5\n" \
1161" swap.w r1,%0\n" \
1162" or r1,%0" \
1163 : "=r" (q), "=&z" (r) \
1164 : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
78fd882a 1165 : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
24784465 1166 } while (0)
5d29eefd 1167#endif /* __FDPIC__ */
24784465
RM
1168
1169#define UDIV_TIME 80
1170
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (ah,al) - (bh,bl) -> (sh,sl).  The first subc writes sl (%1) before the
   second subc reads bh (%4), so sl is marked earlyclobber to keep the
   compiler from placing bh in the register that holds al/sl.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("clrt;subc %5,%1; subc %4,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
24784465
RM
1175
1176#endif /* __sh__ */
1177
41b0afab
RM
/* 32-bit SPARC (neither __arch64__ nor __sparcv9).  add_ssaaaa and
   sub_ddmmss are two-instruction sequences; sl is an earlyclobber output
   in both.  Only the addition marks its first operands commutative
   ("%rJ"); the subtraction uses plain "rJ".  */
1178#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1179 && W_TYPE_SIZE == 32
28f540f4 1180#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 1181 __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
1da2d51a
UD
1182 : "=r" ((USItype) (sh)), \
1183 "=&r" ((USItype) (sl)) \
1184 : "%rJ" ((USItype) (ah)), \
1185 "rI" ((USItype) (bh)), \
1186 "%rJ" ((USItype) (al)), \
1187 "rI" ((USItype) (bl)) \
28f540f4
RM
1188 __CLOBBER_CC)
1189#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 1190 __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
1da2d51a
UD
1191 : "=r" ((USItype) (sh)), \
1192 "=&r" ((USItype) (sl)) \
1193 : "rJ" ((USItype) (ah)), \
1194 "rI" ((USItype) (bh)), \
1195 "rJ" ((USItype) (al)), \
1196 "rI" ((USItype) (bl)) \
28f540f4 1197 __CLOBBER_CC)
402fe938
DM
/* Multiply/divide selection: __sparc_v9__ first, then __sparc_v8__, then
   __sparclite__, and finally a version for SPARC without multiply and
   divide instructions.  In the v9 umul_ppmm the product is written to a
   local variable bound to g1; w1 is taken from it with a 32-bit right
   shift and w0 is assigned from it afterwards in C.  */
1198#if defined (__sparc_v9__)
1199#define umul_ppmm(w1, w0, u, v) \
1200 do { \
1201 register USItype __g1 asm ("g1"); \
1202 __asm__ ("umul\t%2,%3,%1\n\t" \
1203 "srlx\t%1, 32, %0" \
1204 : "=r" ((USItype) (w1)), \
1205 "=r" (__g1) \
1206 : "r" ((USItype) (u)), \
1207 "r" ((USItype) (v))); \
1208 (w0) = __g1; \
1209 } while (0)
1210#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1211 __asm__ ("mov\t%2,%%y\n\t" \
1212 "udiv\t%3,%4,%0\n\t" \
1213 "umul\t%0,%4,%1\n\t" \
1214 "sub\t%3,%1,%1" \
1215 : "=&r" ((USItype) (__q)), \
1216 "=&r" ((USItype) (__r)) \
1217 : "r" ((USItype) (__n1)), \
1218 "r" ((USItype) (__n0)), \
1219 "r" ((USItype) (__d)))
1220#else
28f540f4 1221#if defined (__sparc_v8__)
28f540f4
RM
1222#define umul_ppmm(w1, w0, u, v) \
1223 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
1da2d51a
UD
1224 : "=r" ((USItype) (w1)), \
1225 "=r" ((USItype) (w0)) \
1226 : "r" ((USItype) (u)), \
1227 "r" ((USItype) (v)))
41b0afab 1228#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1da2d51a 1229 __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
41b0afab
RM
1230 : "=&r" ((USItype) (__q)), \
1231 "=&r" ((USItype) (__r)) \
1232 : "r" ((USItype) (__n1)), \
1233 "r" ((USItype) (__n0)), \
1234 "r" ((USItype) (__d)))
1da2d51a 1235#else
28f540f4
RM
1236#if defined (__sparclite__)
1237/* This has hardware multiply but not divide. It also has two additional
1238 instructions scan (ffs from high bit) and divscc. */
1239#define umul_ppmm(w1, w0, u, v) \
1240 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
1da2d51a
UD
1241 : "=r" ((USItype) (w1)), \
1242 "=r" ((USItype) (w0)) \
1243 : "r" ((USItype) (u)), \
1244 "r" ((USItype) (v)))
/* SPARClite udiv_qrnnd (q, r, n1, n0, d): n1 is written to %y and the
   quotient is developed with 32 divscc steps through g1; the final step
   writes q (%0), the remainder is then read from %y into r (%1) and
   conditionally corrected by adding d (%4).  Because d is still read
   after q and r have been written, both outputs are earlyclobber so that
   neither can be given the register that holds d.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
" tst %%g0\n" \
" divscc %3,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%0\n" \
" rd %%y,%1\n" \
" bl,a 1f\n" \
" add %1,%4,%1\n" \
"1: ! End of inline udiv_qrnnd" \
	   : "=&r" ((USItype) (q)), \
	     "=&r" ((USItype) (r)) \
	   : "r" ((USItype) (n1)), \
	     "r" ((USItype) (n0)), \
	     "rI" ((USItype) (d)) \
	   : "g1" __AND_CLOBBER_CC)
28f540f4
RM
1291#define UDIV_TIME 37
/* SPARClite count_leading_zeros: a single scan instruction.  */
1292#define count_leading_zeros(count, x) \
41b0afab
RM
1293 do { \
1294 __asm__ ("scan %1,1,%0" \
48693bea
AK
1295 : "=r" ((USItype) (count)) \
1296 : "r" ((USItype) (x))); \
62818cfd 1297 } while (0)
e9b3e3c5
UD
1298/* Early sparclites return 63 for an argument of 0, but they warn that future
1299 implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
1300 undefined. */
1da2d51a
UD
1301#else
1302/* SPARC without integer multiplication and divide instructions.
1303 (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* umul_ppmm: u is written to %y, then 32 mulscc steps with v followed by a
   final mulscc with 0 accumulate in g1.  o5 holds u masked by the sign
   of v (sra by 31, then and) and is added to g1 to form w1; w0 is read
   back from %y.  g1 and o5 are listed as clobbered.  */
28f540f4 1304#define umul_ppmm(w1, w0, u, v) \
772d0e1a 1305 __asm__ ("! Inlined umul_ppmm\n" \
41b0afab
RM
1306" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
1307" sra %3,31,%%o5 ! Don't move this insn\n" \
1308" and %2,%%o5,%%o5 ! Don't move this insn\n" \
1309" andcc %%g0,0,%%g1 ! Don't move this insn\n" \
1310" mulscc %%g1,%3,%%g1\n" \
1311" mulscc %%g1,%3,%%g1\n" \
1312" mulscc %%g1,%3,%%g1\n" \
1313" mulscc %%g1,%3,%%g1\n" \
1314" mulscc %%g1,%3,%%g1\n" \
1315" mulscc %%g1,%3,%%g1\n" \
1316" mulscc %%g1,%3,%%g1\n" \
1317" mulscc %%g1,%3,%%g1\n" \
1318" mulscc %%g1,%3,%%g1\n" \
1319" mulscc %%g1,%3,%%g1\n" \
1320" mulscc %%g1,%3,%%g1\n" \
1321" mulscc %%g1,%3,%%g1\n" \
1322" mulscc %%g1,%3,%%g1\n" \
1323" mulscc %%g1,%3,%%g1\n" \
1324" mulscc %%g1,%3,%%g1\n" \
1325" mulscc %%g1,%3,%%g1\n" \
1326" mulscc %%g1,%3,%%g1\n" \
1327" mulscc %%g1,%3,%%g1\n" \
1328" mulscc %%g1,%3,%%g1\n" \
1329" mulscc %%g1,%3,%%g1\n" \
1330" mulscc %%g1,%3,%%g1\n" \
1331" mulscc %%g1,%3,%%g1\n" \
1332" mulscc %%g1,%3,%%g1\n" \
1333" mulscc %%g1,%3,%%g1\n" \
1334" mulscc %%g1,%3,%%g1\n" \
1335" mulscc %%g1,%3,%%g1\n" \
1336" mulscc %%g1,%3,%%g1\n" \
1337" mulscc %%g1,%3,%%g1\n" \
1338" mulscc %%g1,%3,%%g1\n" \
1339" mulscc %%g1,%3,%%g1\n" \
1340" mulscc %%g1,%3,%%g1\n" \
1341" mulscc %%g1,%3,%%g1\n" \
1342" mulscc %%g1,0,%%g1\n" \
1343" add %%g1,%%o5,%0\n" \
1344" rd %%y,%1" \
1da2d51a
UD
1345 : "=r" ((USItype) (w1)), \
1346 "=r" ((USItype) (w0)) \
1347 : "%rI" ((USItype) (u)), \
1348 "r" ((USItype) (v)) \
e9b3e3c5 1349 : "g1", "o5" __AND_CLOBBER_CC)
28f540f4 1350#define UMUL_TIME 39 /* 39 instructions */
390a4882 1351/* It's quite necessary to add this much assembler for the sparc.
41b0afab
RM
1352 The default udiv_qrnnd (in C) is more than 10 times slower! */
/* udiv_qrnnd: a loop counted down from 32 in g1.  __q is tied to __n0 and
   __r to __n1 through the matching constraints "0" and "1", and both
   outputs are earlyclobber; the quotient is complemented by the final
   xnor.  */
1353#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
772d0e1a
AJ
1354 __asm__ ("! Inlined udiv_qrnnd\n" \
1355" mov 32,%%g1\n" \
1356" subcc %1,%2,%%g0\n" \
1357"1: bcs 5f\n" \
41b0afab 1358" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
772d0e1a
AJ
1359" sub %1,%2,%1 ! this kills msb of n\n" \
1360" addx %1,%1,%1 ! so this can't give carry\n" \
1361" subcc %%g1,1,%%g1\n" \
1362"2: bne 1b\n" \
41b0afab 1363" subcc %1,%2,%%g0\n" \
772d0e1a 1364" bcs 3f\n" \
41b0afab 1365" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
772d0e1a 1366" b 3f\n" \
41b0afab 1367" sub %1,%2,%1 ! this kills msb of n\n" \
772d0e1a
AJ
1368"4: sub %1,%2,%1\n" \
1369"5: addxcc %1,%1,%1\n" \
1370" bcc 2b\n" \
41b0afab 1371" subcc %%g1,1,%%g1\n" \
772d0e1a
AJ
1372"! Got carry from n. Subtract next step to cancel this carry.\n" \
1373" bne 4b\n" \
41b0afab 1374" addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
772d0e1a
AJ
1375" sub %1,%2,%1\n" \
1376"3: xnor %0,0,%0\n" \
41b0afab
RM
1377" ! End of inline udiv_qrnnd" \
1378 : "=&r" ((USItype) (__q)), \
1379 "=&r" ((USItype) (__r)) \
1380 : "r" ((USItype) (__d)), \
1381 "1" ((USItype) (__n1)), \
1382 "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1383#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
1da2d51a
UD
1384#endif /* __sparclite__ */
1385#endif /* __sparc_v8__ */
402fe938 1386#endif /* __sparc_v9__ */
41b0afab 1387#endif /* sparc32 */
28f540f4 1388
41b0afab
RM
/* 64-bit SPARC.  add_ssaaaa adds the low words with addcc and the high
   words with a plain add, then sets the local __carry (initialised to 0)
   to 1 with movcs on %xcc and adds it to the high word.  */
1389#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1390 && W_TYPE_SIZE == 64
e9b3e3c5 1391#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
402fe938
DM
1392 do { \
1393 UDItype __carry = 0; \
1394 __asm__ ("addcc\t%r5,%6,%1\n\t" \
1395 "add\t%r3,%4,%0\n\t" \
1396 "movcs\t%%xcc, 1, %2\n\t" \
1397 "add\t%0, %2, %0" \
1398 : "=r" ((UDItype)(sh)), \
1399 "=&r" ((UDItype)(sl)), \
1400 "+r" (__carry) \
1401 : "%rJ" ((UDItype)(ah)), \
1402 "rI" ((UDItype)(bh)), \
1403 "%rJ" ((UDItype)(al)), \
1404 "rI" ((UDItype)(bl)) \
1405 __CLOBBER_CC); \
1406 } while (0)
e9b3e3c5 1407
402fe938
DM
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (ah,al) - (bh,bl) -> (sh,sl).  The low words are subtracted with subcc,
   the high words with a plain sub; the borrow is captured in __carry with
   movcs on %xcc and subtracted from the high word.

   The minuend operands deliberately do NOT carry the "%" constraint
   modifier: "%" tells the compiler that an operand may be exchanged with
   the following one, which is only valid for commutative operations and
   would allow bh - ah (or bl - al) to be computed instead.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UDItype __carry = 0; \
    __asm__ ("subcc\t%r5,%6,%1\n\t" \
	     "sub\t%r3,%4,%0\n\t" \
	     "movcs\t%%xcc, 1, %2\n\t" \
	     "sub\t%0, %2, %0" \
	     : "=r" ((UDItype)(sh)), \
	       "=&r" ((UDItype)(sl)), \
	       "+r" (__carry) \
	     : "rJ" ((UDItype)(ah)), \
	       "rI" ((UDItype)(bh)), \
	       "rJ" ((UDItype)(al)), \
	       "rI" ((UDItype)(bl)) \
	     __CLOBBER_CC); \
  } while (0)
e9b3e3c5
UD
1424
/* 64-bit SPARC umul_ppmm: builds the 128-bit product from 32-bit halves of
   u and v with four mulx instructions, using four scratch outputs
   (tmp1..tmp4).  wl and all scratch operands are earlyclobber; wh is
   written only by the final instruction.  */
1425#define umul_ppmm(wh, wl, u, v) \
1426 do { \
1427 UDItype tmp1, tmp2, tmp3, tmp4; \
1428 __asm__ __volatile__ ( \
41b0afab
RM
1429 "srl %7,0,%3\n\t" \
1430 "mulx %3,%6,%1\n\t" \
1431 "srlx %6,32,%2\n\t" \
1432 "mulx %2,%3,%4\n\t" \
1433 "sllx %4,32,%5\n\t" \
1434 "srl %6,0,%3\n\t" \
1435 "sub %1,%5,%5\n\t" \
1436 "srlx %5,32,%5\n\t" \
1437 "addcc %4,%5,%4\n\t" \
1438 "srlx %7,32,%5\n\t" \
1439 "mulx %3,%5,%3\n\t" \
1440 "mulx %2,%5,%5\n\t" \
1441 "sethi %%hi(0x80000000),%2\n\t" \
1442 "addcc %4,%3,%4\n\t" \
1443 "srlx %4,32,%4\n\t" \
1444 "add %2,%2,%2\n\t" \
1445 "movcc %%xcc,%%g0,%2\n\t" \
1446 "addcc %5,%4,%5\n\t" \
1447 "sllx %3,32,%3\n\t" \
1448 "add %1,%3,%1\n\t" \
772d0e1a 1449 "add %5,%2,%0" \
e9b3e3c5
UD
1450 : "=r" ((UDItype)(wh)), \
1451 "=&r" ((UDItype)(wl)), \
1452 "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
1453 : "r" ((UDItype)(u)), \
1454 "r" ((UDItype)(v)) \
1455 __CLOBBER_CC); \
1456 } while (0)
1457#define UMUL_TIME 96
1458#define UDIV_TIME 230
313fed01 1459#endif /* sparc64 */
e9b3e3c5
UD
1460
/* VAX, 32-bit words.  add_ssaaaa and sub_ddmmss are two-instruction
   sequences with outputs tied to ah/al ("0"/"1") and sl earlyclobber.  */
1461#if defined (__vax__) && W_TYPE_SIZE == 32
28f540f4 1462#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 1463 __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
1da2d51a
UD
1464 : "=g" ((USItype) (sh)), \
1465 "=&g" ((USItype) (sl)) \
1466 : "%0" ((USItype) (ah)), \
1467 "g" ((USItype) (bh)), \
1468 "%1" ((USItype) (al)), \
1469 "g" ((USItype) (bl)))
28f540f4 1470#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 1471 __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
1da2d51a
UD
1472 : "=g" ((USItype) (sh)), \
1473 "=&g" ((USItype) (sl)) \
1474 : "0" ((USItype) (ah)), \
1475 "g" ((USItype) (bh)), \
1476 "1" ((USItype) (al)), \
1477 "g" ((USItype) (bl)))
28f540f4
RM
/* umul_ppmm: emul writes a double-word result into the union __xx, whose
   __l/__h members supply xl and xh.  Afterwards xh is increased by __m1
   when the top bit of __m0 is set and by __m0 when the top bit of __m1 is
   set.  NOTE(review): presumably emul is a signed multiply and this is
   the signed-to-unsigned correction -- confirm.  */
1478#define umul_ppmm(xh, xl, m0, m1) \
1479 do { \
1da2d51a
UD
1480 union { \
1481 UDItype __ll; \
1482 struct {USItype __l, __h;} __i; \
1483 } __xx; \
28f540f4
RM
1484 USItype __m0 = (m0), __m1 = (m1); \
1485 __asm__ ("emul %1,%2,$0,%0" \
1da2d51a 1486 : "=r" (__xx.__ll) \
28f540f4
RM
1487 : "g" (__m0), \
1488 "g" (__m1)); \
1da2d51a
UD
1489 (xh) = __xx.__i.__h; \
1490 (xl) = __xx.__i.__l; \
28f540f4
RM
1491 (xh) += ((((SItype) __m0 >> 31) & __m1) \
1492 + (((SItype) __m1 >> 31) & __m0)); \
1493 } while (0)
/* sdiv_qrnnd: packs n1 (high) and n0 (low) into a signed double word and
   hands it to ediv together with d; q and r are the two outputs.  */
1494#define sdiv_qrnnd(q, r, n1, n0, d) \
1495 do { \
1496 union {DItype __ll; \
1497 struct {SItype __l, __h;} __i; \
1498 } __xx; \
1499 __xx.__i.__h = n1; __xx.__i.__l = n0; \
1500 __asm__ ("ediv %3,%2,%0,%1" \
1501 : "=g" (q), "=g" (r) \
1da2d51a 1502 : "g" (__xx.__ll), "g" (d)); \
28f540f4
RM
1503 } while (0)
1504#endif /* __vax__ */
1505
8115f29b
L
/* TI C6X.  add_ssaaaa adds the low words with an addu whose result is a
   UDItype: its low 32 bits become sl, and its bits above 31 are added to
   ah + bh to form sh.  */
1506#ifdef _TMS320C6X
1507#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1508 do \
1509 { \
1510 UDItype __ll; \
1511 __asm__ ("addu .l1 %1, %2, %0" \
1512 : "=a" (__ll) : "a" (al), "a" (bl)); \
1513 (sl) = (USItype)__ll; \
1514 (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \
1515 } \
1516 while (0)
1517
1518#ifdef _TMS320C6400_PLUS
/* __umulsidi3 (u, v): full UDItype product of two USItype values.  Both
   arguments are parenthesized so that expression arguments such as
   "a + b" are converted and multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u) * (USItype)(v))
/* umul_ppmm: one UDItype multiplication; bits 63..32 go to w1 and bits
   31..0 to w0.  Each argument is expanded once.  */
1520#define umul_ppmm(w1, w0, u, v) \
1521 do { \
1522 UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \
1523 (w1) = (USItype) (__x >> 32); \
1524 (w0) = (USItype) (__x); \
1525 } while (0)
1526#endif /* _TMS320C6400_PLUS */
1527
/* count_leading_zeros is always mapped to __builtin_clz in this section;
   count_trailing_zeros is mapped to __builtin_ctz only when _TMS320C6400
   is defined.  */
1528#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
1529#ifdef _TMS320C6400
1530#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
1531#endif
1532#define UMUL_TIME 4
1533#define UDIV_TIME 40
1534#endif /* _TMS320C6X */
1535
24784465
RM
1536#if defined (__xtensa__) && W_TYPE_SIZE == 32
1537/* This code is not Xtensa-configuration-specific, so rely on the compiler
1538 to expand builtin functions depending on what configuration features
1539 are available. This avoids library calls when the operation can be
1540 performed in-line. */
/* umul_ppmm stores the result of __builtin_umulsidi3 in a DWunion and
   copies its high and low members to w1 and w0.  Note that w1 and w0 are
   used without parentheses in the expansion.  */
1541#define umul_ppmm(w1, w0, u, v) \
1542 do { \
1543 DWunion __w; \
1544 __w.ll = __builtin_umulsidi3 (u, v); \
1545 w1 = __w.s.high; \
1546 w0 = __w.s.low; \
1547 } while (0)
1548#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
1549#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
1550#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
1551#endif /* __xtensa__ */
1552
def7fbd6
AS
/* xstormy16: count_leading_zeros walks x in 16-bit pieces starting from
   the most significant one, adding the count returned for each piece and
   stopping at the first piece whose count is not 16.  x is expanded once
   per loop iteration.
   NOTE(review): the prototype below declares
   __stormy16_count_leading_zeros, but the macro body calls __clzhi2,
   which is not declared in this section -- confirm where __clzhi2 is
   declared.  */
1553#if defined xstormy16
1554extern UHItype __stormy16_count_leading_zeros (UHItype);
1555#define count_leading_zeros(count, x) \
1556 do \
1557 { \
1558 UHItype size; \
1559 \
1560 /* We assume that W_TYPE_SIZE is a multiple of 16... */ \
1561 for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \
1562 { \
1563 UHItype c; \
1564 \
1565 c = __clzhi2 ((x) >> (size - 16)); \
1566 (count) += c; \
1567 if (c != 16) \
1568 break; \
1569 } \
1570 } \
1571 while (0)
1572#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1573#endif
1574
e9b3e3c5
UD
/* Z8000, 16-bit words.  add_ssaaaa and sub_ddmmss are two-instruction
   sequences with outputs tied to ah/al ("0"/"1") and sl earlyclobber.  */
1575#if defined (__z8000__) && W_TYPE_SIZE == 16
1576#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1577 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
1578 : "=r" ((unsigned int)(sh)), \
1579 "=&r" ((unsigned int)(sl)) \
1580 : "%0" ((unsigned int)(ah)), \
1581 "r" ((unsigned int)(bh)), \
1582 "%1" ((unsigned int)(al)), \
1583 "rQR" ((unsigned int)(bl)))
1584#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1585 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
1586 : "=r" ((unsigned int)(sh)), \
1587 "=&r" ((unsigned int)(sl)) \
1588 : "0" ((unsigned int)(ah)), \
1589 "r" ((unsigned int)(bh)), \
1590 "1" ((unsigned int)(al)), \
1591 "rQR" ((unsigned int)(bl)))
/* umul_ppmm: mult writes both halves of the union __xx, which are copied
   to xh and xl.  Afterwards xh is increased by __m1 when bit 15 of __m0
   is set and by __m0 when bit 15 of __m1 is set.  NOTE(review):
   presumably the signed-to-unsigned correction for a signed multiply --
   confirm.  */
1592#define umul_ppmm(xh, xl, m0, m1) \
1593 do { \
1594 union {long int __ll; \
1595 struct {unsigned int __h, __l;} __i; \
1596 } __xx; \
1597 unsigned int __m0 = (m0), __m1 = (m1); \
1598 __asm__ ("mult %S0,%H3" \
1599 : "=r" (__xx.__i.__h), \
1600 "=r" (__xx.__i.__l) \
1601 : "%1" (__m0), \
1602 "rQR" (__m1)); \
1603 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
1604 (xh) += ((((signed int) __m0 >> 15) & __m1) \
1605 + (((signed int) __m1 >> 15) & __m0)); \
1606 } while (0)
1607#endif /* __z8000__ */
1608
28f540f4
RM
1609#endif /* __GNUC__ */
1610
28f540f4
RM
1611/* If this machine has no inline assembler, use C macros. */
1612
#if !defined (add_ssaaaa)
/* add_ssaaaa (sh, sl, ah, al, bh, bl): portable two-word addition.
   Computes (sh,sl) = (ah,al) + (bh,bl) in UWtype arithmetic; a carry out
   of the high word is discarded.  The low-word carry is detected by the
   wrapped sum being smaller than al.  al is captured in a local so that
   every argument is evaluated exactly once; sl is assigned last, so the
   outputs may alias the inputs.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __add_al = (al); \
    UWtype __x; \
    __x = __add_al + (bl); \
    (sh) = (ah) + (bh) + (__x < __add_al); \
    (sl) = __x; \
  } while (0)
#endif
1622
#if !defined (sub_ddmmss)
/* sub_ddmmss (sh, sl, ah, al, bh, bl): portable two-word subtraction.
   Computes (sh,sl) = (ah,al) - (bh,bl) in UWtype arithmetic; a borrow out
   of the high word is discarded.  The low-word borrow is detected by the
   wrapped difference being larger than al.  al is captured in a local so
   that every argument is evaluated exactly once; sl is assigned last, so
   the outputs may alias the inputs.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __sub_al = (al); \
    UWtype __x; \
    __x = __sub_al - (bl); \
    (sh) = (ah) - (bh) - (__x > __sub_al); \
    (sl) = __x; \
  } while (0)
#endif
1632
f30070ae
RM
1633/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1634 smul_ppmm. */
/* The signed high word __w1 is corrected by adding __xm1 when the top bit
   of __xm0 is set and __xm0 when the top bit of __xm1 is set; the low
   word w0 is passed straight through to smul_ppmm.  u and v are captured
   once in __xm0/__xm1.  */
1635#if !defined (umul_ppmm) && defined (smul_ppmm)
1636#define umul_ppmm(w1, w0, u, v) \
1637 do { \
1638 UWtype __w1; \
1639 UWtype __xm0 = (u), __xm1 = (v); \
1640 smul_ppmm (__w1, w0, __xm0, __xm1); \
1641 (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
1642 + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
1643 } while (0)
1644#endif
1645
1646/* If we still don't have umul_ppmm, define it using plain C. */
28f540f4
RM
/* Schoolbook multiplication on half words: u and v are split with
   __ll_lowpart/__ll_highpart, the four partial products are formed in
   UWtype, and the two middle products are summed with explicit carry
   detection before the halves are recombined into (w1,w0).  u and v are
   each expanded twice.  */
1647#if !defined (umul_ppmm)
1648#define umul_ppmm(w1, w0, u, v) \
1649 do { \
e9b3e3c5
UD
1650 UWtype __x0, __x1, __x2, __x3; \
1651 UHWtype __ul, __vl, __uh, __vh; \
28f540f4 1652 \
1da2d51a
UD
1653 __ul = __ll_lowpart (u); \
1654 __uh = __ll_highpart (u); \
1655 __vl = __ll_lowpart (v); \
1656 __vh = __ll_highpart (v); \
28f540f4 1657 \
e9b3e3c5
UD
1658 __x0 = (UWtype) __ul * __vl; \
1659 __x1 = (UWtype) __ul * __vh; \
1660 __x2 = (UWtype) __uh * __vl; \
1661 __x3 = (UWtype) __uh * __vh; \
28f540f4
RM
1662 \
1663 __x1 += __ll_highpart (__x0);/* this can't give carry */ \
1664 __x1 += __x2; /* but this indeed can */ \
1665 if (__x1 < __x2) /* did we get it? */ \
41b0afab 1666 __x3 += __ll_B; /* yes, add it in the proper pos. */ \
28f540f4
RM
1667 \
1668 (w1) = __x3 + __ll_highpart (__x1); \
1da2d51a 1669 (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
28f540f4
RM
1670 } while (0)
1671#endif
1672
1da2d51a
UD
/* Default __umulsidi3: a statement expression that runs umul_ppmm into
   the high and low members of a DWunion and yields its ll member.  */
1673#if !defined (__umulsidi3)
1674#define __umulsidi3(u, v) \
41b0afab 1675 ({DWunion __w; \
1da2d51a
UD
1676 umul_ppmm (__w.s.high, __w.s.low, u, v); \
1677 __w.ll; })
8f5ca04b
RM
1678#endif
1679
28f540f4
RM
1680/* Define this unconditionally, so it can be used for debugging. */
/* __udiv_qrnnd_c (q, r, n1, n0, d): divides the two-word value (n1,n0) by
   d, producing quotient q and remainder r.  The quotient is developed in
   two half-word steps (__q1, then __q0); each step divides by the high
   half of d (__d1), multiplies the trial quotient by the low half (__d0),
   and corrects the trial quotient downwards at most twice.
   d is expanded many times and n1/n0 several times, so arguments must be
   free of side effects.  Both steps divide by __d1, the high half of d.
   NOTE(review): this presumably requires a normalized d (top bit set) so
   that __d1 is non-zero; the fallback that selects this macro defines
   UDIV_NEEDS_NORMALIZATION as 1 -- confirm.  */
1681#define __udiv_qrnnd_c(q, r, n1, n0, d) \
1682 do { \
e9b3e3c5
UD
1683 UWtype __d1, __d0, __q1, __q0; \
1684 UWtype __r1, __r0, __m; \
28f540f4
RM
1685 __d1 = __ll_highpart (d); \
1686 __d0 = __ll_lowpart (d); \
1687 \
1688 __r1 = (n1) % __d1; \
1689 __q1 = (n1) / __d1; \
e9b3e3c5 1690 __m = (UWtype) __q1 * __d0; \
28f540f4
RM
1691 __r1 = __r1 * __ll_B | __ll_highpart (n0); \
1692 if (__r1 < __m) \
1693 { \
1694 __q1--, __r1 += (d); \
1695 if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
1696 if (__r1 < __m) \
1697 __q1--, __r1 += (d); \
1698 } \
1699 __r1 -= __m; \
1700 \
1701 __r0 = __r1 % __d1; \
1702 __q0 = __r1 / __d1; \
e9b3e3c5 1703 __m = (UWtype) __q0 * __d0; \
28f540f4
RM
1704 __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
1705 if (__r0 < __m) \
1706 { \
1707 __q0--, __r0 += (d); \
1708 if (__r0 >= (d)) \
1709 if (__r0 < __m) \
1710 __q0--, __r0 += (d); \
1711 } \
1712 __r0 -= __m; \
1713 \
e9b3e3c5 1714 (q) = (UWtype) __q1 * __ll_B | __q0; \
28f540f4
RM
1715 (r) = __r0; \
1716 } while (0)
1717
1718/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1719 __udiv_w_sdiv (defined in libgcc or elsewhere). */
/* The quotient is the return value of __udiv_w_sdiv; the remainder comes
   back through its first (pointer) argument and is copied to r
   afterwards.  */
1720#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
1721#define udiv_qrnnd(q, r, nh, nl, d) \
1722 do { \
d3c827e7
AK
1723 extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \
1724 UWtype __r; \
1da2d51a 1725 (q) = __udiv_w_sdiv (&__r, nh, nl, d); \
28f540f4
RM
1726 (r) = __r; \
1727 } while (0)
1728#endif
1729
1730/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
/* Selecting the C version also defines UDIV_NEEDS_NORMALIZATION as 1.  */
1731#if !defined (udiv_qrnnd)
1732#define UDIV_NEEDS_NORMALIZATION 1
1733#define udiv_qrnnd __udiv_qrnnd_c
1734#endif
1735
/* Portable count_leading_zeros.  A shift amount __a is chosen first: for
   W_TYPE_SIZE <= 32 by comparing __xr against 1 << __BITS4,
   1 << 2*__BITS4 and 1 << 3*__BITS4; otherwise by scanning downwards in
   8-bit steps for the first non-zero byte.  The result is then
   W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a).  x is captured once in
   __xr.  COUNT_LEADING_ZEROS_0 is set to W_TYPE_SIZE alongside.  */
1736#if !defined (count_leading_zeros)
28f540f4
RM
1737#define count_leading_zeros(count, x) \
1738 do { \
e9b3e3c5
UD
1739 UWtype __xr = (x); \
1740 UWtype __a; \
28f540f4 1741 \
e9b3e3c5 1742 if (W_TYPE_SIZE <= 32) \
28f540f4 1743 { \
e9b3e3c5
UD
1744 __a = __xr < ((UWtype)1<<2*__BITS4) \
1745 ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \
1746 : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
28f540f4
RM
1747 } \
1748 else \
1749 { \
e9b3e3c5 1750 for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
28f540f4
RM
1751 if (((__xr >> __a) & 0xff) != 0) \
1752 break; \
1753 } \
1754 \
e9b3e3c5 1755 (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
28f540f4 1756 } while (0)
e9b3e3c5 1757#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
28f540f4
RM
1758#endif
1759
62818cfd
UD
1760#if !defined (count_trailing_zeros)
1761/* Define count_trailing_zeros using count_leading_zeros. The latter might be
1762 defined in asm, but if it is not, the C version above is good enough. */
/* __ctz_x & -__ctz_x keeps only the lowest set bit of x; the result is
   W_TYPE_SIZE - 1 minus the leading-zero count of that value.  x is
   captured once in __ctz_x.  */
1763#define count_trailing_zeros(count, x) \
1764 do { \
e9b3e3c5
UD
1765 UWtype __ctz_x = (x); \
1766 UWtype __ctz_c; \
62818cfd 1767 count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
e9b3e3c5 1768 (count) = W_TYPE_SIZE - 1 - __ctz_c; \
62818cfd
UD
1769 } while (0)
1770#endif
1771
28f540f4
RM
/* Default to 0 when no earlier section defined UDIV_NEEDS_NORMALIZATION.  */
1772#ifndef UDIV_NEEDS_NORMALIZATION
1773#define UDIV_NEEDS_NORMALIZATION 0
1774#endif