/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   Copyright (C) 1991-2024 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

/* You have to define the following before including this file:

   UWtype -- An unsigned type, default type for operations (typically a "word")
   UHWtype -- An unsigned type, at least half the size of UWtype.
   UDWtype -- An unsigned type, at least twice as large as UWtype.
   W_TYPE_SIZE -- size in bits of UWtype

   UQItype -- Unsigned 8 bit type.
   SItype, USItype -- Signed and unsigned 32 bit types.
   DItype, UDItype -- Signed and unsigned 64 bit types.

   On a 32 bit machine UWtype should typically be USItype;
   on a 64 bit machine, UWtype should typically be UDItype.  */

#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

#ifndef W_TYPE_SIZE
#define W_TYPE_SIZE 32
#define UWtype USItype
#define UHWtype USItype
#define UDWtype UDItype
#endif
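
/* Illustrative note (for exposition only, not part of this file's
   interface): with the default W_TYPE_SIZE of 32, __ll_B is 0x10000, so for
   t = 0x12345678, __ll_lowpart (t) is 0x5678 and __ll_highpart (t) is
   0x1234.  The generic C fallback macros near the end of this file compose
   double-word results from such half-word pieces.  */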

/* Used in glibc only.  */
#ifndef attribute_hidden
#define attribute_hidden
#endif

extern const UQItype __clz_tab[256] attribute_hidden;

/* Define auxiliary asm macros.

   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
   word product in HIGH_PROD and LOW_PROD.

   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
   UDWtype product.  This is just a variant of umul_ppmm.

   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator) divides a UDWtype, composed by the UWtype integers
   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
   than DENOMINATOR for correct operation.  If, in addition, the most
   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
   UDIV_NEEDS_NORMALIZATION is defined to 1.

   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
   is rounded towards 0.

   5) count_leading_zeros(count, x) counts the number of zero-bits from the
   msb to the first nonzero bit in the UWtype X.  This is the number of
   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.

   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
   from the least significant end.

   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   high_addend_2, low_addend_2) adds two UWtype integers, composed by
   HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
   (i.e. carry out) is not stored anywhere, and is lost.

   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
   and is lost.

   If any of these macros are left undefined for a particular CPU,
   C macros are used.  */
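
/* Illustrative usage sketch (for exposition only; the names u1, u0, v1, v0,
   p1, p0, s1, s0, a1, a0, b1, b0 are assumptions of the example, not part of
   this file): given the UWtype setup above, a truncated two-word product of
   (u1:u0) and (v1:v0) can be composed as

     UWtype p1, p0;
     umul_ppmm (p1, p0, u0, v0);          -- low x low gives the 2-word base
     p1 += (u1) * (v0) + (u0) * (v1);     -- add the cross products, mod 2^W

   and a two-word sum of (a1:a0) and (b1:b0) is simply

     add_ssaaaa (s1, s0, a1, a0, b1, b0);

   with any carry out of the high word lost, as documented above.  */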

/* The CPUs come in alphabetical order below.

   Please add support for more CPUs here, or improve the current support
   for the CPUs below!
   (E.g. WE32100, IBM360.)  */

#if defined (__GNUC__) && !defined (NO_ASM)

/* We sometimes need to clobber "cc" with gcc2, but that would not be
   understood by gcc1.  Use cpp to avoid major code duplication.  */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */

#if defined (__aarch64__)

#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* __aarch64__ */

#if defined (__alpha) && W_TYPE_SIZE == 64
/* There is a bug in g++ before version 5 that
   errors on __builtin_alpha_umulh.  */
#if !defined(__cplusplus) || __GNUC__ >= 5
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    (ph) = __builtin_alpha_umulh (__m0, __m1); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 46
#endif /* !c++ */
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UDItype __r; \
    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#ifdef __alpha_cix__
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 64
#else
#define count_leading_zeros(COUNT,X) \
  do { \
    UDItype __xr = (X), __t, __a; \
    __t = __builtin_alpha_cmpbge (0, __xr); \
    __a = __clz_tab[__t ^ 0xff] - 1; \
    __t = __builtin_alpha_extbl (__xr, __a); \
    (COUNT) = 64 - (__clz_tab[__t] + __a*8); \
  } while (0)
#define count_trailing_zeros(COUNT,X) \
  do { \
    UDItype __xr = (X), __t, __a; \
    __t = __builtin_alpha_cmpbge (0, __xr); \
    __t = ~__t & -~__t; \
    __a = ((__t & 0xCC) != 0) * 2; \
    __a += ((__t & 0xF0) != 0) * 4; \
    __a += ((__t & 0xAA) != 0); \
    __t = __builtin_alpha_extbl (__xr, __a); \
    __a <<= 3; \
    __t &= -__t; \
    __a += ((__t & 0xCC) != 0) * 2; \
    __a += ((__t & 0xF0) != 0) * 4; \
    __a += ((__t & 0xAA) != 0); \
    (COUNT) = __a; \
  } while (0)
#endif /* __alpha_cix__ */
#endif /* __alpha */

#if defined (__arc__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%r" ((USItype) (ah)), \
             "rICal" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rICal" ((USItype) (bl)) \
           : "cc")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rICal" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rICal" ((USItype) (bl)) \
           : "cc")

#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
#ifdef __ARC_NORM__
#define count_leading_zeros(count, x) \
  do \
    { \
      SItype c_; \
      \
      __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
      (count) = c_ + 1; \
    } \
  while (0)
#define COUNT_LEADING_ZEROS_0 32
#endif /* __ARC_NORM__ */
#endif /* __arc__ */

#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
    && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%r" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rI" ((USItype) (bl)) __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rI" ((USItype) (bl)) __CLOBBER_CC)
# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
     || defined(__ARM_ARCH_3__)
# define umul_ppmm(xh, xl, a, b) \
  do { \
    register USItype __t0, __t1, __t2; \
    __asm__ ("%@ Inlined umul_ppmm\n" \
             " mov %2, %5, lsr #16\n" \
             " mov %0, %6, lsr #16\n" \
             " bic %3, %5, %2, lsl #16\n" \
             " bic %4, %6, %0, lsl #16\n" \
             " mul %1, %3, %4\n" \
             " mul %4, %2, %4\n" \
             " mul %3, %0, %3\n" \
             " mul %0, %2, %0\n" \
             " adds %3, %4, %3\n" \
             " addcs %0, %0, #65536\n" \
             " adds %1, %1, %3, lsl #16\n" \
             " adc %0, %0, %3, lsr #16" \
             : "=&r" ((USItype) (xh)), \
               "=r" ((USItype) (xl)), \
               "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
             : "r" ((USItype) (a)), \
               "r" ((USItype) (b)) __CLOBBER_CC ); \
  } while (0)
# define UMUL_TIME 20
# else
# define umul_ppmm(xh, xl, a, b) \
  do { \
    /* Generate umull, under compiler control.  */ \
    register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \
    (xl) = (USItype)__t0; \
    (xh) = (USItype)(__t0 >> 32); \
  } while (0)
# define UMUL_TIME 3
# endif
# define UDIV_TIME 100
#endif /* __arm__ */

#if defined(__arm__)
/* Let gcc decide how best to implement count_leading_zeros.  */
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif

#if defined (__AVR__)

#if W_TYPE_SIZE == 16
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 16
#endif /* W_TYPE_SIZE == 16 */

#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* defined (__AVR__) */

#if defined (__CRIS__)

#if __CRIS_arch_version >= 3
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __CRIS_arch_version >= 3 */

#if __CRIS_arch_version >= 8
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif /* __CRIS_arch_version >= 8 */

#if __CRIS_arch_version >= 10
#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
#else
#define __umulsidi3 __umulsidi3
extern UDItype __umulsidi3 (USItype, USItype);
#endif /* __CRIS_arch_version >= 10 */

#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = __umulsidi3 (u, v); \
    (w0) = (USItype) (__x); \
    (w1) = (USItype) (__x >> 32); \
  } while (0)

/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
   DFmode ("double" intrinsics, avoiding two of the three insns handling
   carry), but defining them as open-code C composing and doing the
   operation in DImode (UDImode) shows that the DImode needs work:
   register pressure from requiring neighboring registers and the
   traffic to and from them come to dominate, in the 4.7 series.  */

#endif /* defined (__CRIS__) */

#if defined (__hppa) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rM" ((USItype) (ah)), \
             "rM" ((USItype) (bh)), \
             "%rM" ((USItype) (al)), \
             "rM" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rM" ((USItype) (ah)), \
             "rM" ((USItype) (bh)), \
             "rM" ((USItype) (al)), \
             "rM" ((USItype) (bl)))
#if defined (_PA_RISC1_1)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    union \
      { \
        UDItype __f; \
        struct {USItype __w1, __w0;} __w1w0; \
      } __t; \
    __asm__ ("xmpyu %1,%2,%0" \
             : "=x" (__t.__f) \
             : "x" ((USItype) (u)), \
               "x" ((USItype) (v))); \
    (w1) = __t.__w1w0.__w1; \
    (w0) = __t.__w1w0.__w0; \
  } while (0)
#define UMUL_TIME 8
#else
#define UMUL_TIME 30
#endif
#define UDIV_TIME 40
#define count_leading_zeros(count, x) \
  do { \
    USItype __tmp; \
    __asm__ ( \
    "ldi 1,%0\n" \
"   extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
"   extru,tr %1,15,16,%1 ; No.  Shift down, skip add.\n"\
"   ldo 16(%0),%0 ; Yes.  Perform add.\n" \
"   extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
"   extru,tr %1,23,8,%1 ; No.  Shift down, skip add.\n"\
"   ldo 8(%0),%0 ; Yes.  Perform add.\n" \
"   extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
"   extru,tr %1,27,4,%1 ; No.  Shift down, skip add.\n"\
"   ldo 4(%0),%0 ; Yes.  Perform add.\n" \
"   extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
"   extru,tr %1,29,2,%1 ; No.  Shift down, skip add.\n"\
"   ldo 2(%0),%0 ; Yes.  Perform add.\n" \
"   extru %1,30,1,%1 ; Extract bit 1.\n" \
"   sub %0,%1,%0 ; Subtract it.\n" \
    : "=r" (count), "=r" (__tmp) : "1" (x)); \
  } while (0)
#endif

#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
#if !defined (__zarch__)
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("lr %N0,%1\n\tmr %0,%2" \
             : "=&r" (__x.__ll) \
             : "r" (m0), "r" (m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
  } while (0)
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __x.__i.__h = n1; __x.__i.__l = n0; \
    __asm__ ("dr %0,%2" \
             : "=r" (__x.__ll) \
             : "0" (__x.__ll), "r" (d)); \
    (q) = __x.__i.__l; (r) = __x.__i.__h; \
  } while (0)
#else
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    register SItype __r0 __asm__ ("0"); \
    register SItype __r1 __asm__ ("1") = (m0); \
    \
    __asm__ ("mr\t%%r0,%3" \
             : "=r" (__r0), "=r" (__r1) \
             : "r" (__r1), "r" (m1)); \
    (xh) = __r0; (xl) = __r1; \
  } while (0)

#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    register SItype __r0 __asm__ ("0") = (n1); \
    register SItype __r1 __asm__ ("1") = (n0); \
    \
    __asm__ ("dr\t%%r0,%4" \
             : "=r" (__r0), "=r" (__r1) \
             : "r" (__r0), "r" (__r1), "r" (d)); \
    (q) = __r1; (r) = __r0; \
  } while (0)
#endif /* __zarch__ */
#endif

#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{l} %3" \
           : "=a" ((USItype) (w0)), \
             "=d" ((USItype) (w1)) \
           : "%0" ((USItype) (u)), \
             "rm" ((USItype) (v)))
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{l} %4" \
           : "=a" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "rm" ((USItype) (dv)))
#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* 80x86 */

#if defined (__x86_64__) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \
           : "=r" ((UDItype) (sh)), \
             "=&r" ((UDItype) (sl)) \
           : "%0" ((UDItype) (ah)), \
             "rme" ((UDItype) (bh)), \
             "%1" ((UDItype) (al)), \
             "rme" ((UDItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \
           : "=r" ((UDItype) (sh)), \
             "=&r" ((UDItype) (sl)) \
           : "0" ((UDItype) (ah)), \
             "rme" ((UDItype) (bh)), \
             "1" ((UDItype) (al)), \
             "rme" ((UDItype) (bl)))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{q} %3" \
           : "=a" ((UDItype) (w0)), \
             "=d" ((UDItype) (w1)) \
           : "%0" ((UDItype) (u)), \
             "rm" ((UDItype) (v)))
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{q} %4" \
           : "=a" ((UDItype) (q)), \
             "=d" ((UDItype) (r)) \
           : "0" ((UDItype) (n0)), \
             "1" ((UDItype) (n1)), \
             "rm" ((UDItype) (dv)))
#define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
#define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* x86_64 */

#if defined (__i960__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __asm__ ("emul %2,%1,%0" \
           : "=d" (__xx.__ll) \
           : "%dI" ((USItype) (u)), \
             "dI" ((USItype) (v))); \
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("emul %2,%1,%0" \
             : "=d" (__w) \
             : "%dI" ((USItype) (u)), \
               "dI" ((USItype) (v))); \
    __w; })
#endif /* __i960__ */

#if defined (__ia64) && W_TYPE_SIZE == 64
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    if ((al) < (bl)) \
      (sh) = (ah) - (bh) - 1; \
    else \
      (sh) = (ah) - (bh); \
    (sl) = __x; \
  } while (0)

/* Do both product parts in assembly, since that gives better code with
   all gcc versions.  Some callers will just use the upper part, and in
   that situation we waste an instruction, but not any cycles.  */
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
           : "=&f" (ph), "=f" (pl) \
           : "f" (m0), "f" (m1))
#define count_leading_zeros(count, x) \
  do { \
    UWtype _x = (x), _y, _a, _c; \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
    _c = (_a - 1) << 3; \
    _x >>= _c; \
    if (_x >= 1 << 4) \
      _x >>= 4, _c += 4; \
    if (_x >= 1 << 2) \
      _x >>= 2, _c += 2; \
    _c += _x >> 1; \
    (count) = W_TYPE_SIZE - 1 - _c; \
  } while (0)
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
   based, and we don't need a special case for x==0 here */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    __asm__ ("popcnt %0 = %1" \
             : "=r" (count) \
             : "r" ((__ctz_x-1) & ~__ctz_x)); \
  } while (0)
#define UMUL_TIME 14
#endif

#ifdef __loongarch__
# if W_TYPE_SIZE == 32
# define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
# define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
# define COUNT_LEADING_ZEROS_0 32
# elif W_TYPE_SIZE == 64
# define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
# define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
# define COUNT_LEADING_ZEROS_0 64
# endif
#endif

#if defined (__M32R__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)) \
           : "cbit")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)) \
           : "cbit")
#endif /* __M32R__ */

#if defined (__mc68000__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
           : "=d" ((USItype) (sh)), \
             "=&d" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "d" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
           : "=d" ((USItype) (sh)), \
             "=&d" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "d" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))

/* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
#if (defined (__mc68020__) && !defined (__mc68060__))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu%.l %3,%1:%0" \
           : "=d" ((USItype) (w0)), \
             "=d" ((USItype) (w1)) \
           : "%0" ((USItype) (u)), \
             "dmi" ((USItype) (v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divu%.l %4,%1:%0" \
           : "=d" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "dmi" ((USItype) (d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divs%.l %4,%1:%0" \
           : "=d" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "dmi" ((USItype) (d)))

#elif defined (__mcoldfire__) /* not mc68020 */

#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           " move%.l %2,%/d0\n" \
           " move%.l %3,%/d1\n" \
           " move%.l %/d0,%/d2\n" \
           " swap %/d0\n" \
           " move%.l %/d1,%/d3\n" \
           " swap %/d1\n" \
           " move%.w %/d2,%/d4\n" \
           " mulu %/d3,%/d4\n" \
           " mulu %/d1,%/d2\n" \
           " mulu %/d0,%/d3\n" \
           " mulu %/d0,%/d1\n" \
           " move%.l %/d4,%/d0\n" \
           " clr%.w %/d0\n" \
           " swap %/d0\n" \
           " add%.l %/d0,%/d2\n" \
           " add%.l %/d3,%/d2\n" \
           " jcc 1f\n" \
           " add%.l %#65536,%/d1\n" \
           "1: swap %/d2\n" \
           " moveq %#0,%/d0\n" \
           " move%.w %/d2,%/d0\n" \
           " move%.w %/d4,%/d2\n" \
           " move%.l %/d2,%1\n" \
           " add%.l %/d1,%/d0\n" \
           " move%.l %/d0,%0" \
           : "=g" ((USItype) (xh)), \
             "=g" ((USItype) (xl)) \
           : "g" ((USItype) (a)), \
             "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400
#else /* not ColdFire */
/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           " move%.l %2,%/d0\n" \
           " move%.l %3,%/d1\n" \
           " move%.l %/d0,%/d2\n" \
           " swap %/d0\n" \
           " move%.l %/d1,%/d3\n" \
           " swap %/d1\n" \
           " move%.w %/d2,%/d4\n" \
           " mulu %/d3,%/d4\n" \
           " mulu %/d1,%/d2\n" \
           " mulu %/d0,%/d3\n" \
           " mulu %/d0,%/d1\n" \
           " move%.l %/d4,%/d0\n" \
           " eor%.w %/d0,%/d0\n" \
           " swap %/d0\n" \
           " add%.l %/d0,%/d2\n" \
           " add%.l %/d3,%/d2\n" \
           " jcc 1f\n" \
           " add%.l %#65536,%/d1\n" \
           "1: swap %/d2\n" \
           " moveq %#0,%/d0\n" \
           " move%.w %/d2,%/d0\n" \
           " move%.w %/d4,%/d2\n" \
           " move%.l %/d2,%1\n" \
           " add%.l %/d1,%/d0\n" \
           " move%.l %/d0,%0" \
           : "=g" ((USItype) (xh)), \
             "=g" ((USItype) (xl)) \
           : "g" ((USItype) (a)), \
             "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400

#endif /* not mc68020 */

/* The '020, '030, '040 and '060 have bitfield insns.
   cpu32 disguises as a 68020, but lacks them.  */
#if defined (__mc68020__) && !defined (__mcpu32__)
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
           : "=d" ((USItype) (count)) \
           : "od" ((USItype) (x)), "n" (0))
/* Some ColdFire architectures have a ff1 instruction supported via
   __builtin_clz.  */
#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* mc68000 */

#if defined (__m88000__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rJ" ((USItype) (ah)), \
             "rJ" ((USItype) (bh)), \
             "%rJ" ((USItype) (al)), \
             "rJ" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rJ" ((USItype) (ah)), \
             "rJ" ((USItype) (bh)), \
             "rJ" ((USItype) (al)), \
             "rJ" ((USItype) (bl)))
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("ff1 %0,%1" \
             : "=r" (__cbtmp) \
             : "r" ((USItype) (x))); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
#if defined (__mc88110__)
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
    __asm__ ("mulu.d %0,%1,%2" \
             : "=r" (__xx.__ll) \
             : "r" ((USItype) (u)), \
               "r" ((USItype) (v))); \
    (wh) = __xx.__i.__h; \
    (wl) = __xx.__i.__l; \
  } while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
  USItype __q; \
  __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
  __asm__ ("divu.d %0,%1,%2" \
           : "=r" (__q) \
           : "r" (__xx.__ll), \
             "r" ((USItype) (d))); \
  (r) = (n0) - __q * (d); (q) = __q; })
#define UMUL_TIME 5
#define UDIV_TIME 25
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
#endif /* __mc88110__ */
#endif /* __m88000__ */

#if defined (__mn10300__)
# if defined (__AM33__)
# define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
# define umul_ppmm(w1, w0, u, v) \
    asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# define smul_ppmm(w1, w0, u, v) \
    asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
# define umul_ppmm(w1, w0, u, v) \
    asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# define smul_ppmm(w1, w0, u, v) \
    asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll + __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll - __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
# define udiv_qrnnd(q, r, nh, nl, d) \
    asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d) \
    asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif

#if defined (__mips__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \
    (w1) = (USItype) (__x >> 32); \
    (w0) = (USItype) (__x); \
  } while (0)
#define UMUL_TIME 10
#define UDIV_TIME 100

#if (__mips == 32 || __mips == 64) && ! defined (__mips16)
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* __mips__ */

/* FIXME: We should test _IBMR2 here when we add assembly support for the
   system vendor compilers.
   FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
   enough, since that hits ARM and m68k too.  */
#if (defined (_ARCH_PPC) /* AIX */ \
     || defined (__powerpc__) /* gcc */ \
     || defined (__POWERPC__) /* BEOS */ \
     || defined (__ppc__) /* Darwin */ \
     || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
     || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
         && CPU_FAMILY == PPC) \
     ) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else \
      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else \
      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
    || defined (__ppc__) \
    || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
    || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
        && CPU_FAMILY == PPC)
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    SItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#endif
#endif /* 32-bit POWER architecture variants.  */

/* We should test _IBMR2 here when we add assembly support for the system
   vendor compilers.  */
#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else \
      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else \
      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 64
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    DItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define SMUL_TIME 14 /* ??? */
#define UDIV_TIME 120 /* ??? */
#endif /* 64-bit PowerPC.  */

#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "r" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)))
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ( \
    "s r2,r2\n" \
"   mts r10,%2\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   m r2,%3\n" \
"   cas %0,r2,r0\n" \
"   mfs r10,%1" \
             : "=r" ((USItype) (ph)), \
               "=r" ((USItype) (pl)) \
             : "%r" (__m0), \
               "r" (__m1) \
             : "r2"); \
    (ph) += ((((SItype) __m0 >> 31) & __m1) \
             + (((SItype) __m1 >> 31) & __m0)); \
  } while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200
#define count_leading_zeros(count, x) \
  do { \
    if ((x) >= 0x10000) \
      __asm__ ("clz %0,%1" \
               : "=r" ((USItype) (count)) \
               : "r" ((USItype) (x) >> 16)); \
    else \
      { \
        __asm__ ("clz %0,%1" \
                 : "=r" ((USItype) (count)) \
                 : "r" ((USItype) (x))); \
        (count) += 16; \
      } \
  } while (0)
#endif

#if defined(__riscv)
#ifdef __riscv_mul
#define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v))
#define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b))
#else
#if __riscv_xlen == 32
  #define MULUW3 "call __mulsi3"
#elif __riscv_xlen == 64
  #define MULUW3 "call __muldi3"
#else
#error unsupported xlen
#endif /* __riscv_xlen */
/* We rely on the fact that MULUW3 doesn't clobber the t-registers;
   that gives a better register allocation result.  */
#define __muluw3(a, b) \
  ({ \
    register UWtype __op0 asm ("a0") = a; \
    register UWtype __op1 asm ("a1") = b; \
    asm volatile (MULUW3 \
                  : "+r" (__op0), "+r" (__op1) \
                  : \
                  : "ra", "a2", "a3"); \
    __op0; \
  })
#endif /* __riscv_mul */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
    \
    __ul = __ll_lowpart (u); \
    __uh = __ll_highpart (u); \
    __vl = __ll_lowpart (v); \
    __vh = __ll_highpart (v); \
    \
    __x0 = __muluw3 (__ul, __vl); \
    __x1 = __muluw3 (__ul, __vh); \
    __x2 = __muluw3 (__uh, __vl); \
    __x3 = __muluw3 (__uh, __vh); \
    \
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2; /* but this indeed can */ \
    if (__x1 < __x2) /* did we get it? */ \
      __x3 += __ll_B; /* yes, add it in the proper pos.  */ \
    \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
  } while (0)
#endif /* __riscv */
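
/* Illustrative note on the half-word method used by the RISC-V umul_ppmm
   above (and by the generic C umul_ppmm at the end of this file): it
   follows the identity
     u * v = (uh*B + ul) * (vh*B + vl)
           = uh*vh*B^2 + (uh*vl + ul*vh)*B + ul*vl
   with B = 2^(W_TYPE_SIZE/2).  __x3 accumulates the B^2 term, __x1 and __x2
   the two middle terms; their sum can overflow one word, and the explicit
   comparison detects that and compensates by adding __ll_B to __x3.  */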

#if defined(__sh__) && W_TYPE_SIZE == 32
#ifndef __sh1__
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ( \
    "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
           : "=r<" ((USItype)(w1)), \
             "=r<" ((USItype)(w0)) \
           : "r" ((USItype)(u)), \
             "r" ((USItype)(v)) \
           : "macl", "mach")
#define UMUL_TIME 5
#endif

/* This is the same algorithm as __udiv_qrnnd_c.  */
#define UDIV_NEEDS_NORMALIZATION 1

#ifdef __FDPIC__
/* FDPIC needs a special version of the asm fragment to extract the
   code address from the function descriptor.  __udiv_qrnnd_16 is
   assumed to be local and not to use the GOT, so loading r12 is
   not needed.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
      __attribute__ ((visibility ("hidden"))); \
    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
    __asm__ ( \
    "mov%M4 %4,r5\n" \
"   swap.w %3,r4\n" \
"   swap.w r5,r6\n" \
"   mov.l @%5,r2\n" \
"   jsr @r2\n" \
"   shll16 r6\n" \
"   swap.w r4,r4\n" \
"   mov.l @%5,r2\n" \
"   jsr @r2\n" \
"   swap.w r1,%0\n" \
"   or r1,%0" \
    : "=r" (q), "=&z" (r) \
    : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
    : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
  } while (0)
#else
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
      __attribute__ ((visibility ("hidden"))); \
    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
    __asm__ ( \
    "mov%M4 %4,r5\n" \
"   swap.w %3,r4\n" \
"   swap.w r5,r6\n" \
"   jsr @%5\n" \
"   shll16 r6\n" \
"   swap.w r4,r4\n" \
"   jsr @%5\n" \
"   swap.w r1,%0\n" \
"   or r1,%0" \
    : "=r" (q), "=&z" (r) \
    : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
    : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
  } while (0)
#endif /* __FDPIC__ */

#define UDIV_TIME 80

#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("clrt;subc %5,%1; subc %4,%0" \
           : "=r" (sh), "=r" (sl) \
           : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")

#endif /* __sh__ */

#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
    && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rJ" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "%rJ" ((USItype) (al)), \
             "rI" ((USItype) (bl)) \
           __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rJ" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "rJ" ((USItype) (al)), \
             "rI" ((USItype) (bl)) \
           __CLOBBER_CC)
#if defined (__sparc_v9__)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    register USItype __g1 asm ("g1"); \
    __asm__ ("umul\t%2,%3,%1\n\t" \
             "srlx\t%1, 32, %0" \
             : "=r" ((USItype) (w1)), \
               "=r" (__g1) \
             : "r" ((USItype) (u)), \
               "r" ((USItype) (v))); \
    (w0) = __g1; \
  } while (0)
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("mov\t%2,%%y\n\t" \
           "udiv\t%3,%4,%0\n\t" \
           "umul\t%0,%4,%1\n\t" \
           "sub\t%3,%1,%1" \
           : "=&r" ((USItype) (__q)), \
             "=&r" ((USItype) (__r)) \
           : "r" ((USItype) (__n1)), \
             "r" ((USItype) (__n0)), \
             "r" ((USItype) (__d)))
#else
#if defined (__sparc_v8__)
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "r" ((USItype) (u)), \
             "r" ((USItype) (v)))
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
           : "=&r" ((USItype) (__q)), \
             "=&r" ((USItype) (__r)) \
           : "r" ((USItype) (__n1)), \
             "r" ((USItype) (__n0)), \
             "r" ((USItype) (__d)))
#else
#if defined (__sparclite__)
/* This has hardware multiply but not divide.  It also has two additional
   instructions scan (ffs from high bit) and divscc.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "r" ((USItype) (u)), \
             "r" ((USItype) (v)))
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
           " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
           " tst %%g0\n" \
           " divscc %3,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%%g1\n" \
           " divscc %%g1,%4,%0\n" \
           " rd %%y,%1\n" \
           " bl,a 1f\n" \
           " add %1,%4,%1\n" \
           "1: ! End of inline udiv_qrnnd" \
           : "=r" ((USItype) (q)), \
             "=r" ((USItype) (r)) \
           : "r" ((USItype) (n1)), \
             "r" ((USItype) (n0)), \
             "rI" ((USItype) (d)) \
           : "g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
#define count_leading_zeros(count, x) \
  do { \
  __asm__ ("scan %1,1,%0" \
           : "=r" ((USItype) (count)) \
           : "r" ((USItype) (x))); \
  } while (0)
/* Early sparclites return 63 for an argument of 0, but they warn that future
   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
   undefined.  */
#else
/* SPARC without integer multiplication and divide instructions.
   (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("! Inlined umul_ppmm\n" \
           " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
           " sra %3,31,%%o5 ! Don't move this insn\n" \
           " and %2,%%o5,%%o5 ! Don't move this insn\n" \
           " andcc %%g0,0,%%g1 ! Don't move this insn\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,%3,%%g1\n" \
           " mulscc %%g1,0,%%g1\n" \
           " add %%g1,%%o5,%0\n" \
           " rd %%y,%1" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "%rI" ((USItype) (u)), \
             "r" ((USItype) (v)) \
           : "g1", "o5" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
/* It's quite necessary to add this much assembler for the sparc.
   The default udiv_qrnnd (in C) is more than 10 times slower!  */
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
           " mov 32,%%g1\n" \
           " subcc %1,%2,%%g0\n" \
           "1: bcs 5f\n" \
           " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
           " sub %1,%2,%1 ! this kills msb of n\n" \
           " addx %1,%1,%1 ! so this can't give carry\n" \
           " subcc %%g1,1,%%g1\n" \
           "2: bne 1b\n" \
           " subcc %1,%2,%%g0\n" \
           " bcs 3f\n" \
           " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
           " b 3f\n" \
           " sub %1,%2,%1 ! this kills msb of n\n" \
           "4: sub %1,%2,%1\n" \
           "5: addxcc %1,%1,%1\n" \
           " bcc 2b\n" \
           " subcc %%g1,1,%%g1\n" \
           "! Got carry from n.  Subtract next step to cancel this carry.\n" \
           " bne 4b\n" \
           " addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
           " sub %1,%2,%1\n" \
           "3: xnor %0,0,%0\n" \
           " ! End of inline udiv_qrnnd" \
           : "=&r" ((USItype) (__q)), \
             "=&r" ((USItype) (__r)) \
           : "r" ((USItype) (__d)), \
             "1" ((USItype) (__n1)), \
             "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration.  32 iterations.  */
#endif /* __sparclite__ */
#endif /* __sparc_v8__ */
#endif /* __sparc_v9__ */
#endif /* sparc32 */

#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
    && W_TYPE_SIZE == 64
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UDItype __carry = 0; \
    __asm__ ("addcc\t%r5,%6,%1\n\t" \
             "add\t%r3,%4,%0\n\t" \
             "movcs\t%%xcc, 1, %2\n\t" \
             "add\t%0, %2, %0" \
             : "=r" ((UDItype)(sh)), \
               "=&r" ((UDItype)(sl)), \
               "+r" (__carry) \
             : "%rJ" ((UDItype)(ah)), \
               "rI" ((UDItype)(bh)), \
               "%rJ" ((UDItype)(al)), \
               "rI" ((UDItype)(bl)) \
             __CLOBBER_CC); \
  } while (0)

#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UDItype __carry = 0; \
    __asm__ ("subcc\t%r5,%6,%1\n\t" \
             "sub\t%r3,%4,%0\n\t" \
             "movcs\t%%xcc, 1, %2\n\t" \
             "sub\t%0, %2, %0" \
             : "=r" ((UDItype)(sh)), \
               "=&r" ((UDItype)(sl)), \
               "+r" (__carry) \
             : "%rJ" ((UDItype)(ah)), \
               "rI" ((UDItype)(bh)), \
               "%rJ" ((UDItype)(al)), \
               "rI" ((UDItype)(bl)) \
             __CLOBBER_CC); \
  } while (0)

#define umul_ppmm(wh, wl, u, v) \
  do { \
    UDItype tmp1, tmp2, tmp3, tmp4; \
    __asm__ __volatile__ ( \
             "srl %7,0,%3\n\t" \
             "mulx %3,%6,%1\n\t" \
             "srlx %6,32,%2\n\t" \
             "mulx %2,%3,%4\n\t" \
             "sllx %4,32,%5\n\t" \
             "srl %6,0,%3\n\t" \
             "sub %1,%5,%5\n\t" \
             "srlx %5,32,%5\n\t" \
             "addcc %4,%5,%4\n\t" \
             "srlx %7,32,%5\n\t" \
             "mulx %3,%5,%3\n\t" \
             "mulx %2,%5,%5\n\t" \
             "sethi %%hi(0x80000000),%2\n\t" \
             "addcc %4,%3,%4\n\t" \
             "srlx %4,32,%4\n\t" \
             "add %2,%2,%2\n\t" \
             "movcc %%xcc,%%g0,%2\n\t" \
             "addcc %5,%4,%5\n\t" \
             "sllx %3,32,%3\n\t" \
             "add %1,%3,%1\n\t" \
             "add %5,%2,%0" \
             : "=r" ((UDItype)(wh)), \
               "=&r" ((UDItype)(wl)), \
               "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
             : "r" ((UDItype)(u)), \
               "r" ((UDItype)(v)) \
             __CLOBBER_CC); \
  } while (0)
#define UMUL_TIME 96
#define UDIV_TIME 230
#endif /* sparc64 */

#if defined (__vax__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
           : "=g" ((USItype) (sh)), \
             "=&g" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
           : "=g" ((USItype) (sh)), \
             "=&g" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union { \
        UDItype __ll; \
        struct {USItype __l, __h;} __i; \
      } __xx; \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("emul %1,%2,$0,%0" \
             : "=r" (__xx.__ll) \
             : "g" (__m0), \
               "g" (__m1)); \
    (xh) = __xx.__i.__h; \
    (xl) = __xx.__i.__l; \
    (xh) += ((((SItype) __m0 >> 31) & __m1) \
             + (((SItype) __m1 >> 31) & __m0)); \
  } while (0)
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
           struct {SItype __l, __h;} __i; \
          } __xx; \
    __xx.__i.__h = n1; __xx.__i.__l = n0; \
    __asm__ ("ediv %3,%2,%0,%1" \
             : "=g" (q), "=g" (r) \
             : "g" (__xx.__ll), "g" (d)); \
  } while (0)
#endif /* __vax__ */

#ifdef _TMS320C6X
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do \
    { \
      UDItype __ll; \
      __asm__ ("addu .l1 %1, %2, %0" \
               : "=a" (__ll) : "a" (al), "a" (bl)); \
      (sl) = (USItype)__ll; \
      (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \
    } \
  while (0)

#ifdef _TMS320C6400_PLUS
#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \
    (w1) = (USItype) (__x >> 32); \
    (w0) = (USItype) (__x); \
  } while (0)
#endif /* _TMS320C6400_PLUS */

#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
#ifdef _TMS320C6400
#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
#endif
#define UMUL_TIME 4
#define UDIV_TIME 40
#endif /* _TMS320C6X */

#if defined (__xtensa__) && W_TYPE_SIZE == 32
/* This code is not Xtensa-configuration-specific, so rely on the compiler
   to expand builtin functions depending on what configuration features
   are available.  This avoids library calls when the operation can be
   performed in-line.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    DWunion __w; \
    __w.ll = __builtin_umulsidi3 (u, v); \
    w1 = __w.s.high; \
    w0 = __w.s.low; \
  } while (0)
#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif /* __xtensa__ */
1564
ba964383
NC
1565#if defined xstormy16
extern UHItype __stormy16_count_leading_zeros (UHItype);
#define count_leading_zeros(count, x) \
  do \
    { \
      UHItype size; \
 \
      /* We assume that W_TYPE_SIZE is a multiple of 16... */ \
      for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \
        { \
          UHItype c; \
          extern UHItype __clzhi2 (UHItype); \
 \
          c = __clzhi2 ((x) >> (size - 16)); \
          (count) += c; \
          if (c != 16) \
            break; \
        } \
    } \
  while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif

#if defined (__z8000__) && W_TYPE_SIZE == 16
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
           : "=r" ((unsigned int)(sh)), \
             "=&r" ((unsigned int)(sl)) \
           : "%0" ((unsigned int)(ah)), \
             "r" ((unsigned int)(bh)), \
             "%1" ((unsigned int)(al)), \
             "rQR" ((unsigned int)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
           : "=r" ((unsigned int)(sh)), \
             "=&r" ((unsigned int)(sl)) \
           : "0" ((unsigned int)(ah)), \
             "r" ((unsigned int)(bh)), \
             "1" ((unsigned int)(al)), \
             "rQR" ((unsigned int)(bl)))
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union {long int __ll; \
           struct {unsigned int __h, __l;} __i; \
          } __xx; \
    unsigned int __m0 = (m0), __m1 = (m1); \
    __asm__ ("mult %S0,%H3" \
             : "=r" (__xx.__i.__h), \
               "=r" (__xx.__i.__l) \
             : "%1" (__m0), \
               "rQR" (__m1)); \
    (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
    (xh) += ((((signed int) __m0 >> 15) & __m1) \
             + (((signed int) __m1 >> 15) & __m0)); \
  } while (0)
#endif /* __z8000__ */

#endif /* __GNUC__ */

/* If this machine has no inline assembler, use C macros. */

#if !defined (add_ssaaaa)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) + (bl); \
    (sh) = (ah) + (bh) + (__x < (al)); \
    (sl) = __x; \
  } while (0)
#endif

#if !defined (sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    (sh) = (ah) - (bh) - (__x > (al)); \
    (sl) = __x; \
  } while (0)
#endif
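/* Illustration only (not part of this header's interface): how the generic
   add_ssaaaa/sub_ddmmss above propagate carry and borrow purely in C.  The
   unsigned compare (__x < (al)) is 1 exactly when the low-word sum wrapped,
   and (__x > (al)) is 1 exactly when the low-word difference borrowed.  The
   helper below is a hypothetical sketch and assumes the includer has
   defined UWtype (e.g. USItype on a 32-bit target).  */
#if 0
static void
add_double (UWtype *sh, UWtype *sl,
            UWtype ah, UWtype al, UWtype bh, UWtype bl)
{
  UWtype h, l;
  add_ssaaaa (h, l, ah, al, bh, bl);    /* {h,l} = {ah,al} + {bh,bl} */
  /* With 32-bit words, {0,0xffffffff} + {0,1} wraps the low word to 0 and
     the carry test adds 1 to the high word, giving {1,0}.  */
  *sh = h, *sl = l;
}
#endif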

/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
   smul_ppmm. */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    smul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
         + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
#endif
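/* Illustration only: why the adjustment above turns the signed high word
   into the unsigned one.  Reading a word as unsigned adds 2**W_TYPE_SIZE to
   its signed value whenever its top bit is set, so the unsigned product
   exceeds the signed product by (top bit of u ? v : 0) + (top bit of v ?
   u : 0) shifted up by one word; only the high word needs correcting, and
   -(__xm0 >> (W_TYPE_SIZE - 1)) is an all-ones mask exactly when the top
   bit of __xm0 is set.  A hypothetical self-check, assuming smul_ppmm
   exists, W_TYPE_SIZE is 32 and UDItype holds two words:  */
#if 0
static int
check_umul_from_smul (UWtype u, UWtype v)
{
  UWtype h, l;
  umul_ppmm (h, l, u, v);       /* the smul_ppmm-based definition above */
  return h == (UWtype) (((UDItype) u * v) >> 32)
         && l == (UWtype) ((UDItype) u * v);
}
#endif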

/* If we still don't have umul_ppmm, define it using plain C. */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
 \
    __ul = __ll_lowpart (u); \
    __uh = __ll_highpart (u); \
    __vl = __ll_lowpart (v); \
    __vh = __ll_highpart (v); \
 \
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
 \
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2; /* but this indeed can */ \
    if (__x1 < __x2) /* did we get it? */ \
      __x3 += __ll_B; /* yes, add it in the proper pos. */ \
 \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
  } while (0)
#endif
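/* Illustration only: the plain C umul_ppmm above is schoolbook
   multiplication on half-words.  Writing u = uh*B + ul and v = vh*B + vl
   with B = 2**(W_TYPE_SIZE/2), u*v = uh*vh*B*B + (uh*vl + ul*vh)*B + ul*vl,
   and the single possible carry out of the middle sum is caught by the
   (__x1 < __x2) test.  A hypothetical self-check for W_TYPE_SIZE == 32,
   assuming UDItype is a 64-bit type:  */
#if 0
static int
check_umul_ppmm (USItype u, USItype v)
{
  USItype w1, w0;
  UDItype ref = (UDItype) u * v;
  umul_ppmm (w1, w0, u, v);
  return w1 == (USItype) (ref >> 32) && w0 == (USItype) ref;
}
#endif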

#if !defined (__umulsidi3)
#define __umulsidi3(u, v) \
  ({DWunion __w; \
    umul_ppmm (__w.s.high, __w.s.low, u, v); \
    __w.ll; })
#endif

/* Define this unconditionally, so it can be used for debugging. */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do { \
    UWtype __d1, __d0, __q1, __q0; \
    UWtype __r1, __r0, __m; \
    __d1 = __ll_highpart (d); \
    __d0 = __ll_lowpart (d); \
 \
    __r1 = (n1) % __d1; \
    __q1 = (n1) / __d1; \
    __m = (UWtype) __q1 * __d0; \
    __r1 = __r1 * __ll_B | __ll_highpart (n0); \
    if (__r1 < __m) \
      { \
        __q1--, __r1 += (d); \
        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
          if (__r1 < __m) \
            __q1--, __r1 += (d); \
      } \
    __r1 -= __m; \
 \
    __r0 = __r1 % __d1; \
    __q0 = __r1 / __d1; \
    __m = (UWtype) __q0 * __d0; \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
    if (__r0 < __m) \
      { \
        __q0--, __r0 += (d); \
        if (__r0 >= (d)) \
          if (__r0 < __m) \
            __q0--, __r0 += (d); \
      } \
    __r0 -= __m; \
 \
    (q) = (UWtype) __q1 * __ll_B | __q0; \
    (r) = __r0; \
  } while (0)
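/* Illustration only: __udiv_qrnnd_c works half-word by half-word and
   expects a normalized divisor (most significant bit of d set); that is
   what UDIV_NEEDS_NORMALIZATION advertises below.  A hypothetical wrapper
   for an arbitrary nonzero d (with n1 < d so the quotient fits in one
   word), assuming count_leading_zeros is available:  */
#if 0
static UWtype
udiv_2by1 (UWtype *rem, UWtype n1, UWtype n0, UWtype d)
{
  UWtype q, r, bm;
  count_leading_zeros (bm, d);
  if (bm != 0)
    {
      /* Shift divisor and numerator left so the top bit of d is set.  */
      d <<= bm;
      n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
      n0 <<= bm;
    }
  __udiv_qrnnd_c (q, r, n1, n0, d);
  *rem = r >> bm;       /* undo the normalization on the remainder */
  return q;
}
#endif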

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere). */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \
    UWtype __r; \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d); \
    (r) = __r; \
  } while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do { \
    UWtype __xr = (x); \
    UWtype __a; \
 \
    if (W_TYPE_SIZE <= 32) \
      { \
        __a = __xr < ((UWtype)1<<2*__BITS4) \
          ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \
          : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
      } \
    else \
      { \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
          if (((__xr >> __a) & 0xff) != 0) \
            break; \
      } \
 \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
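/* Illustration only: the generic count_leading_zeros above first narrows
   the search to the highest nonzero chunk of __xr (the shift amount __a),
   then lets the __clz_tab lookup supply the position of the top set bit
   within that chunk, so the result is W_TYPE_SIZE minus one minus the index
   of the highest set bit.  A hypothetical helper built on it, assuming the
   macro is usable in this configuration:  */
#if 0
static UWtype
bit_length (UWtype x)   /* number of significant bits, 0 for x == 0 */
{
  UWtype c;
  if (x == 0)
    return 0;
  count_leading_zeros (c, x);
  return W_TYPE_SIZE - c;
}
#endif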

#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might
   be defined in asm, but if it is not, the C version above is good enough. */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    UWtype __ctz_c; \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
    (count) = W_TYPE_SIZE - 1 - __ctz_c; \
  } while (0)
#endif
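/* Illustration only: x & -x isolates the lowest set bit of x, so counting
   its leading zeros pins down that bit's position and the trailing-zero
   count falls out as W_TYPE_SIZE - 1 - clz (x & -x).  For example, with
   32-bit words and x = 0x58, x & -x = 0x08, clz = 28, and the macro yields
   32 - 1 - 28 = 3.  A hypothetical wrapper, assuming x is nonzero:  */
#if 0
static UWtype
trailing_zero_count (UWtype x)
{
  UWtype c;
  count_trailing_zeros (c, x);
  return c;
}
#endif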

#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif