]> git.ipfire.org Git - thirdparty/gcc.git/blame - include/longlong.h
Update copyright years.
[thirdparty/gcc.git] / include / longlong.h
CommitLineData
e1a79915 1/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
7adcbafe 2 Copyright (C) 1991-2022 Free Software Foundation, Inc.
e1a79915 3
5a248274
JM
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
e1a79915 8 License as published by the Free Software Foundation; either
5a248274
JM
9 version 2.1 of the License, or (at your option) any later version.
10
11 In addition to the permissions in the GNU Lesser General Public
12 License, the Free Software Foundation gives you unlimited
13 permission to link the compiled version of this file into
14 combinations with other programs, and to distribute those
15 combinations without any restriction coming from the use of this
16 file. (The Lesser General Public License restrictions do apply in
17 other respects; for example, they cover modification of the file,
18 and distribution when not linked into a combine executable.)
e1a79915 19
5a248274
JM
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
e1a79915 24
5a248274 25 You should have received a copy of the GNU Lesser General Public
0d03ac1b
L
26 License along with the GNU C Library; if not, see
27 <http://www.gnu.org/licenses/>. */
e1a79915 28
021b3949
JJ
29/* You have to define the following before including this file:
30
31 UWtype -- An unsigned type, default type for operations (typically a "word")
32 UHWtype -- An unsigned type, at least half the size of UWtype.
33 UDWtype -- An unsigned type, at least twice as large as UWtype
34 W_TYPE_SIZE -- size in bits of UWtype
35
36 UQItype -- Unsigned 8 bit type.
37 SItype, USItype -- Signed and unsigned 32 bit types.
38 DItype, UDItype -- Signed and unsigned 64 bit types.
e1a79915 39
021b3949 40 On a 32 bit machine UWtype should typically be USItype;
dcfae47c 41 on a 64 bit machine, UWtype should typically be UDItype. */
021b3949
JJ
42
/* Helpers for splitting a UWtype into half-words.  __BITS4 is a quarter
   of the word size in bits, __ll_B is 2^(W_TYPE_SIZE/2), and
   __ll_lowpart / __ll_highpart yield the low and high half of T as a
   UWtype.  Macros expand at the point of use, so the fallback
   definitions of W_TYPE_SIZE and UWtype just below are already in
   effect whenever these helpers are used.  */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

/* If the includer did not configure a word size, default to a 32-bit
   word (USItype) with a 64-bit double word (UDItype).  */
#ifndef W_TYPE_SIZE
#define W_TYPE_SIZE 32
#define UWtype USItype
#define UHWtype USItype
#define UDWtype UDItype
#endif
e1a79915 54
5a248274
JM
55/* Used in glibc only. */
56#ifndef attribute_hidden
57#define attribute_hidden
58#endif
59
60extern const UQItype __clz_tab[256] attribute_hidden;
b4f05423 61
2e8a6600 62/* Define auxiliary asm macros.
e1a79915 63
0fa2e4df
KH
64 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
65 UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
021b3949 66 word product in HIGH_PROD and LOW_PROD.
e1a79915 67
021b3949
JJ
68 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
69 UDWtype product. This is just a variant of umul_ppmm.
e1a79915
RS
70
71 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
021b3949
JJ
72 denominator) divides a UDWtype, composed by the UWtype integers
73 HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
74 in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
75 than DENOMINATOR for correct operation. If, in addition, the most
76 significant bit of DENOMINATOR must be 1, then the pre-processor symbol
77 UDIV_NEEDS_NORMALIZATION is defined to 1.
e1a79915 78
b6d2adc7 79 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
021b3949
JJ
80 denominator). Like udiv_qrnnd but the numbers are signed. The quotient
81 is rounded towards 0.
82
83 5) count_leading_zeros(count, x) counts the number of zero-bits from the
cc2902df 84 msb to the first nonzero bit in the UWtype X. This is the number of
021b3949
JJ
85 steps X needs to be shifted left to set the msb. Undefined for X == 0,
86 unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
87
88 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
89 from the least significant end.
90
91 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
92 high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
93 HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
94 respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
95 (i.e. carry out) is not stored anywhere, and is lost.
96
97 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
98 high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
99 composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
100 LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
101 and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
e1a79915
RS
102 and is lost.
103
104 If any of these macros are left undefined for a particular CPU,
105 C macros are used. */
106
107/* The CPUs come in alphabetical order below.
108
109 Please add support for more CPUs here, or improve the current support
110 for the CPUs below!
f71c71f1 111 (E.g. WE32100, IBM360.) */
d83dd29a 112
3a0d22dd
MM
113#if defined (__GNUC__) && !defined (NO_ASM)
114
d83dd29a
TG
115/* We sometimes need to clobber "cc" with gcc2, but that would not be
116 understood by gcc1. Use cpp to avoid major code duplication. */
117#if __GNUC__ < 2
118#define __CLOBBER_CC
119#define __AND_CLOBBER_CC
120#else /* __GNUC__ >= 2 */
121#define __CLOBBER_CC : "cc"
122#define __AND_CLOBBER_CC , "cc"
123#endif /* __GNUC__ < 2 */
e1a79915 124
130a7859
YZ
125#if defined (__aarch64__)
126
127#if W_TYPE_SIZE == 32
128#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
129#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
130#define COUNT_LEADING_ZEROS_0 32
131#endif /* W_TYPE_SIZE == 32 */
132
133#if W_TYPE_SIZE == 64
134#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X))
135#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X))
136#define COUNT_LEADING_ZEROS_0 64
137#endif /* W_TYPE_SIZE == 64 */
138
139#endif /* __aarch64__ */
140
021b3949 141#if defined (__alpha) && W_TYPE_SIZE == 64
99630555
RH
142/* There is a bug in g++ before version 5 that
143 errors on __builtin_alpha_umulh. */
144#if !defined(__cplusplus) || __GNUC__ >= 5
021b3949
JJ
145#define umul_ppmm(ph, pl, m0, m1) \
146 do { \
147 UDItype __m0 = (m0), __m1 = (m1); \
496e1c4b 148 (ph) = __builtin_alpha_umulh (__m0, __m1); \
021b3949
JJ
149 (pl) = __m0 * __m1; \
150 } while (0)
151#define UMUL_TIME 46
99630555 152#endif /* !c++ */
021b3949
JJ
153#ifndef LONGLONG_STANDALONE
154#define udiv_qrnnd(q, r, n1, n0, d) \
155 do { UDItype __r; \
156 (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
157 (r) = __r; \
158 } while (0)
429489e5 159extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
021b3949
JJ
160#define UDIV_TIME 220
161#endif /* LONGLONG_STANDALONE */
8f4773ea 162#ifdef __alpha_cix__
1efd0b97
RH
163#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
164#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
8f4773ea
RH
165#define COUNT_LEADING_ZEROS_0 64
166#else
8f4773ea
RH
167#define count_leading_zeros(COUNT,X) \
168 do { \
169 UDItype __xr = (X), __t, __a; \
1efd0b97 170 __t = __builtin_alpha_cmpbge (0, __xr); \
8f4773ea 171 __a = __clz_tab[__t ^ 0xff] - 1; \
1efd0b97 172 __t = __builtin_alpha_extbl (__xr, __a); \
8f4773ea
RH
173 (COUNT) = 64 - (__clz_tab[__t] + __a*8); \
174 } while (0)
175#define count_trailing_zeros(COUNT,X) \
176 do { \
177 UDItype __xr = (X), __t, __a; \
1efd0b97 178 __t = __builtin_alpha_cmpbge (0, __xr); \
8f4773ea
RH
179 __t = ~__t & -~__t; \
180 __a = ((__t & 0xCC) != 0) * 2; \
181 __a += ((__t & 0xF0) != 0) * 4; \
182 __a += ((__t & 0xAA) != 0); \
1efd0b97 183 __t = __builtin_alpha_extbl (__xr, __a); \
8f4773ea
RH
184 __a <<= 3; \
185 __t &= -__t; \
186 __a += ((__t & 0xCC) != 0) * 2; \
187 __a += ((__t & 0xF0) != 0) * 4; \
188 __a += ((__t & 0xAA) != 0); \
189 (COUNT) = __a; \
190 } while (0)
191#endif /* __alpha_cix__ */
021b3949
JJ
192#endif /* __alpha */
193
194#if defined (__arc__) && W_TYPE_SIZE == 32
66ed0683 195#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
7efe41a9 196 __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
66ed0683
JL
197 : "=r" ((USItype) (sh)), \
198 "=&r" ((USItype) (sl)) \
199 : "%r" ((USItype) (ah)), \
1ab06af6 200 "rICal" ((USItype) (bh)), \
66ed0683 201 "%r" ((USItype) (al)), \
67a9a711
VG
202 "rICal" ((USItype) (bl)) \
203 : "cc")
66ed0683 204#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
7efe41a9 205 __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
66ed0683
JL
206 : "=r" ((USItype) (sh)), \
207 "=&r" ((USItype) (sl)) \
208 : "r" ((USItype) (ah)), \
1ab06af6 209 "rICal" ((USItype) (bh)), \
66ed0683 210 "r" ((USItype) (al)), \
67a9a711
VG
211 "rICal" ((USItype) (bl)) \
212 : "cc")
d38a64b4
JR
213
/* Widening unsigned multiply in plain C: U and V are each converted to
   USItype and multiplied as UDItype.  Both arguments are parenthesized
   so that an expression argument such as a + b is converted and
   multiplied as a whole instead of being split by operator precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u) * (USItype)(v))
215#ifdef __ARC_NORM__
216#define count_leading_zeros(count, x) \
217 do \
218 { \
219 SItype c_; \
220 \
221 __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
222 (count) = c_ + 1; \
223 } \
224 while (0)
225#define COUNT_LEADING_ZEROS_0 32
1ab06af6
CZ
226#endif /* __ARC_NORM__ */
227#endif /* __arc__ */
66ed0683 228
75ffafdc
MH
229#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
230 && W_TYPE_SIZE == 32
e1a79915 231#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
7efe41a9 232 __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
d6b0bb68
MS
233 : "=r" ((USItype) (sh)), \
234 "=&r" ((USItype) (sl)) \
235 : "%r" ((USItype) (ah)), \
236 "rI" ((USItype) (bh)), \
237 "%r" ((USItype) (al)), \
74900b5a 238 "rI" ((USItype) (bl)) __CLOBBER_CC)
e1a79915 239#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
7efe41a9 240 __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
d6b0bb68
MS
241 : "=r" ((USItype) (sh)), \
242 "=&r" ((USItype) (sl)) \
243 : "r" ((USItype) (ah)), \
244 "rI" ((USItype) (bh)), \
245 "r" ((USItype) (al)), \
74900b5a 246 "rI" ((USItype) (bl)) __CLOBBER_CC)
8993fde4
RH
247# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
248 || defined(__ARM_ARCH_3__)
249# define umul_ppmm(xh, xl, a, b) \
250 do { \
251 register USItype __t0, __t1, __t2; \
252 __asm__ ("%@ Inlined umul_ppmm\n" \
7efe41a9
RH
253 " mov %2, %5, lsr #16\n" \
254 " mov %0, %6, lsr #16\n" \
255 " bic %3, %5, %2, lsl #16\n" \
256 " bic %4, %6, %0, lsl #16\n" \
257 " mul %1, %3, %4\n" \
258 " mul %4, %2, %4\n" \
259 " mul %3, %0, %3\n" \
260 " mul %0, %2, %0\n" \
261 " adds %3, %4, %3\n" \
262 " addcs %0, %0, #65536\n" \
263 " adds %1, %1, %3, lsl #16\n" \
264 " adc %0, %0, %3, lsr #16" \
0f41302f
MS
265 : "=&r" ((USItype) (xh)), \
266 "=r" ((USItype) (xl)), \
6a04a634 267 "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
0f41302f 268 : "r" ((USItype) (a)), \
8993fde4
RH
269 "r" ((USItype) (b)) __CLOBBER_CC ); \
270 } while (0)
271# define UMUL_TIME 20
272# else
/* umul_ppmm (xh, xl, a, b): multiply the USItype values A and B and
   store the high word of the 64-bit product in XH and the low word in
   XL.  Written in C so instruction selection is left to the compiler.
   The temporary is a plain automatic variable: the `register' storage
   class is not accepted by C++17, and this header is also compiled as
   C++.  */
# define umul_ppmm(xh, xl, a, b) \
  do {                                                                  \
    /* Generate umull, under compiler control.  */                      \
    UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b);                \
    (xl) = (USItype)__t0;                                               \
    (xh) = (USItype)(__t0 >> 32);                                       \
  } while (0)
280# define UMUL_TIME 3
281# endif
282# define UDIV_TIME 100
e1a79915
RS
283#endif /* __arm__ */
284
ef0a4b67
PB
285#if defined(__arm__)
286/* Let gcc decide how best to implement count_leading_zeros. */
287#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
8993fde4 288#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
ef0a4b67
PB
289#define COUNT_LEADING_ZEROS_0 32
290#endif
291
6dab9931
GJL
292#if defined (__AVR__)
293
294#if W_TYPE_SIZE == 16
295#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
296#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
297#define COUNT_LEADING_ZEROS_0 16
298#endif /* W_TYPE_SIZE == 16 */
299
300#if W_TYPE_SIZE == 32
0ad8bb3b
GJL
301#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
302#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
303#define COUNT_LEADING_ZEROS_0 32
6dab9931
GJL
304#endif /* W_TYPE_SIZE == 32 */
305
306#if W_TYPE_SIZE == 64
307#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X))
308#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
309#define COUNT_LEADING_ZEROS_0 64
310#endif /* W_TYPE_SIZE == 64 */
311
312#endif /* defined (__AVR__) */
0ad8bb3b 313
0e499e75
HPN
314#if defined (__CRIS__)
315
316#if __CRIS_arch_version >= 3
e636e508 317#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
0e499e75
HPN
318#define COUNT_LEADING_ZEROS_0 32
319#endif /* __CRIS_arch_version >= 3 */
320
9ef4a0cd
JN
321#if __CRIS_arch_version >= 8
322#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
0e499e75
HPN
323#endif /* __CRIS_arch_version >= 8 */
324
325#if __CRIS_arch_version >= 10
326#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
327#else
328#define __umulsidi3 __umulsidi3
329extern UDItype __umulsidi3 (USItype, USItype);
330#endif /* __CRIS_arch_version >= 10 */
331
332#define umul_ppmm(w1, w0, u, v) \
333 do { \
334 UDItype __x = __umulsidi3 (u, v); \
335 (w0) = (USItype) (__x); \
336 (w1) = (USItype) (__x >> 32); \
337 } while (0)
338
339/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
340 DFmode ("double" intrinsics, avoiding two of the three insns handling
341 carry), but defining them as open-code C composing and doing the
342 operation in DImode (UDImode) shows that the DImode needs work:
343 register pressure from requiring neighboring registers and the
344 traffic to and from them come to dominate, in the 4.7 series. */
345
346#endif /* defined (__CRIS__) */
e636e508 347
021b3949 348#if defined (__hppa) && W_TYPE_SIZE == 32
e1a79915 349#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
7efe41a9 350 __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
0f41302f
MS
351 : "=r" ((USItype) (sh)), \
352 "=&r" ((USItype) (sl)) \
353 : "%rM" ((USItype) (ah)), \
354 "rM" ((USItype) (bh)), \
355 "%rM" ((USItype) (al)), \
356 "rM" ((USItype) (bl)))
e1a79915 357#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
7efe41a9 358 __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
0f41302f
MS
359 : "=r" ((USItype) (sh)), \
360 "=&r" ((USItype) (sl)) \
361 : "rM" ((USItype) (ah)), \
362 "rM" ((USItype) (bh)), \
363 "rM" ((USItype) (al)), \
364 "rM" ((USItype) (bl)))
d83dd29a
TG
365#if defined (_PA_RISC1_1)
366#define umul_ppmm(w1, w0, u, v) \
367 do { \
368 union \
369 { \
e6c5404d
TG
370 UDItype __f; \
371 struct {USItype __w1, __w0;} __w1w0; \
d83dd29a
TG
372 } __t; \
373 __asm__ ("xmpyu %1,%2,%0" \
374 : "=x" (__t.__f) \
0f41302f
MS
375 : "x" ((USItype) (u)), \
376 "x" ((USItype) (v))); \
d83dd29a
TG
377 (w1) = __t.__w1w0.__w1; \
378 (w0) = __t.__w1w0.__w0; \
379 } while (0)
380#define UMUL_TIME 8
381#else
382#define UMUL_TIME 30
383#endif
384#define UDIV_TIME 40
f71c71f1
TG
385#define count_leading_zeros(count, x) \
386 do { \
387 USItype __tmp; \
388 __asm__ ( \
7efe41a9
RH
389 "ldi 1,%0\n" \
390" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
391" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\
392" ldo 16(%0),%0 ; Yes. Perform add.\n" \
393" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
394" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\
395" ldo 8(%0),%0 ; Yes. Perform add.\n" \
396" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
397" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\
398" ldo 4(%0),%0 ; Yes. Perform add.\n" \
399" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
400" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\
401" ldo 2(%0),%0 ; Yes. Perform add.\n" \
402" extru %1,30,1,%1 ; Extract bit 1.\n" \
403" sub %0,%1,%0 ; Subtract it.\n" \
404 : "=r" (count), "=r" (__tmp) : "1" (x)); \
f71c71f1 405 } while (0)
e1a79915
RS
406#endif
407
7e765675 408#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
9602b6a1 409#if !defined (__zarch__)
021b3949
JJ
410#define smul_ppmm(xh, xl, m0, m1) \
411 do { \
412 union {DItype __ll; \
413 struct {USItype __h, __l;} __i; \
7e765675
UW
414 } __x; \
415 __asm__ ("lr %N0,%1\n\tmr %0,%2" \
416 : "=&r" (__x.__ll) \
417 : "r" (m0), "r" (m1)); \
418 (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
021b3949
JJ
419 } while (0)
420#define sdiv_qrnnd(q, r, n1, n0, d) \
421 do { \
422 union {DItype __ll; \
423 struct {USItype __h, __l;} __i; \
7e765675
UW
424 } __x; \
425 __x.__i.__h = n1; __x.__i.__l = n0; \
021b3949 426 __asm__ ("dr %0,%2" \
7e765675
UW
427 : "=r" (__x.__ll) \
428 : "0" (__x.__ll), "r" (d)); \
429 (q) = __x.__i.__l; (r) = __x.__i.__h; \
021b3949 430 } while (0)
9602b6a1
AK
431#else
432#define smul_ppmm(xh, xl, m0, m1) \
433 do { \
aecb6197
AK
434 register SItype __r0 __asm__ ("0"); \
435 register SItype __r1 __asm__ ("1") = (m0); \
0d03ac1b 436 \
9602b6a1 437 __asm__ ("mr\t%%r0,%3" \
0d03ac1b
L
438 : "=r" (__r0), "=r" (__r1) \
439 : "r" (__r1), "r" (m1)); \
aecb6197 440 (xh) = __r0; (xl) = __r1; \
9602b6a1 441 } while (0)
aecb6197 442
9602b6a1 443#define sdiv_qrnnd(q, r, n1, n0, d) \
0d03ac1b 444 do { \
aecb6197
AK
445 register SItype __r0 __asm__ ("0") = (n1); \
446 register SItype __r1 __asm__ ("1") = (n0); \
0d03ac1b 447 \
aecb6197 448 __asm__ ("dr\t%%r0,%4" \
0d03ac1b
L
449 : "=r" (__r0), "=r" (__r1) \
450 : "r" (__r0), "r" (__r1), "r" (d)); \
aecb6197 451 (q) = __r1; (r) = __r0; \
9602b6a1
AK
452 } while (0)
453#endif /* __zarch__ */
021b3949
JJ
454#endif
455
456#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
e1a79915 457#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
9fe2319e 458 __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
0f41302f
MS
459 : "=r" ((USItype) (sh)), \
460 "=&r" ((USItype) (sl)) \
461 : "%0" ((USItype) (ah)), \
462 "g" ((USItype) (bh)), \
463 "%1" ((USItype) (al)), \
464 "g" ((USItype) (bl)))
e1a79915 465#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
9fe2319e 466 __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
0f41302f
MS
467 : "=r" ((USItype) (sh)), \
468 "=&r" ((USItype) (sl)) \
469 : "0" ((USItype) (ah)), \
470 "g" ((USItype) (bh)), \
471 "1" ((USItype) (al)), \
472 "g" ((USItype) (bl)))
e1a79915 473#define umul_ppmm(w1, w0, u, v) \
9fe2319e 474 __asm__ ("mul{l} %3" \
0f41302f
MS
475 : "=a" ((USItype) (w0)), \
476 "=d" ((USItype) (w1)) \
477 : "%0" ((USItype) (u)), \
478 "rm" ((USItype) (v)))
2a0e04e2 479#define udiv_qrnnd(q, r, n1, n0, dv) \
9fe2319e 480 __asm__ ("div{l} %4" \
0f41302f
MS
481 : "=a" ((USItype) (q)), \
482 "=d" ((USItype) (r)) \
483 : "0" ((USItype) (n0)), \
484 "1" ((USItype) (n1)), \
2a0e04e2 485 "rm" ((USItype) (dv)))
e7e0aaec
UB
486#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
487#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
d83dd29a
TG
488#define UMUL_TIME 40
489#define UDIV_TIME 40
e1a79915
RS
490#endif /* 80x86 */
491
8df07a2c 492#if defined (__x86_64__) && W_TYPE_SIZE == 64
e7e0aaec 493#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
9fe2319e 494 __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \
e7e0aaec
UB
495 : "=r" ((UDItype) (sh)), \
496 "=&r" ((UDItype) (sl)) \
497 : "%0" ((UDItype) (ah)), \
498 "rme" ((UDItype) (bh)), \
499 "%1" ((UDItype) (al)), \
500 "rme" ((UDItype) (bl)))
501#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
9fe2319e 502 __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \
e7e0aaec
UB
503 : "=r" ((UDItype) (sh)), \
504 "=&r" ((UDItype) (sl)) \
505 : "0" ((UDItype) (ah)), \
506 "rme" ((UDItype) (bh)), \
507 "1" ((UDItype) (al)), \
508 "rme" ((UDItype) (bl)))
509#define umul_ppmm(w1, w0, u, v) \
9fe2319e 510 __asm__ ("mul{q} %3" \
e7e0aaec
UB
511 : "=a" ((UDItype) (w0)), \
512 "=d" ((UDItype) (w1)) \
513 : "%0" ((UDItype) (u)), \
514 "rm" ((UDItype) (v)))
515#define udiv_qrnnd(q, r, n1, n0, dv) \
9fe2319e 516 __asm__ ("div{q} %4" \
e7e0aaec
UB
517 : "=a" ((UDItype) (q)), \
518 "=d" ((UDItype) (r)) \
519 : "0" ((UDItype) (n0)), \
520 "1" ((UDItype) (n1)), \
521 "rm" ((UDItype) (dv)))
10937b0c
L
522#define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
523#define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
e7e0aaec
UB
524#define UMUL_TIME 40
525#define UDIV_TIME 40
526#endif /* x86_64 */
527
021b3949 528#if defined (__i960__) && W_TYPE_SIZE == 32
f71c71f1
TG
529#define umul_ppmm(w1, w0, u, v) \
530 ({union {UDItype __ll; \
531 struct {USItype __l, __h;} __i; \
532 } __xx; \
533 __asm__ ("emul %2,%1,%0" \
534 : "=d" (__xx.__ll) \
0f41302f
MS
535 : "%dI" ((USItype) (u)), \
536 "dI" ((USItype) (v))); \
f71c71f1
TG
537 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
538#define __umulsidi3(u, v) \
539 ({UDItype __w; \
540 __asm__ ("emul %2,%1,%0" \
541 : "=d" (__w) \
0f41302f
MS
542 : "%dI" ((USItype) (u)), \
543 "dI" ((USItype) (v))); \
021b3949 544 __w; })
f71c71f1 545#endif /* __i960__ */
e1a79915 546
c252db20
L
547#if defined (__ia64) && W_TYPE_SIZE == 64
548/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
549 "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency. The generic
550 code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
551 register, which takes an extra cycle. */
/* sub_ddmmss (sh, sl, ah, al, bh, bl): two-word subtraction
   (SH:SL) = (AH:AL) - (BH:BL).  A borrow out of the high word is
   discarded.

   Every operand is read exactly once into a UWtype temporary before any
   result is stored.  That makes arguments with side effects safe, lets
   callers pass the same lvalue as both input and output, and guarantees
   the borrow test is an unsigned comparison whatever the argument types
   are.  The explicit if/else is kept on purpose rather than subtracting
   the value of a comparison.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __sub_ah = (ah), __sub_al = (al);                            \
    UWtype __sub_bh = (bh), __sub_bl = (bl);                            \
    UWtype __sub_lo = __sub_al - __sub_bl;                              \
    if (__sub_al < __sub_bl)                                            \
      (sh) = __sub_ah - __sub_bh - 1;                                   \
    else                                                                \
      (sh) = __sub_ah - __sub_bh;                                       \
    (sl) = __sub_lo;                                                    \
  } while (0)
562
563/* Do both product parts in assembly, since that gives better code with
564 all gcc versions. Some callers will just use the upper part, and in
565 that situation we waste an instruction, but not any cycles. */
566#define umul_ppmm(ph, pl, m0, m1) \
567 __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
568 : "=&f" (ph), "=f" (pl) \
569 : "f" (m0), "f" (m1))
570#define count_leading_zeros(count, x) \
571 do { \
572 UWtype _x = (x), _y, _a, _c; \
573 __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
574 __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
575 _c = (_a - 1) << 3; \
576 _x >>= _c; \
577 if (_x >= 1 << 4) \
578 _x >>= 4, _c += 4; \
579 if (_x >= 1 << 2) \
580 _x >>= 2, _c += 2; \
581 _c += _x >> 1; \
582 (count) = W_TYPE_SIZE - 1 - _c; \
583 } while (0)
584/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
585 based, and we don't need a special case for x==0 here */
586#define count_trailing_zeros(count, x) \
587 do { \
588 UWtype __ctz_x = (x); \
589 __asm__ ("popcnt %0 = %1" \
590 : "=r" (count) \
591 : "r" ((__ctz_x-1) & ~__ctz_x)); \
592 } while (0)
593#define UMUL_TIME 14
594#endif
595
021b3949 596#if defined (__M32R__) && W_TYPE_SIZE == 32
be20c0ad
DE
597#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
598 /* The cmp clears the condition bit. */ \
de5c90ff 599 __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
be20c0ad
DE
600 : "=r" ((USItype) (sh)), \
601 "=&r" ((USItype) (sl)) \
de5c90ff 602 : "0" ((USItype) (ah)), \
be20c0ad 603 "r" ((USItype) (bh)), \
de5c90ff 604 "1" ((USItype) (al)), \
be20c0ad
DE
605 "r" ((USItype) (bl)) \
606 : "cbit")
607#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
608 /* The cmp clears the condition bit. */ \
de5c90ff 609 __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
be20c0ad
DE
610 : "=r" ((USItype) (sh)), \
611 "=&r" ((USItype) (sl)) \
612 : "0" ((USItype) (ah)), \
613 "r" ((USItype) (bh)), \
614 "1" ((USItype) (al)), \
615 "r" ((USItype) (bl)) \
616 : "cbit")
617#endif /* __M32R__ */
618
021b3949 619#if defined (__mc68000__) && W_TYPE_SIZE == 32
e1a79915 620#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
7efe41a9 621 __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
0f41302f
MS
622 : "=d" ((USItype) (sh)), \
623 "=&d" ((USItype) (sl)) \
624 : "%0" ((USItype) (ah)), \
625 "d" ((USItype) (bh)), \
626 "%1" ((USItype) (al)), \
627 "g" ((USItype) (bl)))
e1a79915 628#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
7efe41a9 629 __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
0f41302f
MS
630 : "=d" ((USItype) (sh)), \
631 "=&d" ((USItype) (sl)) \
632 : "0" ((USItype) (ah)), \
633 "d" ((USItype) (bh)), \
634 "1" ((USItype) (al)), \
635 "g" ((USItype) (bl)))
f3a5c9a4 636
e5d95b69 637/* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r. */
3704aae9 638#if (defined (__mc68020__) && !defined (__mc68060__))
e1a79915
RS
639#define umul_ppmm(w1, w0, u, v) \
640 __asm__ ("mulu%.l %3,%1:%0" \
0f41302f
MS
641 : "=d" ((USItype) (w0)), \
642 "=d" ((USItype) (w1)) \
643 : "%0" ((USItype) (u)), \
644 "dmi" ((USItype) (v)))
d83dd29a 645#define UMUL_TIME 45
e1a79915
RS
646#define udiv_qrnnd(q, r, n1, n0, d) \
647 __asm__ ("divu%.l %4,%1:%0" \
0f41302f
MS
648 : "=d" ((USItype) (q)), \
649 "=d" ((USItype) (r)) \
650 : "0" ((USItype) (n0)), \
651 "1" ((USItype) (n1)), \
652 "dmi" ((USItype) (d)))
d83dd29a
TG
653#define UDIV_TIME 90
654#define sdiv_qrnnd(q, r, n1, n0, d) \
655 __asm__ ("divs%.l %4,%1:%0" \
0f41302f
MS
656 : "=d" ((USItype) (q)), \
657 "=d" ((USItype) (r)) \
658 : "0" ((USItype) (n0)), \
659 "1" ((USItype) (n1)), \
660 "dmi" ((USItype) (d)))
f3a5c9a4 661
3704aae9
RZ
662#elif defined (__mcoldfire__) /* not mc68020 */
663
bab4470d
PB
664#define umul_ppmm(xh, xl, a, b) \
665 __asm__ ("| Inlined umul_ppmm\n" \
666 " move%.l %2,%/d0\n" \
667 " move%.l %3,%/d1\n" \
668 " move%.l %/d0,%/d2\n" \
669 " swap %/d0\n" \
670 " move%.l %/d1,%/d3\n" \
671 " swap %/d1\n" \
672 " move%.w %/d2,%/d4\n" \
673 " mulu %/d3,%/d4\n" \
674 " mulu %/d1,%/d2\n" \
675 " mulu %/d0,%/d3\n" \
676 " mulu %/d0,%/d1\n" \
677 " move%.l %/d4,%/d0\n" \
678 " clr%.w %/d0\n" \
679 " swap %/d0\n" \
680 " add%.l %/d0,%/d2\n" \
681 " add%.l %/d3,%/d2\n" \
682 " jcc 1f\n" \
683 " add%.l %#65536,%/d1\n" \
684 "1: swap %/d2\n" \
685 " moveq %#0,%/d0\n" \
686 " move%.w %/d2,%/d0\n" \
687 " move%.w %/d4,%/d2\n" \
688 " move%.l %/d2,%1\n" \
689 " add%.l %/d1,%/d0\n" \
690 " move%.l %/d0,%0" \
691 : "=g" ((USItype) (xh)), \
692 "=g" ((USItype) (xl)) \
693 : "g" ((USItype) (a)), \
694 "g" ((USItype) (b)) \
695 : "d0", "d1", "d2", "d3", "d4")
696#define UMUL_TIME 100
697#define UDIV_TIME 400
698#else /* not ColdFire */
b24b1d78 699/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */
e1a79915 700#define umul_ppmm(xh, xl, a, b) \
7efe41a9
RH
701 __asm__ ("| Inlined umul_ppmm\n" \
702 " move%.l %2,%/d0\n" \
703 " move%.l %3,%/d1\n" \
704 " move%.l %/d0,%/d2\n" \
705 " swap %/d0\n" \
706 " move%.l %/d1,%/d3\n" \
707 " swap %/d1\n" \
708 " move%.w %/d2,%/d4\n" \
709 " mulu %/d3,%/d4\n" \
710 " mulu %/d1,%/d2\n" \
711 " mulu %/d0,%/d3\n" \
712 " mulu %/d0,%/d1\n" \
713 " move%.l %/d4,%/d0\n" \
714 " eor%.w %/d0,%/d0\n" \
715 " swap %/d0\n" \
716 " add%.l %/d0,%/d2\n" \
717 " add%.l %/d3,%/d2\n" \
718 " jcc 1f\n" \
719 " add%.l %#65536,%/d1\n" \
720 "1: swap %/d2\n" \
721 " moveq %#0,%/d0\n" \
722 " move%.w %/d2,%/d0\n" \
723 " move%.w %/d4,%/d2\n" \
724 " move%.l %/d2,%1\n" \
725 " add%.l %/d1,%/d0\n" \
726 " move%.l %/d0,%0" \
0f41302f
MS
727 : "=g" ((USItype) (xh)), \
728 "=g" ((USItype) (xl)) \
729 : "g" ((USItype) (a)), \
f3a5c9a4 730 "g" ((USItype) (b)) \
d83dd29a
TG
731 : "d0", "d1", "d2", "d3", "d4")
732#define UMUL_TIME 100
733#define UDIV_TIME 400
3704aae9 734
e1a79915 735#endif /* not mc68020 */
f3a5c9a4 736
32247ce9
BI
737/* The '020, '030, '040 and '060 have bitfield insns.
738 cpu32 disguises as a 68020, but lacks them. */
3704aae9 739#if defined (__mc68020__) && !defined (__mcpu32__)
f3a5c9a4 740#define count_leading_zeros(count, x) \
4b3d1177 741 __asm__ ("bfffo %1{%b2:%b2},%0" \
f3a5c9a4
RK
742 : "=d" ((USItype) (count)) \
743 : "od" ((USItype) (x)), "n" (0))
7a6525d6
SL
744/* Some ColdFire architectures have a ff1 instruction supported via
745 __builtin_clz. */
746#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
747#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
748#define COUNT_LEADING_ZEROS_0 32
f3a5c9a4 749#endif
e1a79915
RS
750#endif /* mc68000 */
751
021b3949 752#if defined (__m88000__) && W_TYPE_SIZE == 32
e1a79915 753#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
7efe41a9 754 __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
0f41302f
MS
755 : "=r" ((USItype) (sh)), \
756 "=&r" ((USItype) (sl)) \
757 : "%rJ" ((USItype) (ah)), \
758 "rJ" ((USItype) (bh)), \
759 "%rJ" ((USItype) (al)), \
760 "rJ" ((USItype) (bl)))
e1a79915 761#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
7efe41a9 762 __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
0f41302f
MS
763 : "=r" ((USItype) (sh)), \
764 "=&r" ((USItype) (sl)) \
765 : "rJ" ((USItype) (ah)), \
766 "rJ" ((USItype) (bh)), \
767 "rJ" ((USItype) (al)), \
768 "rJ" ((USItype) (bl)))
e1a79915
RS
769#define count_leading_zeros(count, x) \
770 do { \
e6c5404d 771 USItype __cbtmp; \
e1a79915 772 __asm__ ("ff1 %0,%1" \
d83dd29a 773 : "=r" (__cbtmp) \
0f41302f 774 : "r" ((USItype) (x))); \
e1a79915
RS
775 (count) = __cbtmp ^ 31; \
776 } while (0)
021b3949 777#define COUNT_LEADING_ZEROS_0 63 /* sic */
d83dd29a 778#if defined (__mc88110__)
f71c71f1
TG
779#define umul_ppmm(wh, wl, u, v) \
780 do { \
781 union {UDItype __ll; \
782 struct {USItype __h, __l;} __i; \
783 } __xx; \
784 __asm__ ("mulu.d %0,%1,%2" \
785 : "=r" (__xx.__ll) \
0f41302f
MS
786 : "r" ((USItype) (u)), \
787 "r" ((USItype) (v))); \
f71c71f1
TG
788 (wh) = __xx.__i.__h; \
789 (wl) = __xx.__i.__l; \
790 } while (0)
d83dd29a 791#define udiv_qrnnd(q, r, n1, n0, d) \
f71c71f1
TG
792 ({union {UDItype __ll; \
793 struct {USItype __h, __l;} __i; \
794 } __xx; \
795 USItype __q; \
796 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
797 __asm__ ("divu.d %0,%1,%2" \
798 : "=r" (__q) \
799 : "r" (__xx.__ll), \
0f41302f 800 "r" ((USItype) (d))); \
f71c71f1
TG
801 (r) = (n0) - __q * (d); (q) = __q; })
802#define UMUL_TIME 5
803#define UDIV_TIME 25
804#else
805#define UMUL_TIME 17
806#define UDIV_TIME 150
807#endif /* __mc88110__ */
e1a79915
RS
808#endif /* __m88000__ */
809
27098b6b
RH
/* MN10300 support.  The inline assembly uses the __asm__ spelling, like
   every other port in this file, so the macros still compile when the
   GNU `asm' keyword is disabled (-ansi or a strict -std=c* mode).  */
#if defined (__mn10300__)
# if defined (__AM33__)
/* AM33 variant: leading-zero count via the compiler builtin and
   four-operand multiply instructions writing W1 (high) and W0 (low).  */
#  define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#  define umul_ppmm(w1, w0, u, v) \
  __asm__("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
#  define smul_ppmm(w1, w0, u, v) \
  __asm__("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
/* Non-AM33 variant: U is tied to the W0 output operand, and W1 is
   taken from the register class selected by the "z" constraint.  */
#  define umul_ppmm(w1, w0, u, v) \
  __asm__("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
#  define smul_ppmm(w1, w0, u, v) \
  __asm__("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* Two-word add and subtract done as one double-word operation through
   DWunion.  DWunion is not declared in this header; it must be supplied
   by the including file.
   NOTE(review): if DWunion's `ll' member is a signed type, the
   double-word add/subtract can overflow -- confirm against the
   definition used by the includer.  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    DWunion __s, __a, __b;                                              \
    __a.s.low = (al); __a.s.high = (ah);                                \
    __b.s.low = (bl); __b.s.high = (bh);                                \
    __s.ll = __a.ll + __b.ll;                                           \
    (sl) = __s.s.low; (sh) = __s.s.high;                                \
  } while (0)
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    DWunion __s, __a, __b;                                              \
    __a.s.low = (al); __a.s.high = (ah);                                \
    __b.s.low = (bl); __b.s.high = (bh);                                \
    __s.ll = __a.ll - __b.ll;                                           \
    (sl) = __s.s.low; (sh) = __s.s.high;                                \
  } while (0)
/* Division: NL is tied to the quotient output Q and NH to the
   remainder output R; D is the divisor.  */
# define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif
846
021b3949 847#if defined (__mips__) && W_TYPE_SIZE == 32
21dfc6dc
RS
/* umul_ppmm (w1, w0, u, v): multiply the USItype values U and V as a
   UDItype and store the high word of the product in W1 and the low
   word in W0.  Both operands are evaluated once, into the product,
   before either output is written.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);              \
    (w1) = (USItype) (__x >> 32);                                       \
    (w0) = (USItype) (__x);                                             \
  } while (0)
f71c71f1 854#define UMUL_TIME 10
e1a79915 855#define UDIV_TIME 100
44f9fcef 856
b6799f37 857#if (__mips == 32 || __mips == 64) && ! defined (__mips16)
44f9fcef
SL
858#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
859#define COUNT_LEADING_ZEROS_0 32
860#endif
e1a79915
RS
861#endif /* __mips__ */
862
f34fc46e
DE
/* FIXME: We should test _IBMR2 here when we add assembly support for the
   system vendor compilers.
   FIXME: What's needed for gcc PowerPC VxWorks?  __vxworks__ is not good
   enough, since that hits ARM and m68k too.  */
#if (defined (_ARCH_PPC) /* AIX */ \
     || defined (__powerpc__) /* gcc */ \
     || defined (__POWERPC__) /* BEOS */ \
     || defined (__ppc__) /* Darwin */ \
     || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
     || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
         && CPU_FAMILY == PPC) \
     ) && W_TYPE_SIZE == 32
/* (sh,sl) = (ah,al) + (bh,bl).  When BH is a compile-time 0 or all-ones,
   the high-word add is replaced by addze/addme, which need no second
   register operand.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else \
      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
/* (sh,sl) = (ah,al) - (bh,bl).  Constant 0 / all-ones high words select
   the one-operand subfze/subfme/addme/addze forms.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else \
      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
  || defined (__ppc__) \
  || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
  || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
      && CPU_FAMILY == PPC)
/* (ph,pl) = m0 * m1, unsigned.  The operands are copied into locals first
   and only the copies are used afterwards, so each macro argument is
   evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
/* (ph,pl) = m0 * m1, signed.  The low word is formed with an unsigned
   multiply (same low bits) so the C product cannot overflow.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    SItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = (USItype) __m0 * (USItype) __m1; \
  } while (0)
#define SMUL_TIME 14
#define UDIV_TIME 120
#endif
#endif /* 32-bit POWER architecture variants.  */
932
/* We should test _IBMR2 here when we add assembly support for the system
   vendor compilers.  */
#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
/* (sh,sl) = (ah,al) + (bh,bl).  A compile-time 0 or all-ones BH selects
   the one-operand addze/addme form for the high word.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
    else \
      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
               : "=r" (sh), "=&r" (sl) \
               : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
  } while (0)
/* (sh,sl) = (ah,al) - (bh,bl), with the same constant special cases.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    if (__builtin_constant_p (ah) && (ah) == 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else \
      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
               : "=r" (sh), "=&r" (sl) \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
  } while (0)
#define count_leading_zeros(count, x) \
  __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 64
/* (ph,pl) = m0 * m1, unsigned.  Only the local copies are used after the
   first line, so each macro argument is evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 15
/* (ph,pl) = m0 * m1, signed.  The low word is formed with an unsigned
   multiply (same low bits) so the C product cannot overflow.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do { \
    DItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \
    (pl) = (UDItype) __m0 * (UDItype) __m1; \
  } while (0)
#define SMUL_TIME 14  /* ??? */
#define UDIV_TIME 120 /* ??? */
#endif /* 64-bit PowerPC.  */
f71c71f1 987
#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
/* (sh,sl) = (ah,al) + (bh,bl): low words first, then the high words with
   the carry-consuming form of the add.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("a %1,%5\n\tae %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "r" ((USItype) (bl)))
/* (sh,sl) = (ah,al) - (bh,bl).  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("s %1,%5\n\tse %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)))
/* (ph,pl) = m0 * m1, unsigned.  The sequence is sixteen multiply-step
   instructions accumulating in r2; the C statement after the asm then
   adds a correction to the high word for every operand whose top bit is
   set (presumably because the steps treat the operands as signed --
   confirm against the ROMP manual).  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ( \
       "s r2,r2\n" \
" mts r10,%2\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" m r2,%3\n" \
" cas %0,r2,r0\n" \
" mfs r10,%1" \
             : "=r" ((USItype) (ph)), \
               "=r" ((USItype) (pl)) \
             : "%r" (__m0), \
               "r" (__m1) \
             : "r2"); \
    (ph) += ((((SItype) __m0 >> 31) & __m1) \
             + (((SItype) __m1 >> 31) & __m0)); \
  } while (0)
#define UMUL_TIME 20
#define UDIV_TIME 200
/* The clz instruction is applied to one 16-bit half at a time: the upper
   half when X has any bit set there, otherwise the lower half plus 16.  */
#define count_leading_zeros(count, x) \
  do { \
    if ((x) >= 0x10000) \
      __asm__ ("clz %0,%1" \
               : "=r" ((USItype) (count)) \
               : "r" ((USItype) (x) >> 16)); \
    else \
      { \
        __asm__ ("clz %0,%1" \
                 : "=r" ((USItype) (count)) \
                 : "r" ((USItype) (x))); \
        (count) += 16; \
      } \
  } while (0)
#endif
1054
ee1c2133
KC
#if defined(__riscv)
#ifdef __riscv_mul
/* A multiply instruction is available: let the compiler emit it.  */
#define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v))
#define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b))
#else
#if __riscv_xlen == 32
  #define MULUW3 "call __mulsi3"
#elif __riscv_xlen == 64
  #define MULUW3 "call __muldi3"
#else
#error unsupported xlen
#endif /* __riscv_xlen */
/* No multiply instruction: call the library multiply routine by hand.
   We rely on the fact that MULUW3 doesn't clobber the t-registers, so
   only ra, a2 and a3 are listed as clobbers (a0 and a1 are in/out
   operands).  This gives a better register allocation result than an
   ordinary function call.  */
#define __muluw3(a, b) \
  ({ \
    register UWtype __op0 __asm__ ("a0") = (a); \
    register UWtype __op1 __asm__ ("a1") = (b); \
    __asm__ __volatile__ (MULUW3 \
                          : "+r" (__op0), "+r" (__op1) \
                          : \
                          : "ra", "a2", "a3"); \
    __op0; \
  })
#endif /* __riscv_mul */
/* (w1,w0) = u * v, built from four half-word by half-word products.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
 \
    __ul = __ll_lowpart (u); \
    __uh = __ll_highpart (u); \
    __vl = __ll_lowpart (v); \
    __vh = __ll_highpart (v); \
 \
    __x0 = __muluw3 (__ul, __vl); \
    __x1 = __muluw3 (__ul, __vh); \
    __x2 = __muluw3 (__uh, __vl); \
    __x3 = __muluw3 (__uh, __vh); \
 \
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2; /* but this indeed can */ \
    if (__x1 < __x2) /* did we get it? */ \
      __x3 += __ll_B; /* yes, add it in the proper pos.  */ \
 \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
  } while (0)
#endif /* __riscv */
1104
#if defined(__sh__) && W_TYPE_SIZE == 32
#ifndef __sh1__
/* (w1,w0) = u * v, unsigned: dmulu.l leaves the product in mach:macl,
   which are then stored to the two outputs.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ( \
       "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
           : "=r<" ((USItype)(w1)), \
             "=r<" ((USItype)(w0)) \
           : "r" ((USItype)(u)), \
             "r" ((USItype)(v)) \
           : "macl", "mach")
#define UMUL_TIME 5
#endif

/* This is the same algorithm as __udiv_qrnnd_c.  */
#define UDIV_NEEDS_NORMALIZATION 1

#ifdef __FDPIC__
/* FDPIC needs a special version of the asm fragment to extract the
   code address from the function descriptor.  __udiv_qrnnd_16 is
   assumed to be local and not to use the GOT, so loading r12 is
   not needed.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
                        __attribute__ ((visibility ("hidden"))); \
    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
    __asm__ ( \
        "mov%M4 %4,r5\n" \
" swap.w %3,r4\n" \
" swap.w r5,r6\n" \
" mov.l @%5,r2\n" \
" jsr @r2\n" \
" shll16 r6\n" \
" swap.w r4,r4\n" \
" mov.l @%5,r2\n" \
" jsr @r2\n" \
" swap.w r1,%0\n" \
" or r1,%0" \
        : "=r" (q), "=&z" (r) \
        : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
        : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
  } while (0)
#else
/* Non-FDPIC: the helper's address is called directly, twice, once per
   16-bit half of the quotient.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
                        __attribute__ ((visibility ("hidden"))); \
    /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
    __asm__ ( \
        "mov%M4 %4,r5\n" \
" swap.w %3,r4\n" \
" swap.w r5,r6\n" \
" jsr @%5\n" \
" shll16 r6\n" \
" swap.w r4,r4\n" \
" jsr @%5\n" \
" swap.w r1,%0\n" \
" or r1,%0" \
        : "=r" (q), "=&z" (r) \
        : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
        : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
  } while (0)
#endif /* __FDPIC__ */

#define UDIV_TIME 80

/* (sh,sl) = (ah,al) - (bh,bl): clear T, then two subtract-with-carry
   steps, low word first.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("clrt;subc %5,%1; subc %4,%0" \
           : "=r" (sh), "=r" (sl) \
           : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")

#endif /* __sh__ */
1177
afdac905
JJ
#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
    && W_TYPE_SIZE == 32
/* (sh,sl) = (ah,al) + (bh,bl): addcc on the low words sets the carry that
   addx consumes for the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rJ" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "%rJ" ((USItype) (al)), \
             "rI" ((USItype) (bl)) \
           __CLOBBER_CC)
/* (sh,sl) = (ah,al) - (bh,bl).  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rJ" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "rJ" ((USItype) (al)), \
             "rI" ((USItype) (bl)) \
           __CLOBBER_CC)
#if defined (__sparc_v9__)
/* V9 in 32-bit mode: umul leaves the full product in one register (pinned
   to %g1 here); srlx extracts the high word from it.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    register USItype __g1 __asm__ ("g1"); \
    __asm__ ("umul\t%2,%3,%1\n\t" \
             "srlx\t%1, 32, %0" \
             : "=r" ((USItype) (w1)), \
               "=r" (__g1) \
             : "r" ((USItype) (u)), \
               "r" ((USItype) (v))); \
    (w0) = __g1; \
  } while (0)
/* The remainder is recovered as n0 - q * d.  */
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("mov\t%2,%%y\n\t" \
           "udiv\t%3,%4,%0\n\t" \
           "umul\t%0,%4,%1\n\t" \
           "sub\t%3,%1,%1" \
           : "=&r" ((USItype) (__q)), \
             "=&r" ((USItype) (__r)) \
           : "r" ((USItype) (__n1)), \
             "r" ((USItype) (__n0)), \
             "r" ((USItype) (__d)))
#else
#if defined (__sparc_v8__)
/* V8: umul puts the low word in the destination and the high word in %y.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "r" ((USItype) (u)), \
             "r" ((USItype) (v)))
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
           : "=&r" ((USItype) (__q)), \
             "=&r" ((USItype) (__r)) \
           : "r" ((USItype) (__n1)), \
             "r" ((USItype) (__n0)), \
             "r" ((USItype) (__d)))
#else
#if defined (__sparclite__)
/* This has hardware multiply but not divide.  It also has two additional
   instructions scan (ffs from high bit) and divscc.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("umul %2,%3,%1;rd %%y,%0" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "r" ((USItype) (u)), \
             "r" ((USItype) (v)))
/* Division by 32 divscc steps; the remainder is read from %y and
   corrected by adding D if the final step left it negative.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
" tst %%g0\n" \
" divscc %3,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%%g1\n" \
" divscc %%g1,%4,%0\n" \
" rd %%y,%1\n" \
" bl,a 1f\n" \
" add %1,%4,%1\n" \
"1: ! End of inline udiv_qrnnd" \
           : "=r" ((USItype) (q)), \
             "=r" ((USItype) (r)) \
           : "r" ((USItype) (n1)), \
             "r" ((USItype) (n0)), \
             "rI" ((USItype) (d)) \
           : "g1" __AND_CLOBBER_CC)
#define UDIV_TIME 37
#define count_leading_zeros(count, x) \
  do { \
  __asm__ ("scan %1,1,%0" \
           : "=r" ((USItype) (count)) \
           : "r" ((USItype) (x))); \
  } while (0)
/* Early sparclites return 63 for an argument of 0, but they warn that future
   implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
   undefined.  */
#else
/* SPARC without integer multiplication and divide instructions.
   (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
/* Multiplication by 32 mulscc steps plus a final shift step; %o5 holds
   the correction term (U if V is negative, else 0) that is added to the
   high word at the end.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("! Inlined umul_ppmm\n" \
" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
" sra %3,31,%%o5 ! Don't move this insn\n" \
" and %2,%%o5,%%o5 ! Don't move this insn\n" \
" andcc %%g0,0,%%g1 ! Don't move this insn\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,%3,%%g1\n" \
" mulscc %%g1,0,%%g1\n" \
" add %%g1,%%o5,%0\n" \
" rd %%y,%1" \
           : "=r" ((USItype) (w1)), \
             "=r" ((USItype) (w0)) \
           : "%rI" ((USItype) (u)), \
             "r" ((USItype) (v)) \
           : "g1", "o5" __AND_CLOBBER_CC)
#define UMUL_TIME 39 /* 39 instructions */
/* It's quite necessary to add this much assembler for the sparc.
   The default udiv_qrnnd (in C) is more than 10 times slower!  */
#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
  __asm__ ("! Inlined udiv_qrnnd\n" \
" mov 32,%%g1\n" \
" subcc %1,%2,%%g0\n" \
"1: bcs 5f\n" \
" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
" sub %1,%2,%1 ! this kills msb of n\n" \
" addx %1,%1,%1 ! so this can't give carry\n" \
" subcc %%g1,1,%%g1\n" \
"2: bne 1b\n" \
" subcc %1,%2,%%g0\n" \
" bcs 3f\n" \
" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
" b 3f\n" \
" sub %1,%2,%1 ! this kills msb of n\n" \
"4: sub %1,%2,%1\n" \
"5: addxcc %1,%1,%1\n" \
" bcc 2b\n" \
" subcc %%g1,1,%%g1\n" \
"! Got carry from n. Subtract next step to cancel this carry.\n" \
" bne 4b\n" \
" addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
" sub %1,%2,%1\n" \
"3: xnor %0,0,%0\n" \
" ! End of inline udiv_qrnnd" \
           : "=&r" ((USItype) (__q)), \
             "=&r" ((USItype) (__r)) \
           : "r" ((USItype) (__d)), \
             "1" ((USItype) (__n1)), \
             "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations.  */
#endif /* __sparclite__ */
#endif /* __sparc_v8__ */
#endif /* __sparc_v9__ */
#endif /* sparc32 */
e1a79915 1388
afdac905
JJ
#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
    && W_TYPE_SIZE == 64
/* (sh,sl) = (ah,al) + (bh,bl).  The 64-bit carry out of the low-word add
   is materialised with a conditional move on %xcc and added to the high
   word separately.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UDItype __carry = 0; \
    __asm__ ("addcc\t%r5,%6,%1\n\t" \
             "add\t%r3,%4,%0\n\t" \
             "movcs\t%%xcc, 1, %2\n\t" \
             "add\t%0, %2, %0" \
             : "=r" ((UDItype)(sh)), \
               "=&r" ((UDItype)(sl)), \
               "+r" (__carry) \
             : "%rJ" ((UDItype)(ah)), \
               "rI" ((UDItype)(bh)), \
               "%rJ" ((UDItype)(al)), \
               "rI" ((UDItype)(bl)) \
             __CLOBBER_CC); \
  } while (0)

/* (sh,sl) = (ah,al) - (bh,bl), with the borrow handled like the carry in
   add_ssaaaa.  The minuend operands must NOT carry the `%' (commutative)
   constraint modifier: subtraction is not commutative, and `%' would
   allow the compiler to exchange AH with BH (or AL with BL) when that
   helps it satisfy the constraints, silently computing the wrong
   difference.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UDItype __carry = 0; \
    __asm__ ("subcc\t%r5,%6,%1\n\t" \
             "sub\t%r3,%4,%0\n\t" \
             "movcs\t%%xcc, 1, %2\n\t" \
             "sub\t%0, %2, %0" \
             : "=r" ((UDItype)(sh)), \
               "=&r" ((UDItype)(sl)), \
               "+r" (__carry) \
             : "rJ" ((UDItype)(ah)), \
               "rI" ((UDItype)(bh)), \
               "rJ" ((UDItype)(al)), \
               "rI" ((UDItype)(bl)) \
             __CLOBBER_CC); \
  } while (0)

/* (wh,wl) = u * v, unsigned, assembled from 32x32->64 bit mulx partial
   products.  The scratch registers use reserved-namespace names so an
   argument expression that mentions a caller variable called `tmp1' etc.
   is not captured by the macro's own locals.  */
#define umul_ppmm(wh, wl, u, v) \
  do { \
    UDItype __tmp1, __tmp2, __tmp3, __tmp4; \
    __asm__ __volatile__ ( \
             "srl %7,0,%3\n\t" \
             "mulx %3,%6,%1\n\t" \
             "srlx %6,32,%2\n\t" \
             "mulx %2,%3,%4\n\t" \
             "sllx %4,32,%5\n\t" \
             "srl %6,0,%3\n\t" \
             "sub %1,%5,%5\n\t" \
             "srlx %5,32,%5\n\t" \
             "addcc %4,%5,%4\n\t" \
             "srlx %7,32,%5\n\t" \
             "mulx %3,%5,%3\n\t" \
             "mulx %2,%5,%5\n\t" \
             "sethi %%hi(0x80000000),%2\n\t" \
             "addcc %4,%3,%4\n\t" \
             "srlx %4,32,%4\n\t" \
             "add %2,%2,%2\n\t" \
             "movcc %%xcc,%%g0,%2\n\t" \
             "addcc %5,%4,%5\n\t" \
             "sllx %3,32,%3\n\t" \
             "add %1,%3,%1\n\t" \
             "add %5,%2,%0" \
             : "=r" ((UDItype)(wh)), \
               "=&r" ((UDItype)(wl)), \
               "=&r" (__tmp1), "=&r" (__tmp2), "=&r" (__tmp3), "=&r" (__tmp4) \
             : "r" ((UDItype)(u)), \
               "r" ((UDItype)(v)) \
             __CLOBBER_CC); \
  } while (0)
#define UMUL_TIME 96
#define UDIV_TIME 230
#endif /* sparc64 */
021b3949
JJ
1460
#if defined (__vax__) && W_TYPE_SIZE == 32
/* (sh,sl) = (ah,al) + (bh,bl): add the low words, then add the high words
   with carry.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
           : "=g" ((USItype) (sh)), \
             "=&g" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
/* (sh,sl) = (ah,al) - (bh,bl).  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
           : "=g" ((USItype) (sh)), \
             "=&g" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
/* (xh,xl) = m0 * m1, unsigned.  The emul result is read back through a
   union and the high word is then corrected for each operand whose top
   bit is set (presumably because emul is a signed multiply -- confirm
   against the VAX architecture manual).  */
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union { \
        UDItype __ll; \
        struct {USItype __l, __h;} __i; \
      } __xx; \
    USItype __m0 = (m0), __m1 = (m1); \
    __asm__ ("emul %1,%2,$0,%0" \
             : "=r" (__xx.__ll) \
             : "g" (__m0), \
               "g" (__m1)); \
    (xh) = __xx.__i.__h; \
    (xl) = __xx.__i.__l; \
    (xh) += ((((SItype) __m0 >> 31) & __m1) \
             + (((SItype) __m1 >> 31) & __m0)); \
  } while (0)
/* Signed division of the double word (n1,n0) by d using ediv; the
   dividend is assembled in a union first.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
           struct {SItype __l, __h;} __i; \
          } __xx; \
    __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
    __asm__ ("ediv %3,%2,%0,%1" \
             : "=g" (q), "=g" (r) \
             : "g" (__xx.__ll), "g" (d)); \
  } while (0)
#endif /* __vax__ */
1505
bcead286
BS
1506#ifdef _TMS320C6X
/* (sh,sl) = (ah,al) + (bh,bl).  The addu instruction writes the sum of the
   two low words into the double-word temporary __ll; its low half becomes
   SL and its high half (the carry) is added to AH + BH.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do \
    { \
      UDItype __ll; \
      __asm__ ("addu .l1 %1, %2, %0" \
               : "=a" (__ll) : "a" (al), "a" (bl)); \
      (sl) = (USItype)__ll; \
      (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \
    } \
  while (0)
1517
1518#ifdef _TMS320C6400_PLUS
/* 32x32->64 bit unsigned product.  Both arguments are parenthesized so
   that operator expressions such as `a + b' are converted and multiplied
   as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u) * (USItype)(v))
/* (w1,w0) = u * v, unsigned: both operands are truncated to 32 bits,
   multiplied as a 64-bit value, and the product is split into its high
   and low words.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \
    (w1) = (USItype) (__x >> 32); \
    (w0) = (USItype) (__x); \
  } while (0)
1526#endif /* _TMS320C6400_PLUS */
1527
/* Number of leading zero bits in X, via the compiler builtin (whose
   result is undefined for X == 0).  */
#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
1529#ifdef _TMS320C6400
/* Number of trailing zero bits in X, via the compiler builtin (whose
   result is undefined for X == 0).  */
#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
1531#endif
1532#define UMUL_TIME 4
1533#define UDIV_TIME 40
1534#endif /* _TMS320C6X */
1535
09fa8841
BW
#if defined (__xtensa__) && W_TYPE_SIZE == 32
/* This code is not Xtensa-configuration-specific, so rely on the compiler
   to expand builtin functions depending on what configuration features
   are available.  This avoids library calls when the operation can be
   performed in-line.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    DWunion __w; \
    __w.ll = __builtin_umulsidi3 (u, v); \
    (w1) = __w.s.high; \
    (w0) = __w.s.low; \
  } while (0)
#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif /* __xtensa__ */
1552
ba964383
NC
#if defined xstormy16
/* NOTE(review): this prototype is for __stormy16_count_leading_zeros, but
   the macro below calls __clzhi2, which is not declared in this section --
   confirm which helper is intended.  */
extern UHItype __stormy16_count_leading_zeros (UHItype);
/* Count leading zeros 16 bits at a time, from the most significant chunk
   downwards, stopping at the first chunk that is not entirely zero.  The
   macro's locals live in the reserved namespace so that arguments named
   `size' or `c' are not captured.  */
#define count_leading_zeros(count, x) \
  do \
    { \
      UHItype __size; \
 \
      /* We assume that W_TYPE_SIZE is a multiple of 16...  */ \
      for ((count) = 0, __size = W_TYPE_SIZE; __size; __size -= 16) \
        { \
          UHItype __c; \
 \
          __c = __clzhi2 ((x) >> (__size - 16)); \
          (count) += __c; \
          if (__c != 16) \
            break; \
        } \
    } \
  while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1574
021b3949
JJ
#if defined (__z8000__) && W_TYPE_SIZE == 16
/* (sh,sl) = (ah,al) + (bh,bl): add the low words, then add the high words
   with carry.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
           : "=r" ((unsigned int)(sh)), \
             "=&r" ((unsigned int)(sl)) \
           : "%0" ((unsigned int)(ah)), \
             "r" ((unsigned int)(bh)), \
             "%1" ((unsigned int)(al)), \
             "rQR" ((unsigned int)(bl)))
/* (sh,sl) = (ah,al) - (bh,bl).  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
           : "=r" ((unsigned int)(sh)), \
             "=&r" ((unsigned int)(sl)) \
           : "0" ((unsigned int)(ah)), \
             "r" ((unsigned int)(bh)), \
             "1" ((unsigned int)(al)), \
             "rQR" ((unsigned int)(bl)))
/* (xh,xl) = m0 * m1, unsigned.  The high word of the mult result is
   corrected afterwards for each operand whose top bit (bit 15) is set
   (presumably because mult is a signed multiply -- confirm against the
   Z8000 manual).  */
#define umul_ppmm(xh, xl, m0, m1) \
  do { \
    union {long int __ll; \
           struct {unsigned int __h, __l;} __i; \
          } __xx; \
    unsigned int __m0 = (m0), __m1 = (m1); \
    __asm__ ("mult %S0,%H3" \
             : "=r" (__xx.__i.__h), \
               "=r" (__xx.__i.__l) \
             : "%1" (__m0), \
               "rQR" (__m1)); \
    (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
    (xh) += ((((signed int) __m0 >> 15) & __m1) \
             + (((signed int) __m1 >> 15) & __m0)); \
  } while (0)
#endif /* __z8000__ */
1608
e1a79915
RS
1609#endif /* __GNUC__ */
1610
/* If this machine has no inline assembler, use C macros.  */

/* add_ssaaaa (sh, sl, ah, al, bh, bl): (sh,sl) = (ah,al) + (bh,bl).
   The low words are added first; a carry occurred exactly when the
   wrapped sum is smaller than one of its operands.  AL and BL are copied
   into UWtype locals so each argument is evaluated once and the low-word
   arithmetic is done in UWtype.  */
#if !defined (add_ssaaaa)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __al = (al); \
    UWtype __bl = (bl); \
    UWtype __x = __al + __bl; \
    (sh) = (ah) + (bh) + (__x < __al); \
    (sl) = __x; \
  } while (0)
#endif
1622
/* sub_ddmmss (sh, sl, ah, al, bh, bl): (sh,sl) = (ah,al) - (bh,bl).
   The low words are subtracted first; a borrow occurred exactly when the
   wrapped difference is larger than the minuend.  AL and BL are copied
   into UWtype locals so each argument is evaluated once and the low-word
   arithmetic is done in UWtype.  */
#if !defined (sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __al = (al); \
    UWtype __bl = (bl); \
    UWtype __x = __al - __bl; \
    (sh) = (ah) - (bh) - (__x > __al); \
    (sl) = __x; \
  } while (0)
#endif
1632
7e765675
UW
/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
   smul_ppmm.  The signed high word is turned into the unsigned one by
   adding, for each operand whose top bit is set, the other operand
   (the mask -(x >> (W_TYPE_SIZE - 1)) is all ones exactly in that
   case).  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __w1; \
    UWtype __xm0 = (u), __xm1 = (v); \
    smul_ppmm (__w1, w0, __xm0, __xm1); \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
                + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
  } while (0)
#endif
1645
/* If we still don't have umul_ppmm, define it using plain C.
   Each operand is split into half words, the four half-word products are
   formed, and the two middle products are folded into the high and low
   result words with explicit carry handling.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UWtype __x0, __x1, __x2, __x3; \
    UHWtype __ul, __vl, __uh, __vh; \
 \
    __ul = __ll_lowpart (u); \
    __uh = __ll_highpart (u); \
    __vl = __ll_lowpart (v); \
    __vh = __ll_highpart (v); \
 \
    __x0 = (UWtype) __ul * __vl; \
    __x1 = (UWtype) __ul * __vh; \
    __x2 = (UWtype) __uh * __vl; \
    __x3 = (UWtype) __uh * __vh; \
 \
    __x1 += __ll_highpart (__x0);/* this can't give carry */ \
    __x1 += __x2; /* but this indeed can */ \
    if (__x1 < __x2) /* did we get it? */ \
      __x3 += __ll_B; /* yes, add it in the proper pos.  */ \
 \
    (w1) = __x3 + __ll_highpart (__x1); \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \
  } while (0)
#endif
1672
/* Default __umulsidi3: run umul_ppmm into the two halves of a DWunion and
   yield the union's double-word member.  Uses a GNU statement
   expression.  */
#if !defined (__umulsidi3)
#define __umulsidi3(u, v) \
  ({DWunion __w; \
    umul_ppmm (__w.s.high, __w.s.low, u, v); \
    __w.ll; })
#endif
1679
d83dd29a
TG
/* Define this unconditionally, so it can be used for debugging.
   Plain C division of the double word (n1,n0) by d, producing quotient q
   and remainder r.  The quotient is developed one half word at a time:
   each step divides by the high half of D, then corrects the estimate
   (by at most two decrements) using the low half of D.  Code that falls
   back to this routine sets UDIV_NEEDS_NORMALIZATION to 1.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do { \
    UWtype __d1, __d0, __q1, __q0; \
    UWtype __r1, __r0, __m; \
    __d1 = __ll_highpart (d); \
    __d0 = __ll_lowpart (d); \
 \
    /* High half of the quotient.  */ \
    __r1 = (n1) % __d1; \
    __q1 = (n1) / __d1; \
    __m = (UWtype) __q1 * __d0; \
    __r1 = __r1 * __ll_B | __ll_highpart (n0); \
    if (__r1 < __m) \
      { \
        __q1--, __r1 += (d); \
        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m) \
            __q1--, __r1 += (d); \
      } \
    __r1 -= __m; \
 \
    /* Low half of the quotient, same scheme.  */ \
    __r0 = __r1 % __d1; \
    __q0 = __r1 / __d1; \
    __m = (UWtype) __q0 * __d0; \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
    if (__r0 < __m) \
      { \
        __q0--, __r0 += (d); \
        if (__r0 >= (d)) \
          if (__r0 < __m) \
            __q0--, __r0 += (d); \
      } \
    __r0 -= __m; \
 \
    (q) = (UWtype) __q1 * __ll_B | __q0; \
    (r) = __r0; \
  } while (0)
1814cfd9
TG
1717
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  The helper returns the
   quotient and stores the remainder through its first argument.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do { \
    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \
    UWtype __r; \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d); \
    (r) = __r; \
  } while (0)
#endif
1729
d83dd29a
TG
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c,
   and record that this fallback needs a normalized divisor.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
1735
/* Generic count_leading_zeros.  First find the highest non-zero 8-bit
   chunk of X (by comparisons when the word has at most 32 bits, by a
   downward loop otherwise); __a is the bit offset of that chunk.  Then
   __clz_tab, indexed by the chunk's value, supplies the number of
   significant bits within it.  For X == 0 the result is W_TYPE_SIZE.  */
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do { \
    UWtype __xr = (x); \
    UWtype __a; \
 \
    if (W_TYPE_SIZE <= 32) \
      { \
        __a = __xr < ((UWtype)1<<2*__BITS4) \
          ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4) \
          : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4); \
      } \
    else \
      { \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
          if (((__xr >> __a) & 0xff) != 0) \
            break; \
      } \
 \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1759
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.
   X & -X isolates the lowest set bit of X; the position of that bit is
   W_TYPE_SIZE - 1 minus its number of leading zeros.  */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    UWtype __ctz_c; \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
    (count) = W_TYPE_SIZE - 1 - __ctz_c; \
  } while (0)
#endif
1771
/* Default: the selected udiv_qrnnd accepts an unnormalized divisor, unless
   UDIV_NEEDS_NORMALIZATION was already defined.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif