]> git.ipfire.org Git - thirdparty/glibc.git/blame - stdlib/longlong.h
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / stdlib / longlong.h
CommitLineData
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   Copyright (C) 1991-2016 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

/* You have to define the following before including this file:

   UWtype -- An unsigned type, default type for operations (typically a "word")
   UHWtype -- An unsigned type, at least half the size of UWtype.
   UDWtype -- An unsigned type, at least twice as large as UWtype
   W_TYPE_SIZE -- size in bits of UWtype

   UQItype -- Unsigned 8 bit type.
   SItype, USItype -- Signed and unsigned 32 bit types.
   DItype, UDItype -- Signed and unsigned 64 bit types.

   On a 32 bit machine UWtype should typically be USItype;
   on a 64 bit machine, UWtype should typically be UDItype.  */

/* Word-splitting helpers.  They expand lazily, so they use whatever
   UWtype and W_TYPE_SIZE are in effect at the point of use (including
   the defaults supplied just below).  */

/* Number of bits in a quarter of a UWtype word.  */
#define __BITS4 (W_TYPE_SIZE / 4)
/* The value 2^(W_TYPE_SIZE / 2), as a UWtype.  */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
/* Low half of T: the bits below W_TYPE_SIZE / 2.  */
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
/* High half of T: T shifted right by W_TYPE_SIZE / 2 bits.  */
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))

/* Default to a 32-bit word when the includer did not pick a size.
   USItype and UDItype must still be provided by the includer.  */
#ifndef W_TYPE_SIZE
#define W_TYPE_SIZE 32
#define UWtype USItype
#define UHWtype USItype
#define UDWtype UDItype
#endif

def7fbd6
AS
55/* Used in glibc only. */
56#ifndef attribute_hidden
57#define attribute_hidden
58#endif
59
6f8a7dff 60extern const UQItype __clz_tab[256] attribute_hidden;
f30070ae 61
/* Define auxiliary asm macros.

   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
   UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
   word product in HIGH_PROD and LOW_PROD.

   2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
   UDWtype product.  This is just a variant of umul_ppmm.

   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator) divides a UDWtype, composed by the UWtype integers
   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
   than DENOMINATOR for correct operation.  If the implementation
   additionally requires the most significant bit of DENOMINATOR to be 1,
   the pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.

   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
   denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
   is rounded towards 0.

   5) count_leading_zeros(count, x) counts the number of zero-bits from the
   msb to the first nonzero bit in the UWtype X.  This is the number of
   steps X needs to be shifted left to set the msb.  Undefined for X == 0,
   unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.

   6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
   from the least significant end.

   7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
   high_addend_2, low_addend_2) adds two two-word UWtype integers, composed
   by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
   respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
   (i.e. carry out) is not stored anywhere, and is lost.

   8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
   high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
   composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
   LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
   and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
   and is lost.

   If any of these macros are left undefined for a particular CPU,
   C macros are used.  */

/* The CPUs come in alphabetical order below.

   Please add support for more CPUs here, or improve the current support
   for the CPUs below!
   (E.g. WE32100, IBM360.)  */

113#if defined (__GNUC__) && !defined (NO_ASM)
114
/* We sometimes need to clobber "cc" with gcc2, but that would not be
   understood by gcc1.  Use cpp to avoid major code duplication.

   __CLOBBER_CC supplies the ": "cc"" that starts a clobber list;
   __AND_CLOBBER_CC supplies the ", "cc"" that extends an existing one.
   Both expand to nothing for __GNUC__ < 2.  */
#if __GNUC__ < 2
#define __CLOBBER_CC
#define __AND_CLOBBER_CC
#else /* __GNUC__ >= 2 */
#define __CLOBBER_CC : "cc"
#define __AND_CLOBBER_CC , "cc"
#endif /* __GNUC__ < 2 */

/* AArch64: bit counting is delegated to the compiler builtins, using
   the variant that matches W_TYPE_SIZE.  COUNT_LEADING_ZEROS_0 is the
   value advertised for count_leading_zeros when X == 0.  */
#if defined (__aarch64__)

#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* __aarch64__ */

/* Alpha, 64-bit words.  */
#if defined (__alpha) && W_TYPE_SIZE == 64
/* High word from __builtin_alpha_umulh, low word from a plain
   multiply.  The operands are copied first so each is evaluated
   once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UDItype __m0 = (m0), __m1 = (m1); \
    (ph) = __builtin_alpha_umulh (__m0, __m1); \
    (pl) = __m0 * __m1; \
  } while (0)
#define UMUL_TIME 46
#ifndef LONGLONG_STANDALONE
/* Division goes through the out-of-line helper __udiv_qrnnd, which
   returns the quotient and stores the remainder through its first
   argument.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { UDItype __r; \
    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
    (r) = __r; \
  } while (0)
extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#ifdef __alpha_cix__
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 64
#else
/* Without CIX: use cmpbge against zero and __clz_tab to select a
   byte, extract it with extbl, then finish with a second __clz_tab
   lookup on that byte.  */
#define count_leading_zeros(COUNT,X) \
  do { \
    UDItype __xr = (X), __t, __a; \
    __t = __builtin_alpha_cmpbge (0, __xr); \
    __a = __clz_tab[__t ^ 0xff] - 1; \
    __t = __builtin_alpha_extbl (__xr, __a); \
    (COUNT) = 64 - (__clz_tab[__t] + __a*8); \
  } while (0)
/* Same two-level idea from the low end: isolate the lowest set bit of
   the (inverted) cmpbge mask to get a byte index, then isolate the
   lowest set bit of that byte; each bit position is decoded with the
   0xCC / 0xF0 / 0xAA masks.  */
#define count_trailing_zeros(COUNT,X) \
  do { \
    UDItype __xr = (X), __t, __a; \
    __t = __builtin_alpha_cmpbge (0, __xr); \
    __t = ~__t & -~__t; \
    __a = ((__t & 0xCC) != 0) * 2; \
    __a += ((__t & 0xF0) != 0) * 4; \
    __a += ((__t & 0xAA) != 0); \
    __t = __builtin_alpha_extbl (__xr, __a); \
    __a <<= 3; \
    __t &= -__t; \
    __a += ((__t & 0xCC) != 0) * 2; \
    __a += ((__t & 0xF0) != 0) * 4; \
    __a += ((__t & 0xAA) != 0); \
    (COUNT) = __a; \
  } while (0)
#endif /* __alpha_cix__ */
#endif /* __alpha */

/* ARC, 32-bit words.  */
#if defined (__arc__) && W_TYPE_SIZE == 32
/* Two-word add: add.f produces the low word, adc the high word.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%r" ((USItype) (ah)), \
             "rIJ" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rIJ" ((USItype) (bl)))
/* Two-word subtract: sub.f produces the low word, sbc the high word.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rIJ" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rIJ" ((USItype) (bl)))

/* 32x32->64 multiply written in C.  The arguments are parenthesized
   so that expression arguments such as `a + b' are cast and
   multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
#ifdef __ARC_NORM__
/* Uses norm.f, with mov.mi forcing -1 in the "mi" case; the count is
   that result plus one.  */
#define count_leading_zeros(count, x) \
  do \
    { \
      SItype c_; \
      \
      __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
      (count) = c_ + 1; \
    } \
  while (0)
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif

/* ARM (ARM state or Thumb-2), 32-bit words.  */
#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
 && W_TYPE_SIZE == 32
/* Two-word add: adds produces the low word, adc the high word.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%r" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rI" ((USItype) (bl)) __CLOBBER_CC)
/* Two-word subtract: subs produces the low word, sbc the high word.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rI" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rI" ((USItype) (bl)) __CLOBBER_CC)
# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
     || defined(__ARM_ARCH_3__)
/* For these architecture levels the product is assembled from four
   16x16 partial products using three scratch registers.  */
# define umul_ppmm(xh, xl, a, b) \
  do { \
    register USItype __t0, __t1, __t2; \
    __asm__ ("%@ Inlined umul_ppmm\n" \
             " mov %2, %5, lsr #16\n" \
             " mov %0, %6, lsr #16\n" \
             " bic %3, %5, %2, lsl #16\n" \
             " bic %4, %6, %0, lsl #16\n" \
             " mul %1, %3, %4\n" \
             " mul %4, %2, %4\n" \
             " mul %3, %0, %3\n" \
             " mul %0, %2, %0\n" \
             " adds %3, %4, %3\n" \
             " addcs %0, %0, #65536\n" \
             " adds %1, %1, %3, lsl #16\n" \
             " adc %0, %0, %3, lsr #16" \
             : "=&r" ((USItype) (xh)), \
               "=r" ((USItype) (xl)), \
               "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
             : "r" ((USItype) (a)), \
               "r" ((USItype) (b)) __CLOBBER_CC ); \
  } while (0)
# define UMUL_TIME 20
# else
# define umul_ppmm(xh, xl, a, b) \
  do { \
    /* Generate umull, under compiler control. */ \
    register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \
    (xl) = (USItype)__t0; \
    (xh) = (USItype)(__t0 >> 32); \
  } while (0)
# define UMUL_TIME 3
# endif
# define UDIV_TIME 100
#endif /* __arm__ */

/* Any ARM target (note: no Thumb or W_TYPE_SIZE restriction on this
   conditional).  */
#if defined(__arm__)
/* Let gcc decide how best to implement count_leading_zeros.  */
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif

/* AVR: bit counting via compiler builtins.  The builtin suffix is
   chosen per W_TYPE_SIZE: plain for 16, `l' for 32, `ll' for 64.  */
#if defined (__AVR__)

#if W_TYPE_SIZE == 16
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
#define COUNT_LEADING_ZEROS_0 16
#endif /* W_TYPE_SIZE == 16 */

#if W_TYPE_SIZE == 32
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* W_TYPE_SIZE == 32 */

#if W_TYPE_SIZE == 64
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X))
#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
#define COUNT_LEADING_ZEROS_0 64
#endif /* W_TYPE_SIZE == 64 */

#endif /* defined (__AVR__) */

/* CRIS.  Which macros are provided depends on __CRIS_arch_version.  */
#if defined (__CRIS__)

#if __CRIS_arch_version >= 3
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif /* __CRIS_arch_version >= 3 */

#if __CRIS_arch_version >= 8
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif /* __CRIS_arch_version >= 8 */

/* From version 10 on, the widening multiply is written in C;
   otherwise __umulsidi3 is an external function.  The self-referential
   #define keeps `#ifdef __umulsidi3' true in that case as well.  */
#if __CRIS_arch_version >= 10
#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
#else
#define __umulsidi3 __umulsidi3
extern UDItype __umulsidi3 (USItype, USItype);
#endif /* __CRIS_arch_version >= 10 */

/* Split the 64-bit product of __umulsidi3 into high word W1 and low
   word W0.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = __umulsidi3 (u, v); \
    (w0) = (USItype) (__x); \
    (w1) = (USItype) (__x >> 32); \
  } while (0)

/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
   DFmode ("double" intrinsics, avoiding two of the three insns handling
   carry), but defining them as open-code C composing and doing the
   operation in DImode (UDImode) shows that the DImode needs work:
   register pressure from requiring neighboring registers and the
   traffic to and from them come to dominate, in the 4.7 series.  */

#endif /* defined (__CRIS__) */

/* HP PA-RISC, 32-bit words.  */
#if defined (__hppa) && W_TYPE_SIZE == 32
/* Two-word add: add produces the low word, addc the high word.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rM" ((USItype) (ah)), \
             "rM" ((USItype) (bh)), \
             "%rM" ((USItype) (al)), \
             "rM" ((USItype) (bl)))
/* Two-word subtract: sub produces the low word, subb the high word.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rM" ((USItype) (ah)), \
             "rM" ((USItype) (bh)), \
             "rM" ((USItype) (al)), \
             "rM" ((USItype) (bl)))
#if defined (_PA_RISC1_1)
/* xmpyu writes a 64-bit result into __t.__f; the union then exposes
   its two 32-bit halves as __w1 (first member) and __w0.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    union \
      { \
        UDItype __f; \
        struct {USItype __w1, __w0;} __w1w0; \
      } __t; \
    __asm__ ("xmpyu %1,%2,%0" \
             : "=x" (__t.__f) \
             : "x" ((USItype) (u)), \
               "x" ((USItype) (v))); \
    (w1) = __t.__w1w0.__w1; \
    (w0) = __t.__w1w0.__w0; \
  } while (0)
#define UMUL_TIME 8
#else
#define UMUL_TIME 30
#endif
#define UDIV_TIME 40
/* Narrows the search in steps of 16, 8, 4 and 2 bits: at each step a
   zero upper part adds the step to the count, otherwise the value is
   shifted down.  The inline comments in the asm text describe each
   instruction.  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __tmp; \
    __asm__ ( \
       "ldi 1,%0\n" \
" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
" ldo 16(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
" ldo 8(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
" ldo 4(%0),%0 ; Yes. Perform add.\n" \
" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
" ldo 2(%0),%0 ; Yes. Perform add.\n" \
" extru %1,30,1,%1 ; Extract bit 1.\n" \
" sub %0,%1,%0 ; Subtract it.\n" \
       : "=r" (count), "=r" (__tmp) : "1" (x)); \
  } while (0)
#endif

/* IBM i370 / s390 / MVS, 32-bit words.  Only the signed multiply and
   divide primitives are provided here.  */
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
#if !defined (__zarch__)
/* Signed 32x32->64 multiply.  The 64-bit result lives in the union
   member __ll and is read back as __h (high) and __l (low).  */
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __asm__ ("lr %N0,%1\n\tmr %0,%2" \
             : "=&r" (__x.__ll) \
             : "r" (m0), "r" (m1)); \
    (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
  } while (0)
/* Signed divide of the two-word value N1:N0 by D.  After "dr" the
   quotient is read from __l and the remainder from __h.  N1 and N0
   are parenthesized so expression arguments are stored as a whole.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    union {DItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __x; \
    __x.__i.__h = (n1); __x.__i.__l = (n0); \
    __asm__ ("dr %0,%2" \
             : "=r" (__x.__ll) \
             : "0" (__x.__ll), "r" (d)); \
    (q) = __x.__i.__l; (r) = __x.__i.__h; \
  } while (0)
#else
/* __zarch__ variants: the operands are pinned to the registers named
   "0" and "1" with explicit register variables, and the instruction
   names %r0 directly.  */
#define smul_ppmm(xh, xl, m0, m1) \
  do { \
    register SItype __r0 __asm__ ("0"); \
    register SItype __r1 __asm__ ("1") = (m0); \
    \
    __asm__ ("mr\t%%r0,%3" \
             : "=r" (__r0), "=r" (__r1) \
             : "r" (__r1), "r" (m1)); \
    (xh) = __r0; (xl) = __r1; \
  } while (0)

#define sdiv_qrnnd(q, r, n1, n0, d) \
  do { \
    register SItype __r0 __asm__ ("0") = (n1); \
    register SItype __r1 __asm__ ("1") = (n0); \
    \
    __asm__ ("dr\t%%r0,%4" \
             : "=r" (__r0), "=r" (__r1) \
             : "r" (__r0), "r" (__r1), "r" (d)); \
    (q) = __r1; (r) = __r0; \
  } while (0)
#endif /* __zarch__ */
#endif

/* 32-bit x86.  The asm templates use the {att|intel} dialect
   alternatives so they assemble under either syntax.  */
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
/* Two-word add: add on the low words, adc on the high words.  The
   first addend of each pair is tied to the matching output.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
/* Two-word subtract: sub on the low words, sbb on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "g" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
/* mul: low word of the product in the "a" register (W0), high word in
   the "d" register (W1).  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{l} %3" \
           : "=a" ((USItype) (w0)), \
             "=d" ((USItype) (w1)) \
           : "%0" ((USItype) (u)), \
             "rm" ((USItype) (v)))
/* div: N0 enters in the "a" register and N1 in the "d" register;
   the quotient is returned in "a" and the remainder in "d".  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{l} %4" \
           : "=a" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "rm" ((USItype) (dv)))
#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* 80x86 */

/* x86-64, 64-bit words.  Same structure as the 32-bit x86 macros,
   with the {q} instruction suffix and UDItype operands.  */
#if defined (__x86_64__) && W_TYPE_SIZE == 64
/* Two-word add: add on the low words, adc on the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \
           : "=r" ((UDItype) (sh)), \
             "=&r" ((UDItype) (sl)) \
           : "%0" ((UDItype) (ah)), \
             "rme" ((UDItype) (bh)), \
             "%1" ((UDItype) (al)), \
             "rme" ((UDItype) (bl)))
/* Two-word subtract: sub on the low words, sbb on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \
           : "=r" ((UDItype) (sh)), \
             "=&r" ((UDItype) (sl)) \
           : "0" ((UDItype) (ah)), \
             "rme" ((UDItype) (bh)), \
             "1" ((UDItype) (al)), \
             "rme" ((UDItype) (bl)))
/* mul: low word of the product in the "a" register (W0), high word in
   the "d" register (W1).  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{q} %3" \
           : "=a" ((UDItype) (w0)), \
             "=d" ((UDItype) (w1)) \
           : "%0" ((UDItype) (u)), \
             "rm" ((UDItype) (v)))
/* div: N0 enters in the "a" register and N1 in the "d" register;
   the quotient is returned in "a" and the remainder in "d".  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{q} %4" \
           : "=a" ((UDItype) (q)), \
             "=d" ((UDItype) (r)) \
           : "0" ((UDItype) (n0)), \
             "1" ((UDItype) (n1)), \
             "rm" ((UDItype) (dv)))
#define count_leading_zeros(count, x) ((count) = __builtin_clzll (x))
#define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* x86_64 */

/* Intel i960, 32-bit words.  Both macros are GNU statement
   expressions built around the emul instruction.  */
#if defined (__i960__) && W_TYPE_SIZE == 32
/* emul writes a 64-bit product into __xx.__ll; the halves are read
   back as __l (first member) and __h.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __asm__ ("emul %2,%1,%0" \
           : "=d" (__xx.__ll) \
           : "%dI" ((USItype) (u)), \
             "dI" ((USItype) (v))); \
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* Same instruction, yielding the whole UDItype product as the value of
   the expression.  */
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("emul %2,%1,%0" \
             : "=d" (__w) \
             : "%dI" ((USItype) (u)), \
               "dI" ((USItype) (v))); \
    __w; })
#endif /* __i960__ */

/* IA-64, 64-bit words.  */
#if defined (__ia64) && W_TYPE_SIZE == 64
/* This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    if ((al) < (bl)) \
      (sh) = (ah) - (bh) - 1; \
    else \
      (sh) = (ah) - (bh); \
    (sl) = __x; \
  } while (0)

/* Do both product parts in assembly, since that gives better code with
   all gcc versions.  Some callers will just use the upper part, and in
   that situation we waste an instruction, but not any cycles.  */
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
           : "=&f" (ph), "=f" (pl) \
           : "f" (m0), "f" (m1))
/* mux1 @rev and czx1.l pick a byte-granular shift _c; the remaining
   bits within that byte are then resolved with compares against
   1 << 4 and 1 << 2 plus a final shift.  */
#define count_leading_zeros(count, x) \
  do { \
    UWtype _x = (x), _y, _a, _c; \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
    _c = (_a - 1) << 3; \
    _x >>= _c; \
    if (_x >= 1 << 4) \
      _x >>= 4, _c += 4; \
    if (_x >= 1 << 2) \
      _x >>= 2, _c += 2; \
    _c += _x >> 1; \
    (count) = W_TYPE_SIZE - 1 - _c; \
  } while (0)
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
   based, and we don't need a special case for x==0 here */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    __asm__ ("popcnt %0 = %1" \
             : "=r" (count) \
             : "r" ((__ctz_x-1) & ~__ctz_x)); \
  } while (0)
#define UMUL_TIME 14
#endif

/* Renesas M32R, 32-bit words.  Both macros clear the condition bit
   with a self-compare and then chain two addx/subx instructions;
   "cbit" is declared as clobbered.  */
#if defined (__M32R__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)) \
           : "cbit")
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  /* The cmp clears the condition bit.  */ \
  __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "r" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "r" ((USItype) (bl)) \
           : "cbit")
#endif /* __M32R__ */

/* Motorola 68000 family (including ColdFire), 32-bit words.  */
#if defined (__mc68000__) && W_TYPE_SIZE == 32
/* Two-word add: add on the low words, addx on the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
           : "=d" ((USItype) (sh)), \
             "=&d" ((USItype) (sl)) \
           : "%0" ((USItype) (ah)), \
             "d" ((USItype) (bh)), \
             "%1" ((USItype) (al)), \
             "g" ((USItype) (bl)))
/* Two-word subtract: sub on the low words, subx on the high words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
           : "=d" ((USItype) (sh)), \
             "=&d" ((USItype) (sl)) \
           : "0" ((USItype) (ah)), \
             "d" ((USItype) (bh)), \
             "1" ((USItype) (al)), \
             "g" ((USItype) (bl)))

/* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r.  */
#if (defined (__mc68020__) && !defined (__mc68060__))
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu%.l %3,%1:%0" \
           : "=d" ((USItype) (w0)), \
             "=d" ((USItype) (w1)) \
           : "%0" ((USItype) (u)), \
             "dmi" ((USItype) (v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divu%.l %4,%1:%0" \
           : "=d" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "dmi" ((USItype) (d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divs%.l %4,%1:%0" \
           : "=d" ((USItype) (q)), \
             "=d" ((USItype) (r)) \
           : "0" ((USItype) (n0)), \
             "1" ((USItype) (n1)), \
             "dmi" ((USItype) (d)))

#elif defined (__mcoldfire__) /* not mc68020 */

/* Product assembled from four 16x16 mulu partial products in the
   fixed registers d0-d4 (all listed as clobbered).  Differs from the
   generic version below only in using clr%.w where that one uses
   eor%.w.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           " move%.l %2,%/d0\n" \
           " move%.l %3,%/d1\n" \
           " move%.l %/d0,%/d2\n" \
           " swap %/d0\n" \
           " move%.l %/d1,%/d3\n" \
           " swap %/d1\n" \
           " move%.w %/d2,%/d4\n" \
           " mulu %/d3,%/d4\n" \
           " mulu %/d1,%/d2\n" \
           " mulu %/d0,%/d3\n" \
           " mulu %/d0,%/d1\n" \
           " move%.l %/d4,%/d0\n" \
           " clr%.w %/d0\n" \
           " swap %/d0\n" \
           " add%.l %/d0,%/d2\n" \
           " add%.l %/d3,%/d2\n" \
           " jcc 1f\n" \
           " add%.l %#65536,%/d1\n" \
           "1: swap %/d2\n" \
           " moveq %#0,%/d0\n" \
           " move%.w %/d2,%/d0\n" \
           " move%.w %/d4,%/d2\n" \
           " move%.l %/d2,%1\n" \
           " add%.l %/d1,%/d0\n" \
           " move%.l %/d0,%0" \
           : "=g" ((USItype) (xh)), \
             "=g" ((USItype) (xl)) \
           : "g" ((USItype) (a)), \
             "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400
#else /* not ColdFire */
/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("| Inlined umul_ppmm\n" \
           " move%.l %2,%/d0\n" \
           " move%.l %3,%/d1\n" \
           " move%.l %/d0,%/d2\n" \
           " swap %/d0\n" \
           " move%.l %/d1,%/d3\n" \
           " swap %/d1\n" \
           " move%.w %/d2,%/d4\n" \
           " mulu %/d3,%/d4\n" \
           " mulu %/d1,%/d2\n" \
           " mulu %/d0,%/d3\n" \
           " mulu %/d0,%/d1\n" \
           " move%.l %/d4,%/d0\n" \
           " eor%.w %/d0,%/d0\n" \
           " swap %/d0\n" \
           " add%.l %/d0,%/d2\n" \
           " add%.l %/d3,%/d2\n" \
           " jcc 1f\n" \
           " add%.l %#65536,%/d1\n" \
           "1: swap %/d2\n" \
           " moveq %#0,%/d0\n" \
           " move%.w %/d2,%/d0\n" \
           " move%.w %/d4,%/d2\n" \
           " move%.l %/d2,%1\n" \
           " add%.l %/d1,%/d0\n" \
           " move%.l %/d0,%0" \
           : "=g" ((USItype) (xh)), \
             "=g" ((USItype) (xl)) \
           : "g" ((USItype) (a)), \
             "g" ((USItype) (b)) \
           : "d0", "d1", "d2", "d3", "d4")
#define UMUL_TIME 100
#define UDIV_TIME 400

#endif /* not mc68020 */

/* The '020, '030, '040 and '060 have bitfield insns.
   cpu32 disguises as a 68020, but lacks them.  */
#if defined (__mc68020__) && !defined (__mcpu32__)
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
           : "=d" ((USItype) (count)) \
           : "od" ((USItype) (x)), "n" (0))
/* Some ColdFire architectures have a ff1 instruction supported via
   __builtin_clz.  */
#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* mc68000 */

/* Motorola 88000, 32-bit words.  */
#if defined (__m88000__) && W_TYPE_SIZE == 32
/* Two-word add: addu.co on the low words, addu.ci on the high words.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%rJ" ((USItype) (ah)), \
             "rJ" ((USItype) (bh)), \
             "%rJ" ((USItype) (al)), \
             "rJ" ((USItype) (bl)))
/* Two-word subtract: subu.co on the low words, subu.ci on the high
   words.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "rJ" ((USItype) (ah)), \
             "rJ" ((USItype) (bh)), \
             "rJ" ((USItype) (al)), \
             "rJ" ((USItype) (bl)))
/* The ff1 result is XORed with 31 to turn it into a leading-zero
   count.  */
#define count_leading_zeros(count, x) \
  do { \
    USItype __cbtmp; \
    __asm__ ("ff1 %0,%1" \
             : "=r" (__cbtmp) \
             : "r" ((USItype) (x))); \
    (count) = __cbtmp ^ 31; \
  } while (0)
#define COUNT_LEADING_ZEROS_0 63 /* sic */
#if defined (__mc88110__)
/* mulu.d writes a 64-bit product into __xx.__ll; the halves are read
   back as __h (first member) and __l.  */
#define umul_ppmm(wh, wl, u, v) \
  do { \
    union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
    __asm__ ("mulu.d %0,%1,%2" \
             : "=r" (__xx.__ll) \
             : "r" ((USItype) (u)), \
               "r" ((USItype) (v))); \
    (wh) = __xx.__i.__h; \
    (wl) = __xx.__i.__l; \
  } while (0)
/* divu.d yields only the quotient; the remainder is recomputed as
   N0 - quotient * D.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __h, __l;} __i; \
          } __xx; \
  USItype __q; \
  __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
  __asm__ ("divu.d %0,%1,%2" \
           : "=r" (__q) \
           : "r" (__xx.__ll), \
             "r" ((USItype) (d))); \
  (r) = (n0) - __q * (d); (q) = __q; })
#define UMUL_TIME 5
#define UDIV_TIME 25
#else
#define UMUL_TIME 17
#define UDIV_TIME 150
#endif /* __mc88110__ */
#endif /* __m88000__ */

/* Matsushita MN10300 / AM33.  Uses __asm__ (not the bare `asm'
   keyword) like every other target in this file, so the macros also
   work when the includer is compiled in a strict ISO mode.  */
#if defined (__mn10300__)
# if defined (__AM33__)
# define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
# define umul_ppmm(w1, w0, u, v) \
    __asm__("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# define smul_ppmm(w1, w0, u, v) \
    __asm__("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
# define umul_ppmm(w1, w0, u, v) \
    __asm__("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# define smul_ppmm(w1, w0, u, v) \
    __asm__("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* Two-word add and subtract are done in C on the double-word member
   of DWunion (a type declared outside this file).  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll + __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll - __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
# define udiv_qrnnd(q, r, nh, nl, d) \
    __asm__("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d) \
    __asm__("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif

/* MIPS, 32-bit words.  */
#if defined (__mips__) && W_TYPE_SIZE == 32
/* Widening multiply written in C: form the 64-bit product, then split
   it into high word W1 and low word W0.  */
#define umul_ppmm(w1, w0, u, v) \
  do { \
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \
    (w1) = (USItype) (__x >> 32); \
    (w0) = (USItype) (__x); \
  } while (0)
#define UMUL_TIME 10
#define UDIV_TIME 100

/* Only for __mips == 32 or 64, and not in MIPS16 mode.  */
#if (__mips == 32 || __mips == 64) && ! defined (__mips16)
#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* __mips__ */

/* National Semiconductor NS32000, 32-bit words.  The multiply and
   divide macros are GNU statement expressions.  */
#if defined (__ns32000__) && W_TYPE_SIZE == 32
/* meid leaves a 64-bit product in __xx.__ll; the halves are read back
   as __l (first member) and __h.  */
#define umul_ppmm(w1, w0, u, v) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __asm__ ("meid %2,%0" \
           : "=g" (__xx.__ll) \
           : "%0" ((USItype) (u)), \
             "g" ((USItype) (v))); \
  (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
/* Same instruction, yielding the whole UDItype product as the value of
   the expression.  */
#define __umulsidi3(u, v) \
  ({UDItype __w; \
    __asm__ ("meid %2,%0" \
             : "=g" (__w) \
             : "%0" ((USItype) (u)), \
               "g" ((USItype) (v))); \
    __w; })
/* deid divides the two-word value held in __xx; afterwards the
   remainder is read from __l and the quotient from __h.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll; \
           struct {USItype __l, __h;} __i; \
          } __xx; \
  __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
  __asm__ ("deid %2,%0" \
           : "=g" (__xx.__ll) \
           : "0" (__xx.__ll), \
             "g" ((USItype) (d))); \
  (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
/* ffsd, with the output operand tied to an initial value of 0.  */
#define count_trailing_zeros(count,x) \
  do { \
    __asm__ ("ffsd %2,%0" \
             : "=r" ((USItype) (count)) \
             : "0" ((USItype) 0), \
               "r" ((USItype) (x))); \
  } while (0)
#endif /* __ns32000__ */

41b0afab
RM
893/* FIXME: We should test _IBMR2 here when we add assembly support for the
894 system vendor compilers.
895 FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good
896 enough, since that hits ARM and m68k too. */
897#if (defined (_ARCH_PPC) /* AIX */ \
41b0afab
RM
898 || defined (__powerpc__) /* gcc */ \
899 || defined (__POWERPC__) /* BEOS */ \
900 || defined (__ppc__) /* Darwin */ \
24784465
RM
901 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
902 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
48693bea 903 && CPU_FAMILY == PPC) \
41b0afab 904 ) && W_TYPE_SIZE == 32
28f540f4
RM
905#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
906 do { \
907 if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 908 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
41b0afab
RM
909 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
910 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
c3c8283c 911 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
41b0afab 912 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
28f540f4 913 else \
c3c8283c 914 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
41b0afab
RM
915 : "=r" (sh), "=&r" (sl) \
916 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
28f540f4
RM
917 } while (0)
918#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
919 do { \
920 if (__builtin_constant_p (ah) && (ah) == 0) \
c3c8283c 921 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
41b0afab
RM
922 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
923 else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
c3c8283c 924 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
41b0afab 925 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
28f540f4 926 else if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 927 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
41b0afab
RM
928 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
929 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
c3c8283c 930 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
41b0afab 931 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
28f540f4 932 else \
c3c8283c 933 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
41b0afab
RM
934 : "=r" (sh), "=&r" (sl) \
935 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
28f540f4
RM
936 } while (0)
937#define count_leading_zeros(count, x) \
c3c8283c 938 __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
e9b3e3c5 939#define COUNT_LEADING_ZEROS_0 32
41b0afab 940#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
24784465
RM
941 || defined (__ppc__) \
942 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
943 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
48693bea 944 && CPU_FAMILY == PPC)
28f540f4
RM
/* Unsigned 32x32 multiply: PH receives the high word (mulhwu) and PL the
   low word of M0 * M1.  The arguments are captured once in __m0/__m1 and
   only those copies are handed to the asm, so M0 and M1 are evaluated a
   single time even when they have side effects.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    USItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
951#define UMUL_TIME 15
/* Signed 32x32 multiply: PH receives the high word (mulhw) and PL the low
   word of M0 * M1.  The asm consumes the SItype copies __m0/__m1 rather
   than the raw arguments, so each argument is evaluated exactly once and
   is converted to SItype before the high-part multiply.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    SItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
958#define SMUL_TIME 14
959#define UDIV_TIME 120
28f540f4 960#endif
41b0afab
RM
961#endif /* 32-bit POWER architecture variants. */
962
963/* We should test _IBMR2 here when we add assembly support for the system
964 vendor compilers. */
965#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
09af82c9
RM
966#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
967 do { \
968 if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 969 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
41b0afab
RM
970 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
971 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
c3c8283c 972 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
41b0afab 973 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
09af82c9 974 else \
c3c8283c 975 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
41b0afab
RM
976 : "=r" (sh), "=&r" (sl) \
977 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
09af82c9
RM
978 } while (0)
979#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
980 do { \
981 if (__builtin_constant_p (ah) && (ah) == 0) \
c3c8283c 982 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
41b0afab
RM
983 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
984 else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
c3c8283c 985 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
41b0afab 986 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
09af82c9 987 else if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 988 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
41b0afab
RM
989 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
990 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
c3c8283c 991 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
41b0afab 992 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
09af82c9 993 else \
c3c8283c 994 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
41b0afab
RM
995 : "=r" (sh), "=&r" (sl) \
996 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
09af82c9 997 } while (0)
09af82c9 998#define count_leading_zeros(count, x) \
41b0afab 999 __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
09af82c9 1000#define COUNT_LEADING_ZEROS_0 64
09af82c9
RM
/* Unsigned 64x64 multiply: PH receives the high doubleword (mulhdu) and PL
   the low doubleword of M0 * M1.  The asm reads the captured copies
   __m0/__m1, so M0 and M1 are evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    UDItype __m0 = (m0), __m1 = (m1);                                   \
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));  \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
41b0afab 1007#define UMUL_TIME 15
09af82c9
RM
/* Signed 64x64 multiply: PH receives the high doubleword (mulhd) and PL
   the low doubleword of M0 * M1.  The asm reads the DItype copies
   __m0/__m1, so M0 and M1 are evaluated exactly once.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {                                                                  \
    DItype __m0 = (m0), __m1 = (m1);                                    \
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));   \
    (pl) = __m0 * __m1;                                                 \
  } while (0)
41b0afab
RM
1014#define SMUL_TIME 14 /* ??? */
1015#define UDIV_TIME 120 /* ??? */
1016#endif /* 64-bit PowerPC. */
28f540f4 1017
e9b3e3c5 1018#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
28f540f4 1019#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 1020 __asm__ ("a %1,%5\n\tae %0,%3" \
1da2d51a
UD
1021 : "=r" ((USItype) (sh)), \
1022 "=&r" ((USItype) (sl)) \
1023 : "%0" ((USItype) (ah)), \
1024 "r" ((USItype) (bh)), \
1025 "%1" ((USItype) (al)), \
1026 "r" ((USItype) (bl)))
28f540f4 1027#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 1028 __asm__ ("s %1,%5\n\tse %0,%3" \
1da2d51a
UD
1029 : "=r" ((USItype) (sh)), \
1030 "=&r" ((USItype) (sl)) \
1031 : "0" ((USItype) (ah)), \
1032 "r" ((USItype) (bh)), \
1033 "1" ((USItype) (al)), \
1034 "r" ((USItype) (bl)))
28f540f4
RM
1035#define umul_ppmm(ph, pl, m0, m1) \
1036 do { \
1037 USItype __m0 = (m0), __m1 = (m1); \
1038 __asm__ ( \
41b0afab
RM
1039 "s r2,r2\n" \
1040" mts r10,%2\n" \
1041" m r2,%3\n" \
1042" m r2,%3\n" \
1043" m r2,%3\n" \
1044" m r2,%3\n" \
1045" m r2,%3\n" \
1046" m r2,%3\n" \
1047" m r2,%3\n" \
1048" m r2,%3\n" \
1049" m r2,%3\n" \
1050" m r2,%3\n" \
1051" m r2,%3\n" \
1052" m r2,%3\n" \
1053" m r2,%3\n" \
1054" m r2,%3\n" \
1055" m r2,%3\n" \
1056" m r2,%3\n" \
1057" cas %0,r2,r0\n" \
1058" mfs r10,%1" \
1da2d51a
UD
1059 : "=r" ((USItype) (ph)), \
1060 "=r" ((USItype) (pl)) \
28f540f4
RM
1061 : "%r" (__m0), \
1062 "r" (__m1) \
1063 : "r2"); \
1064 (ph) += ((((SItype) __m0 >> 31) & __m1) \
1065 + (((SItype) __m1 >> 31) & __m0)); \
1066 } while (0)
1067#define UMUL_TIME 20
1068#define UDIV_TIME 200
1069#define count_leading_zeros(count, x) \
1070 do { \
1071 if ((x) >= 0x10000) \
1072 __asm__ ("clz %0,%1" \
1da2d51a
UD
1073 : "=r" ((USItype) (count)) \
1074 : "r" ((USItype) (x) >> 16)); \
28f540f4
RM
1075 else \
1076 { \
1077 __asm__ ("clz %0,%1" \
1da2d51a
UD
1078 : "=r" ((USItype) (count)) \
1079 : "r" ((USItype) (x))); \
28f540f4
RM
1080 (count) += 16; \
1081 } \
1082 } while (0)
8f5ca04b
RM
1083#endif
1084
24784465
RM
1085#if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1086#ifndef __sh1__
e9b3e3c5
UD
1087#define umul_ppmm(w1, w0, u, v) \
1088 __asm__ ( \
24784465
RM
1089 "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
1090 : "=r<" ((USItype)(w1)), \
1091 "=r<" ((USItype)(w0)) \
e9b3e3c5
UD
1092 : "r" ((USItype)(u)), \
1093 "r" ((USItype)(v)) \
1094 : "macl", "mach")
1095#define UMUL_TIME 5
1096#endif
1097
24784465
RM
1098/* This is the same algorithm as __udiv_qrnnd_c. */
1099#define UDIV_NEEDS_NORMALIZATION 1
1100
1101#define udiv_qrnnd(q, r, n1, n0, d) \
1102 do { \
1103 extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
48693bea 1104 __attribute__ ((visibility ("hidden"))); \
24784465
RM
1105 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
1106 __asm__ ( \
1107 "mov%M4 %4,r5\n" \
1108" swap.w %3,r4\n" \
1109" swap.w r5,r6\n" \
1110" jsr @%5\n" \
1111" shll16 r6\n" \
1112" swap.w r4,r4\n" \
1113" jsr @%5\n" \
1114" swap.w r1,%0\n" \
1115" or r1,%0" \
1116 : "=r" (q), "=&z" (r) \
1117 : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
78fd882a 1118 : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
24784465
RM
1119 } while (0)
1120
1121#define UDIV_TIME 80
1122
1123#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1124 __asm__ ("clrt;subc %5,%1; subc %4,%0" \
1125 : "=r" (sh), "=r" (sl) \
def7fbd6 1126 : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
24784465
RM
1127
1128#endif /* __sh__ */
1129
41b0afab
RM
1130#if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* Widening unsigned multiply.  Both arguments are parenthesized so that a
   compound argument such as `a + 1' is narrowed and multiplied as a whole
   instead of being split by operator precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
1132#define count_leading_zeros(count, x) \
1133 do \
1134 { \
1135 UDItype x_ = (USItype)(x); \
1136 SItype c_; \
1137 \
1138 __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_)); \
1139 (count) = c_ - 31; \
1140 } \
1141 while (0)
1142#define COUNT_LEADING_ZEROS_0 32
1143#endif
1144
1145#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1146 && W_TYPE_SIZE == 32
28f540f4 1147#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 1148 __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
1da2d51a
UD
1149 : "=r" ((USItype) (sh)), \
1150 "=&r" ((USItype) (sl)) \
1151 : "%rJ" ((USItype) (ah)), \
1152 "rI" ((USItype) (bh)), \
1153 "%rJ" ((USItype) (al)), \
1154 "rI" ((USItype) (bl)) \
28f540f4
RM
1155 __CLOBBER_CC)
1156#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 1157 __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
1da2d51a
UD
1158 : "=r" ((USItype) (sh)), \
1159 "=&r" ((USItype) (sl)) \
1160 : "rJ" ((USItype) (ah)), \
1161 "rI" ((USItype) (bh)), \
1162 "rJ" ((USItype) (al)), \
1163 "rI" ((USItype) (bl)) \
28f540f4 1164 __CLOBBER_CC)
402fe938
DM
1165#if defined (__sparc_v9__)
1166#define umul_ppmm(w1, w0, u, v) \
1167 do { \
1168 register USItype __g1 asm ("g1"); \
1169 __asm__ ("umul\t%2,%3,%1\n\t" \
1170 "srlx\t%1, 32, %0" \
1171 : "=r" ((USItype) (w1)), \
1172 "=r" (__g1) \
1173 : "r" ((USItype) (u)), \
1174 "r" ((USItype) (v))); \
1175 (w0) = __g1; \
1176 } while (0)
1177#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1178 __asm__ ("mov\t%2,%%y\n\t" \
1179 "udiv\t%3,%4,%0\n\t" \
1180 "umul\t%0,%4,%1\n\t" \
1181 "sub\t%3,%1,%1" \
1182 : "=&r" ((USItype) (__q)), \
1183 "=&r" ((USItype) (__r)) \
1184 : "r" ((USItype) (__n1)), \
1185 "r" ((USItype) (__n0)), \
1186 "r" ((USItype) (__d)))
1187#else
28f540f4 1188#if defined (__sparc_v8__)
28f540f4
RM
1189#define umul_ppmm(w1, w0, u, v) \
1190 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
1da2d51a
UD
1191 : "=r" ((USItype) (w1)), \
1192 "=r" ((USItype) (w0)) \
1193 : "r" ((USItype) (u)), \
1194 "r" ((USItype) (v)))
41b0afab 1195#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1da2d51a 1196 __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
41b0afab
RM
1197 : "=&r" ((USItype) (__q)), \
1198 "=&r" ((USItype) (__r)) \
1199 : "r" ((USItype) (__n1)), \
1200 "r" ((USItype) (__n0)), \
1201 "r" ((USItype) (__d)))
1da2d51a 1202#else
28f540f4
RM
1203#if defined (__sparclite__)
1204/* This has hardware multiply but not divide. It also has two additional
1205 instructions scan (ffs from high bit) and divscc. */
1206#define umul_ppmm(w1, w0, u, v) \
1207 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
1da2d51a
UD
1208 : "=r" ((USItype) (w1)), \
1209 "=r" ((USItype) (w0)) \
1210 : "r" ((USItype) (u)), \
1211 "r" ((USItype) (v)))
28f540f4 1212#define udiv_qrnnd(q, r, n1, n0, d) \
772d0e1a 1213 __asm__ ("! Inlined udiv_qrnnd\n" \
41b0afab
RM
1214" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
1215" tst %%g0\n" \
1216" divscc %3,%4,%%g1\n" \
1217" divscc %%g1,%4,%%g1\n" \
1218" divscc %%g1,%4,%%g1\n" \
1219" divscc %%g1,%4,%%g1\n" \
1220" divscc %%g1,%4,%%g1\n" \
1221" divscc %%g1,%4,%%g1\n" \
1222" divscc %%g1,%4,%%g1\n" \
1223" divscc %%g1,%4,%%g1\n" \
1224" divscc %%g1,%4,%%g1\n" \
1225" divscc %%g1,%4,%%g1\n" \
1226" divscc %%g1,%4,%%g1\n" \
1227" divscc %%g1,%4,%%g1\n" \
1228" divscc %%g1,%4,%%g1\n" \
1229" divscc %%g1,%4,%%g1\n" \
1230" divscc %%g1,%4,%%g1\n" \
1231" divscc %%g1,%4,%%g1\n" \
1232" divscc %%g1,%4,%%g1\n" \
1233" divscc %%g1,%4,%%g1\n" \
1234" divscc %%g1,%4,%%g1\n" \
1235" divscc %%g1,%4,%%g1\n" \
1236" divscc %%g1,%4,%%g1\n" \
1237" divscc %%g1,%4,%%g1\n" \
1238" divscc %%g1,%4,%%g1\n" \
1239" divscc %%g1,%4,%%g1\n" \
1240" divscc %%g1,%4,%%g1\n" \
1241" divscc %%g1,%4,%%g1\n" \
1242" divscc %%g1,%4,%%g1\n" \
1243" divscc %%g1,%4,%%g1\n" \
1244" divscc %%g1,%4,%%g1\n" \
1245" divscc %%g1,%4,%%g1\n" \
1246" divscc %%g1,%4,%%g1\n" \
1247" divscc %%g1,%4,%0\n" \
1248" rd %%y,%1\n" \
1249" bl,a 1f\n" \
1250" add %1,%4,%1\n" \
772d0e1a 1251"1: ! End of inline udiv_qrnnd" \
1da2d51a
UD
1252 : "=r" ((USItype) (q)), \
1253 "=r" ((USItype) (r)) \
1254 : "r" ((USItype) (n1)), \
1255 "r" ((USItype) (n0)), \
1256 "rI" ((USItype) (d)) \
e9b3e3c5 1257 : "g1" __AND_CLOBBER_CC)
28f540f4
RM
1258#define UDIV_TIME 37
1259#define count_leading_zeros(count, x) \
41b0afab
RM
1260 do { \
1261 __asm__ ("scan %1,1,%0" \
48693bea
AK
1262 : "=r" ((USItype) (count)) \
1263 : "r" ((USItype) (x))); \
62818cfd 1264 } while (0)
e9b3e3c5
UD
1265/* Early sparclites return 63 for an argument of 0, but they warn that future
1266 implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
1267 undefined. */
1da2d51a
UD
1268#else
1269/* SPARC without integer multiplication and divide instructions.
1270 (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
28f540f4 1271#define umul_ppmm(w1, w0, u, v) \
772d0e1a 1272 __asm__ ("! Inlined umul_ppmm\n" \
41b0afab
RM
1273" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
1274" sra %3,31,%%o5 ! Don't move this insn\n" \
1275" and %2,%%o5,%%o5 ! Don't move this insn\n" \
1276" andcc %%g0,0,%%g1 ! Don't move this insn\n" \
1277" mulscc %%g1,%3,%%g1\n" \
1278" mulscc %%g1,%3,%%g1\n" \
1279" mulscc %%g1,%3,%%g1\n" \
1280" mulscc %%g1,%3,%%g1\n" \
1281" mulscc %%g1,%3,%%g1\n" \
1282" mulscc %%g1,%3,%%g1\n" \
1283" mulscc %%g1,%3,%%g1\n" \
1284" mulscc %%g1,%3,%%g1\n" \
1285" mulscc %%g1,%3,%%g1\n" \
1286" mulscc %%g1,%3,%%g1\n" \
1287" mulscc %%g1,%3,%%g1\n" \
1288" mulscc %%g1,%3,%%g1\n" \
1289" mulscc %%g1,%3,%%g1\n" \
1290" mulscc %%g1,%3,%%g1\n" \
1291" mulscc %%g1,%3,%%g1\n" \
1292" mulscc %%g1,%3,%%g1\n" \
1293" mulscc %%g1,%3,%%g1\n" \
1294" mulscc %%g1,%3,%%g1\n" \
1295" mulscc %%g1,%3,%%g1\n" \
1296" mulscc %%g1,%3,%%g1\n" \
1297" mulscc %%g1,%3,%%g1\n" \
1298" mulscc %%g1,%3,%%g1\n" \
1299" mulscc %%g1,%3,%%g1\n" \
1300" mulscc %%g1,%3,%%g1\n" \
1301" mulscc %%g1,%3,%%g1\n" \
1302" mulscc %%g1,%3,%%g1\n" \
1303" mulscc %%g1,%3,%%g1\n" \
1304" mulscc %%g1,%3,%%g1\n" \
1305" mulscc %%g1,%3,%%g1\n" \
1306" mulscc %%g1,%3,%%g1\n" \
1307" mulscc %%g1,%3,%%g1\n" \
1308" mulscc %%g1,%3,%%g1\n" \
1309" mulscc %%g1,0,%%g1\n" \
1310" add %%g1,%%o5,%0\n" \
1311" rd %%y,%1" \
1da2d51a
UD
1312 : "=r" ((USItype) (w1)), \
1313 "=r" ((USItype) (w0)) \
1314 : "%rI" ((USItype) (u)), \
1315 "r" ((USItype) (v)) \
e9b3e3c5 1316 : "g1", "o5" __AND_CLOBBER_CC)
28f540f4 1317#define UMUL_TIME 39 /* 39 instructions */
390a4882 1318/* It's quite necessary to add this much assembler for the sparc.
41b0afab
RM
1319 The default udiv_qrnnd (in C) is more than 10 times slower! */
1320#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
772d0e1a
AJ
1321 __asm__ ("! Inlined udiv_qrnnd\n" \
1322" mov 32,%%g1\n" \
1323" subcc %1,%2,%%g0\n" \
1324"1: bcs 5f\n" \
41b0afab 1325" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
772d0e1a
AJ
1326" sub %1,%2,%1 ! this kills msb of n\n" \
1327" addx %1,%1,%1 ! so this can't give carry\n" \
1328" subcc %%g1,1,%%g1\n" \
1329"2: bne 1b\n" \
41b0afab 1330" subcc %1,%2,%%g0\n" \
772d0e1a 1331" bcs 3f\n" \
41b0afab 1332" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
772d0e1a 1333" b 3f\n" \
41b0afab 1334" sub %1,%2,%1 ! this kills msb of n\n" \
772d0e1a
AJ
1335"4: sub %1,%2,%1\n" \
1336"5: addxcc %1,%1,%1\n" \
1337" bcc 2b\n" \
41b0afab 1338" subcc %%g1,1,%%g1\n" \
772d0e1a
AJ
1339"! Got carry from n. Subtract next step to cancel this carry.\n" \
1340" bne 4b\n" \
41b0afab 1341" addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
772d0e1a
AJ
1342" sub %1,%2,%1\n" \
1343"3: xnor %0,0,%0\n" \
41b0afab
RM
1344" ! End of inline udiv_qrnnd" \
1345 : "=&r" ((USItype) (__q)), \
1346 "=&r" ((USItype) (__r)) \
1347 : "r" ((USItype) (__d)), \
1348 "1" ((USItype) (__n1)), \
1349 "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1350#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
1da2d51a
UD
1351#endif /* __sparclite__ */
1352#endif /* __sparc_v8__ */
402fe938 1353#endif /* __sparc_v9__ */
41b0afab 1354#endif /* sparc32 */
28f540f4 1355
41b0afab
RM
1356#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1357 && W_TYPE_SIZE == 64
e9b3e3c5 1358#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
402fe938
DM
1359 do { \
1360 UDItype __carry = 0; \
1361 __asm__ ("addcc\t%r5,%6,%1\n\t" \
1362 "add\t%r3,%4,%0\n\t" \
1363 "movcs\t%%xcc, 1, %2\n\t" \
1364 "add\t%0, %2, %0" \
1365 : "=r" ((UDItype)(sh)), \
1366 "=&r" ((UDItype)(sl)), \
1367 "+r" (__carry) \
1368 : "%rJ" ((UDItype)(ah)), \
1369 "rI" ((UDItype)(bh)), \
1370 "%rJ" ((UDItype)(al)), \
1371 "rI" ((UDItype)(bl)) \
1372 __CLOBBER_CC); \
1373 } while (0)
e9b3e3c5 1374
402fe938
DM
/* Two-word subtraction (SH,SL) = (AH,AL) - (BH,BL).  The low words are
   subtracted with subcc and the high words with sub; movcs then sets
   __carry to 1 when the low subtraction borrowed (carry set in %xcc), and
   that borrow is taken off the high word.  Subtraction is not
   commutative, so the minuend operands must not carry the `%' constraint
   modifier: it would permit the compiler to exchange AH with BH or AL
   with BL.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UDItype __carry = 0;                                                \
    __asm__ ("subcc\t%r5,%6,%1\n\t"                                     \
             "sub\t%r3,%4,%0\n\t"                                       \
             "movcs\t%%xcc, 1, %2\n\t"                                  \
             "sub\t%0, %2, %0"                                          \
             : "=r" ((UDItype)(sh)),                                    \
               "=&r" ((UDItype)(sl)),                                   \
               "+r" (__carry)                                           \
             : "rJ" ((UDItype)(ah)),                                    \
               "rI" ((UDItype)(bh)),                                    \
               "rJ" ((UDItype)(al)),                                    \
               "rI" ((UDItype)(bl))                                     \
             __CLOBBER_CC);                                             \
  } while (0)
e9b3e3c5
UD
1391
1392#define umul_ppmm(wh, wl, u, v) \
1393 do { \
1394 UDItype tmp1, tmp2, tmp3, tmp4; \
1395 __asm__ __volatile__ ( \
41b0afab
RM
1396 "srl %7,0,%3\n\t" \
1397 "mulx %3,%6,%1\n\t" \
1398 "srlx %6,32,%2\n\t" \
1399 "mulx %2,%3,%4\n\t" \
1400 "sllx %4,32,%5\n\t" \
1401 "srl %6,0,%3\n\t" \
1402 "sub %1,%5,%5\n\t" \
1403 "srlx %5,32,%5\n\t" \
1404 "addcc %4,%5,%4\n\t" \
1405 "srlx %7,32,%5\n\t" \
1406 "mulx %3,%5,%3\n\t" \
1407 "mulx %2,%5,%5\n\t" \
1408 "sethi %%hi(0x80000000),%2\n\t" \
1409 "addcc %4,%3,%4\n\t" \
1410 "srlx %4,32,%4\n\t" \
1411 "add %2,%2,%2\n\t" \
1412 "movcc %%xcc,%%g0,%2\n\t" \
1413 "addcc %5,%4,%5\n\t" \
1414 "sllx %3,32,%3\n\t" \
1415 "add %1,%3,%1\n\t" \
772d0e1a 1416 "add %5,%2,%0" \
e9b3e3c5
UD
1417 : "=r" ((UDItype)(wh)), \
1418 "=&r" ((UDItype)(wl)), \
1419 "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
1420 : "r" ((UDItype)(u)), \
1421 "r" ((UDItype)(v)) \
1422 __CLOBBER_CC); \
1423 } while (0)
1424#define UMUL_TIME 96
1425#define UDIV_TIME 230
313fed01 1426#endif /* sparc64 */
e9b3e3c5
UD
1427
1428#if defined (__vax__) && W_TYPE_SIZE == 32
28f540f4 1429#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 1430 __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
1da2d51a
UD
1431 : "=g" ((USItype) (sh)), \
1432 "=&g" ((USItype) (sl)) \
1433 : "%0" ((USItype) (ah)), \
1434 "g" ((USItype) (bh)), \
1435 "%1" ((USItype) (al)), \
1436 "g" ((USItype) (bl)))
28f540f4 1437#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 1438 __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
1da2d51a
UD
1439 : "=g" ((USItype) (sh)), \
1440 "=&g" ((USItype) (sl)) \
1441 : "0" ((USItype) (ah)), \
1442 "g" ((USItype) (bh)), \
1443 "1" ((USItype) (al)), \
1444 "g" ((USItype) (bl)))
28f540f4
RM
1445#define umul_ppmm(xh, xl, m0, m1) \
1446 do { \
1da2d51a
UD
1447 union { \
1448 UDItype __ll; \
1449 struct {USItype __l, __h;} __i; \
1450 } __xx; \
28f540f4
RM
1451 USItype __m0 = (m0), __m1 = (m1); \
1452 __asm__ ("emul %1,%2,$0,%0" \
1da2d51a 1453 : "=r" (__xx.__ll) \
28f540f4
RM
1454 : "g" (__m0), \
1455 "g" (__m1)); \
1da2d51a
UD
1456 (xh) = __xx.__i.__h; \
1457 (xl) = __xx.__i.__l; \
28f540f4
RM
1458 (xh) += ((((SItype) __m0 >> 31) & __m1) \
1459 + (((SItype) __m1 >> 31) & __m0)); \
1460 } while (0)
1461#define sdiv_qrnnd(q, r, n1, n0, d) \
1462 do { \
1463 union {DItype __ll; \
1464 struct {SItype __l, __h;} __i; \
1465 } __xx; \
1466 __xx.__i.__h = n1; __xx.__i.__l = n0; \
1467 __asm__ ("ediv %3,%2,%0,%1" \
1468 : "=g" (q), "=g" (r) \
1da2d51a 1469 : "g" (__xx.__ll), "g" (d)); \
28f540f4
RM
1470 } while (0)
1471#endif /* __vax__ */
1472
8115f29b
L
1473#ifdef _TMS320C6X
1474#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1475 do \
1476 { \
1477 UDItype __ll; \
1478 __asm__ ("addu .l1 %1, %2, %0" \
1479 : "=a" (__ll) : "a" (al), "a" (bl)); \
1480 (sl) = (USItype)__ll; \
1481 (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \
1482 } \
1483 while (0)
1484
1485#ifdef _TMS320C6400_PLUS
/* Widening unsigned multiply.  The arguments are parenthesized so that the
   USItype casts and the multiplication apply to each whole argument
   expression rather than to its first operand only.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* Unsigned 32x32 multiply done in plain C: form the product of the two
   USItype-narrowed operands in a UDItype, then hand back bits 63..32 in W1
   and bits 31..0 in W0.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UDItype __umul_wide = (UDItype) (USItype) (u) * (USItype) (v);      \
    (w1) = (USItype) (__umul_wide >> 32);                               \
    (w0) = (USItype) (__umul_wide);                                     \
  } while (0)
1493#endif /* _TMS320C6400_PLUS */
1494
1495#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
1496#ifdef _TMS320C6400
1497#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
1498#endif
1499#define UMUL_TIME 4
1500#define UDIV_TIME 40
1501#endif /* _TMS320C6X */
1502
24784465
RM
1503#if defined (__xtensa__) && W_TYPE_SIZE == 32
1504/* This code is not Xtensa-configuration-specific, so rely on the compiler
1505 to expand builtin functions depending on what configuration features
1506 are available. This avoids library calls when the operation can be
1507 performed in-line. */
/* Unsigned multiply via __builtin_umulsidi3: the double-word result is
   placed in a DWunion and its high and low halves are copied to W1 and W0.
   The output arguments are parenthesized, matching the other umul_ppmm
   definitions in this file.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    DWunion __w;                                                        \
    __w.ll = __builtin_umulsidi3 (u, v);                                \
    (w1) = __w.s.high;                                                  \
    (w0) = __w.s.low;                                                   \
  } while (0)
1515#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
1516#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
1517#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
1518#endif /* __xtensa__ */
1519
def7fbd6
AS
1520#if defined xstormy16
1521extern UHItype __stormy16_count_leading_zeros (UHItype);
1522#define count_leading_zeros(count, x) \
1523 do \
1524 { \
1525 UHItype size; \
1526 \
1527 /* We assume that W_TYPE_SIZE is a multiple of 16... */ \
1528 for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \
1529 { \
1530 UHItype c; \
1531 \
1532 c = __clzhi2 ((x) >> (size - 16)); \
1533 (count) += c; \
1534 if (c != 16) \
1535 break; \
1536 } \
1537 } \
1538 while (0)
1539#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1540#endif
1541
e9b3e3c5
UD
1542#if defined (__z8000__) && W_TYPE_SIZE == 16
1543#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1544 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
1545 : "=r" ((unsigned int)(sh)), \
1546 "=&r" ((unsigned int)(sl)) \
1547 : "%0" ((unsigned int)(ah)), \
1548 "r" ((unsigned int)(bh)), \
1549 "%1" ((unsigned int)(al)), \
1550 "rQR" ((unsigned int)(bl)))
1551#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1552 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
1553 : "=r" ((unsigned int)(sh)), \
1554 "=&r" ((unsigned int)(sl)) \
1555 : "0" ((unsigned int)(ah)), \
1556 "r" ((unsigned int)(bh)), \
1557 "1" ((unsigned int)(al)), \
1558 "rQR" ((unsigned int)(bl)))
1559#define umul_ppmm(xh, xl, m0, m1) \
1560 do { \
1561 union {long int __ll; \
1562 struct {unsigned int __h, __l;} __i; \
1563 } __xx; \
1564 unsigned int __m0 = (m0), __m1 = (m1); \
1565 __asm__ ("mult %S0,%H3" \
1566 : "=r" (__xx.__i.__h), \
1567 "=r" (__xx.__i.__l) \
1568 : "%1" (__m0), \
1569 "rQR" (__m1)); \
1570 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
1571 (xh) += ((((signed int) __m0 >> 15) & __m1) \
1572 + (((signed int) __m1 >> 15) & __m0)); \
1573 } while (0)
1574#endif /* __z8000__ */
1575
28f540f4
RM
1576#endif /* __GNUC__ */
1577
28f540f4
RM
1578/* If this machine has no inline assembler, use C macros. */
1579
#if !defined (add_ssaaaa)
/* Portable two-word addition: (SH,SL) = (AH,AL) + (BH,BL).  The low words
   are summed first; if that UWtype sum wrapped around it is smaller than
   AL, and the comparison result (0 or 1) is added to the high word as the
   carry.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __add_lo = (al) + (bl);                                      \
    (sh) = (ah) + (bh) + (__add_lo < (al));                             \
    (sl) = __add_lo;                                                    \
  } while (0)
#endif
1589
#if !defined (sub_ddmmss)
/* Portable two-word subtraction: (SH,SL) = (AH,AL) - (BH,BL).  The low
   words are subtracted first; if that UWtype difference wrapped around it
   is larger than AL, and the comparison result (0 or 1) is taken off the
   high word as the borrow.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __sub_lo = (al) - (bl);                                      \
    (sh) = (ah) - (bh) - (__sub_lo > (al));                             \
    (sl) = __sub_lo;                                                    \
  } while (0)
#endif
1599
f30070ae
RM
1600/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1601 smul_ppmm. */
1602#if !defined (umul_ppmm) && defined (smul_ppmm)
/* Unsigned multiply built on smul_ppmm.  The operands are copied into
   UWtype temporaries and multiplied as signed values; for each operand
   whose top bit is set, the other operand is then added to the high word
   (the mask -(x >> (W_TYPE_SIZE - 1)) is all ones exactly in that case).
   The low word needs no adjustment.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UWtype __umul_hi;                                                   \
    UWtype __umul_u = (u), __umul_v = (v);                              \
    smul_ppmm (__umul_hi, w0, __umul_u, __umul_v);                      \
    (w1) = __umul_hi                                                    \
           + (-(__umul_u >> (W_TYPE_SIZE - 1)) & __umul_v)              \
           + (-(__umul_v >> (W_TYPE_SIZE - 1)) & __umul_u);             \
  } while (0)
1611#endif
1612
1613/* If we still don't have umul_ppmm, define it using plain C. */
28f540f4
RM
#if !defined (umul_ppmm)
/* Plain C double-word multiply.  U and V are each split into half-words
   with __ll_lowpart/__ll_highpart, the four half-word products are formed,
   and the two cross products are folded together with explicit carry
   handling.  W1 receives the high word and W0 the low word.  */
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UHWtype __mul_ul = __ll_lowpart (u);                                \
    UHWtype __mul_uh = __ll_highpart (u);                               \
    UHWtype __mul_vl = __ll_lowpart (v);                                \
    UHWtype __mul_vh = __ll_highpart (v);                               \
                                                                        \
    UWtype __mul_ll = (UWtype) __mul_ul * __mul_vl;                     \
    UWtype __mul_lh = (UWtype) __mul_ul * __mul_vh;                     \
    UWtype __mul_hl = (UWtype) __mul_uh * __mul_vl;                     \
    UWtype __mul_hh = (UWtype) __mul_uh * __mul_vh;                     \
                                                                        \
    /* Adding the upper half of the low product cannot overflow.  */    \
    __mul_lh += __ll_highpart (__mul_ll);                               \
    /* Adding the second cross product can; detect the wrap-around  */  \
    /* and credit it to the high product at weight __ll_B.  */          \
    __mul_lh += __mul_hl;                                               \
    if (__mul_lh < __mul_hl)                                            \
      __mul_hh += __ll_B;                                               \
                                                                        \
    (w1) = __mul_hh + __ll_highpart (__mul_lh);                         \
    (w0) = __ll_lowpart (__mul_lh) * __ll_B + __ll_lowpart (__mul_ll);  \
  } while (0)
#endif
1639
1da2d51a
UD
1640#if !defined (__umulsidi3)
1641#define __umulsidi3(u, v) \
41b0afab 1642 ({DWunion __w; \
1da2d51a
UD
1643 umul_ppmm (__w.s.high, __w.s.low, u, v); \
1644 __w.ll; })
8f5ca04b
RM
1645#endif
1646
28f540f4
RM
1647/* Define this unconditionally, so it can be used for debugging. */
/* Plain C division of the two-word value (N1,N0) by D, giving quotient Q
   and remainder R.  The quotient is produced one half-word at a time: each
   step divides by the high half of D, then corrects the estimate (at most
   twice) using the low half of D.  Wherever this file selects this version
   as udiv_qrnnd it also sets UDIV_NEEDS_NORMALIZATION to 1.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __dv_d1, __dv_d0, __dv_q1, __dv_q0;                          \
    UWtype __dv_r1, __dv_r0, __dv_m;                                    \
                                                                        \
    __dv_d1 = __ll_highpart (d);                                        \
    __dv_d0 = __ll_lowpart (d);                                         \
                                                                        \
    /* Upper half-word of the quotient.  */                             \
    __dv_r1 = (n1) % __dv_d1;                                           \
    __dv_q1 = (n1) / __dv_d1;                                           \
    __dv_m = (UWtype) __dv_q1 * __dv_d0;                                \
    __dv_r1 = __dv_r1 * __ll_B | __ll_highpart (n0);                    \
    if (__dv_r1 < __dv_m)                                               \
      {                                                                 \
        __dv_q1--;                                                      \
        __dv_r1 += (d);                                                 \
        /* Correct a second time only if the add did not wrap.  */      \
        if (__dv_r1 >= (d) && __dv_r1 < __dv_m)                         \
          {                                                             \
            __dv_q1--;                                                  \
            __dv_r1 += (d);                                             \
          }                                                             \
      }                                                                 \
    __dv_r1 -= __dv_m;                                                  \
                                                                        \
    /* Lower half-word of the quotient, same scheme.  */                \
    __dv_r0 = __dv_r1 % __dv_d1;                                        \
    __dv_q0 = __dv_r1 / __dv_d1;                                        \
    __dv_m = (UWtype) __dv_q0 * __dv_d0;                                \
    __dv_r0 = __dv_r0 * __ll_B | __ll_lowpart (n0);                     \
    if (__dv_r0 < __dv_m)                                               \
      {                                                                 \
        __dv_q0--;                                                      \
        __dv_r0 += (d);                                                 \
        if (__dv_r0 >= (d) && __dv_r0 < __dv_m)                         \
          {                                                             \
            __dv_q0--;                                                  \
            __dv_r0 += (d);                                             \
          }                                                             \
      }                                                                 \
    __dv_r0 -= __dv_m;                                                  \
                                                                        \
    (q) = (UWtype) __dv_q1 * __ll_B | __dv_q0;                          \
    (r) = __dv_r0;                                                      \
  } while (0)
1684
1685/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1686 __udiv_w_sdiv (defined in libgcc or elsewhere). */
1687#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
1688#define udiv_qrnnd(q, r, nh, nl, d) \
1689 do { \
d3c827e7
AK
1690 extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype); \
1691 UWtype __r; \
1da2d51a 1692 (q) = __udiv_w_sdiv (&__r, nh, nl, d); \
28f540f4
RM
1693 (r) = __r; \
1694 } while (0)
1695#endif
1696
1697/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
1698#if !defined (udiv_qrnnd)
1699#define UDIV_NEEDS_NORMALIZATION 1
1700#define udiv_qrnnd __udiv_qrnnd_c
1701#endif
1702
#if !defined (count_leading_zeros)
/* Table-driven count of leading zero bits in X.  A shift amount is chosen
   that brings the most significant non-zero byte of X down to the bottom
   (by comparing against powers of two for words of up to 32 bits, by
   scanning byte by byte otherwise); __clz_tab is then indexed with that
   byte and the result is converted into a leading-zero count.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __clz_x = (x);                                               \
    UWtype __clz_sh;                                                    \
                                                                        \
    if (W_TYPE_SIZE > 32)                                               \
      {                                                                 \
        __clz_sh = W_TYPE_SIZE - 8;                                     \
        while (__clz_sh > 0 && ((__clz_x >> __clz_sh) & 0xff) == 0)     \
          __clz_sh -= 8;                                                \
      }                                                                 \
    else if (__clz_x < ((UWtype)1<<2*__BITS4))                          \
      __clz_sh = (__clz_x < ((UWtype)1<<__BITS4)) ? 0 : __BITS4;        \
    else                                                                \
      __clz_sh = (__clz_x < ((UWtype)1<<3*__BITS4))                     \
                 ? 2*__BITS4 : 3*__BITS4;                               \
                                                                        \
    (count) = W_TYPE_SIZE                                               \
              - (__clz_tab[__clz_x >> __clz_sh] + __clz_sh);            \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1726
62818cfd
UD
#if !defined (count_trailing_zeros)
/* Fallback count_trailing_zeros expressed through count_leading_zeros,
   whichever definition of the latter is in effect.  X & -X keeps only the
   lowest set bit of X; the position of that bit, counted from the bottom,
   is W_TYPE_SIZE - 1 minus its number of leading zeros.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_word = (x);                                            \
    UWtype __ctz_lz;                                                    \
    count_leading_zeros (__ctz_lz, __ctz_word & -__ctz_word);           \
    (count) = W_TYPE_SIZE - 1 - __ctz_lz;                               \
  } while (0)
#endif
1738
28f540f4
RM
1739#ifndef UDIV_NEEDS_NORMALIZATION
1740#define UDIV_NEEDS_NORMALIZATION 0
1741#endif