]> git.ipfire.org Git - thirdparty/glibc.git/blame - stdlib/longlong.h
Update copyright notices with scripts/update-copyrights
[thirdparty/glibc.git] / stdlib / longlong.h
CommitLineData
28f540f4 1/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
d4697bc9 2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
41b0afab 3
41bdb6e2 4 This file is part of the GNU C Library.
28f540f4 5
41bdb6e2
AJ
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
1da2d51a 8 License as published by the Free Software Foundation; either
41bdb6e2 9 version 2.1 of the License, or (at your option) any later version.
28f540f4 10
def7fbd6
AS
11 In addition to the permissions in the GNU Lesser General Public
12 License, the Free Software Foundation gives you unlimited
13 permission to link the compiled version of this file into
14 combinations with other programs, and to distribute those
15 combinations without any restriction coming from the use of this
16 file. (The Lesser General Public License restrictions do apply in
17 other respects; for example, they cover modification of the file,
18 and distribution when not linked into a combine executable.)
19
41bdb6e2
AJ
20 The GNU C Library is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 Lesser General Public License for more details.
28f540f4 24
41bdb6e2 25 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
26 License along with the GNU C Library; if not, see
27 <http://www.gnu.org/licenses/>. */
28f540f4 28
e9b3e3c5
UD
29/* You have to define the following before including this file:
30
31 UWtype -- An unsigned type, default type for operations (typically a "word")
32 UHWtype -- An unsigned type, at least half the size of UWtype.
33 UDWtype -- An unsigned type, at least twice as large as UWtype.
34 W_TYPE_SIZE -- size in bits of UWtype
35
36 UQItype -- Unsigned 8 bit type.
37 SItype, USItype -- Signed and unsigned 32 bit types.
38 DItype, UDItype -- Signed and unsigned 64 bit types.
39
40 On a 32 bit machine UWtype should typically be USItype;
f30070ae 41 on a 64 bit machine, UWtype should typically be UDItype. */
b928942e 42
e9b3e3c5
UD
/* Fall back to a 32-bit word configuration when the includer has not
   chosen one.  */
#if !defined (W_TYPE_SIZE)
# define W_TYPE_SIZE 32
# define UWtype USItype
# define UHWtype USItype
# define UDWtype UDItype
#endif

/* Helpers for splitting a word into halves:
   __BITS4          -- a quarter of the word size, in bits.
   __ll_B           -- the value 2^(W_TYPE_SIZE / 2), as a UWtype.
   __ll_lowpart(t)  -- the low half of T.
   __ll_highpart(t) -- the high half of T, shifted down to bit 0.  */
#define __BITS4 (W_TYPE_SIZE / 4)
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
04fbd653 54
def7fbd6
AS
55/* Used in glibc only. */
56#ifndef attribute_hidden
57#define attribute_hidden
58#endif
59
6f8a7dff 60extern const UQItype __clz_tab[256] attribute_hidden;
f30070ae 61
28f540f4
RM
62/* Define auxiliary asm macros.
63
f30070ae
RM
64 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
65 UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
e9b3e3c5 66 word product in HIGH_PROD and LOW_PROD.
28f540f4 67
e9b3e3c5
UD
68 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
69 UDWtype product. This is just a variant of umul_ppmm.
28f540f4
RM
70
71 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
e9b3e3c5
UD
72 denominator) divides a UDWtype, composed by the UWtype integers
73 HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
74 in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
75 than DENOMINATOR for correct operation. If, in addition, the most
76 significant bit of DENOMINATOR must be 1, then the pre-processor symbol
77 UDIV_NEEDS_NORMALIZATION is defined to 1.
28f540f4
RM
78
79 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
e9b3e3c5
UD
80 denominator). Like udiv_qrnnd but the numbers are signed. The quotient
81 is rounded towards 0.
1da2d51a 82
e9b3e3c5 83 5) count_leading_zeros(count, x) counts the number of zero-bits from the
41b0afab 84 msb to the first nonzero bit in the UWtype X. This is the number of
e9b3e3c5
UD
85 steps X needs to be shifted left to set the msb. Undefined for X == 0,
86 unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
1da2d51a 87
62818cfd
UD
88 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
89 from the least significant end.
90
91 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
e9b3e3c5
UD
92 high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by
93 HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
94 respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
95 (i.e. carry out) is not stored anywhere, and is lost.
96
97 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
98 high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
99 composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
100 LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
101 and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
28f540f4
RM
102 and is lost.
103
104 If any of these macros are left undefined for a particular CPU,
105 C macros are used. */
106
107/* The CPUs come in alphabetical order below.
108
109 Please add support for more CPUs here, or improve the current support
1da2d51a
UD
110 for the CPUs below!
111 (E.g. WE32100, IBM360.) */
28f540f4
RM
112
113#if defined (__GNUC__) && !defined (NO_ASM)
114
115/* We sometimes need to clobber "cc" with gcc2, but that would not be
116 understood by gcc1. Use cpp to avoid major code duplication. */
117#if __GNUC__ < 2
118#define __CLOBBER_CC
119#define __AND_CLOBBER_CC
120#else /* __GNUC__ >= 2 */
121#define __CLOBBER_CC : "cc"
122#define __AND_CLOBBER_CC , "cc"
123#endif /* __GNUC__ < 2 */
124
e9b3e3c5
UD
125#if defined (__alpha) && W_TYPE_SIZE == 64
126#define umul_ppmm(ph, pl, m0, m1) \
127 do { \
128 UDItype __m0 = (m0), __m1 = (m1); \
f30070ae 129 (ph) = __builtin_alpha_umulh (__m0, __m1); \
e9b3e3c5
UD
130 (pl) = __m0 * __m1; \
131 } while (0)
132#define UMUL_TIME 46
133#ifndef LONGLONG_STANDALONE
134#define udiv_qrnnd(q, r, n1, n0, d) \
135 do { UDItype __r; \
136 (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
137 (r) = __r; \
138 } while (0)
f2672ddd 139extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
e9b3e3c5
UD
140#define UDIV_TIME 220
141#endif /* LONGLONG_STANDALONE */
41b0afab 142#ifdef __alpha_cix__
f30070ae
RM
143#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
144#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
41b0afab
RM
145#define COUNT_LEADING_ZEROS_0 64
146#else
41b0afab
RM
147#define count_leading_zeros(COUNT,X) \
148 do { \
149 UDItype __xr = (X), __t, __a; \
f30070ae 150 __t = __builtin_alpha_cmpbge (0, __xr); \
41b0afab 151 __a = __clz_tab[__t ^ 0xff] - 1; \
f30070ae 152 __t = __builtin_alpha_extbl (__xr, __a); \
41b0afab
RM
153 (COUNT) = 64 - (__clz_tab[__t] + __a*8); \
154 } while (0)
155#define count_trailing_zeros(COUNT,X) \
156 do { \
157 UDItype __xr = (X), __t, __a; \
f30070ae 158 __t = __builtin_alpha_cmpbge (0, __xr); \
41b0afab
RM
159 __t = ~__t & -~__t; \
160 __a = ((__t & 0xCC) != 0) * 2; \
161 __a += ((__t & 0xF0) != 0) * 4; \
162 __a += ((__t & 0xAA) != 0); \
f30070ae 163 __t = __builtin_alpha_extbl (__xr, __a); \
41b0afab
RM
164 __a <<= 3; \
165 __t &= -__t; \
166 __a += ((__t & 0xCC) != 0) * 2; \
167 __a += ((__t & 0xF0) != 0) * 4; \
168 __a += ((__t & 0xAA) != 0); \
169 (COUNT) = __a; \
170 } while (0)
171#endif /* __alpha_cix__ */
e9b3e3c5
UD
172#endif /* __alpha */
173
174#if defined (__arc__) && W_TYPE_SIZE == 32
1da2d51a 175#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 176 __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
1da2d51a
UD
177 : "=r" ((USItype) (sh)), \
178 "=&r" ((USItype) (sl)) \
179 : "%r" ((USItype) (ah)), \
180 "rIJ" ((USItype) (bh)), \
181 "%r" ((USItype) (al)), \
182 "rIJ" ((USItype) (bl)))
183#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 184 __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
1da2d51a
UD
185 : "=r" ((USItype) (sh)), \
186 "=&r" ((USItype) (sl)) \
187 : "r" ((USItype) (ah)), \
188 "rIJ" ((USItype) (bh)), \
189 "r" ((USItype) (al)), \
190 "rIJ" ((USItype) (bl)))
ab07cea8
JM
191
/* Full unsigned 32x32->64 bit product of U and V, returned as a UDItype.
   Both arguments are parenthesized before being cast, so a compound
   argument such as `a + b' is converted and multiplied as a whole rather
   than being torn apart by operator precedence.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
193#ifdef __ARC_NORM__
194#define count_leading_zeros(count, x) \
195 do \
196 { \
197 SItype c_; \
198 \
199 __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
200 (count) = c_ + 1; \
201 } \
202 while (0)
203#define COUNT_LEADING_ZEROS_0 32
204#endif
1da2d51a 205#endif
28f540f4 206
8115f29b
L
207#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
208 && W_TYPE_SIZE == 32
28f540f4 209#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 210 __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \
1da2d51a
UD
211 : "=r" ((USItype) (sh)), \
212 "=&r" ((USItype) (sl)) \
213 : "%r" ((USItype) (ah)), \
214 "rI" ((USItype) (bh)), \
215 "%r" ((USItype) (al)), \
f30070ae 216 "rI" ((USItype) (bl)) __CLOBBER_CC)
28f540f4 217#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 218 __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \
1da2d51a
UD
219 : "=r" ((USItype) (sh)), \
220 "=&r" ((USItype) (sl)) \
221 : "r" ((USItype) (ah)), \
222 "rI" ((USItype) (bh)), \
223 "r" ((USItype) (al)), \
f30070ae 224 "rI" ((USItype) (bl)) __CLOBBER_CC)
8115f29b
L
225# if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \
226 || defined(__ARM_ARCH_3__)
227# define umul_ppmm(xh, xl, a, b) \
228 do { \
229 register USItype __t0, __t1, __t2; \
230 __asm__ ("%@ Inlined umul_ppmm\n" \
41b0afab
RM
231 " mov %2, %5, lsr #16\n" \
232 " mov %0, %6, lsr #16\n" \
233 " bic %3, %5, %2, lsl #16\n" \
234 " bic %4, %6, %0, lsl #16\n" \
235 " mul %1, %3, %4\n" \
236 " mul %4, %2, %4\n" \
237 " mul %3, %0, %3\n" \
238 " mul %0, %2, %0\n" \
239 " adds %3, %4, %3\n" \
240 " addcs %0, %0, #65536\n" \
241 " adds %1, %1, %3, lsl #16\n" \
242 " adc %0, %0, %3, lsr #16" \
1da2d51a
UD
243 : "=&r" ((USItype) (xh)), \
244 "=r" ((USItype) (xl)), \
245 "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \
246 : "r" ((USItype) (a)), \
8115f29b
L
247 "r" ((USItype) (b)) __CLOBBER_CC ); \
248 } while (0)
249# define UMUL_TIME 20
250# else
/* Unsigned 32x32->64 bit multiply written in plain C: XH receives the
   high word of A * B and XL the low word.  */
# define umul_ppmm(xh, xl, a, b) \
  do                                                                    \
    {                                                                   \
      /* Widen first; the compiler is left to pick umull itself.  */    \
      register UDItype __prod = (UDItype) (USItype) (a) * (USItype) (b); \
      (xl) = (USItype) __prod;                                          \
      (xh) = (USItype) (__prod >> 32);                                  \
    }                                                                   \
  while (0)
258# define UMUL_TIME 3
259# endif
260# define UDIV_TIME 100
28f540f4
RM
261#endif /* __arm__ */
262
24784465
RM
263#if defined(__arm__)
264/* Let gcc decide how best to implement count_leading_zeros. */
265#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
8115f29b 266#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
24784465
RM
267#define COUNT_LEADING_ZEROS_0 32
268#endif
269
8115f29b
L
270#if defined (__AVR__)
271
272#if W_TYPE_SIZE == 16
273#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
274#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X))
275#define COUNT_LEADING_ZEROS_0 16
276#endif /* W_TYPE_SIZE == 16 */
277
278#if W_TYPE_SIZE == 32
279#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X))
280#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X))
281#define COUNT_LEADING_ZEROS_0 32
282#endif /* W_TYPE_SIZE == 32 */
283
284#if W_TYPE_SIZE == 64
285#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X))
286#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X))
287#define COUNT_LEADING_ZEROS_0 64
288#endif /* W_TYPE_SIZE == 64 */
289
290#endif /* defined (__AVR__) */
291
ab07cea8
JM
292#if defined (__CRIS__)
293
294#if __CRIS_arch_version >= 3
24784465 295#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
ab07cea8
JM
296#define COUNT_LEADING_ZEROS_0 32
297#endif /* __CRIS_arch_version >= 3 */
298
24784465
RM
299#if __CRIS_arch_version >= 8
300#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
ab07cea8
JM
301#endif /* __CRIS_arch_version >= 8 */
302
303#if __CRIS_arch_version >= 10
304#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))
305#else
306#define __umulsidi3 __umulsidi3
307extern UDItype __umulsidi3 (USItype, USItype);
308#endif /* __CRIS_arch_version >= 10 */
309
/* Split the UDItype result of __umulsidi3 (U, V) into two words:
   W0 receives the low 32 bits and W1 the high 32 bits.  */
#define umul_ppmm(w1, w0, u, v) \
  do                                                                    \
    {                                                                   \
      UDItype __prod = __umulsidi3 (u, v);                              \
      (w0) = (USItype) (__prod);                                        \
      (w1) = (USItype) (__prod >> 32);                                  \
    }                                                                   \
  while (0)
316
317/* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for
318 DFmode ("double" intrinsics, avoiding two of the three insns handling
319 carry), but defining them as open-code C composing and doing the
320 operation in DImode (UDImode) shows that the DImode needs work:
321 register pressure from requiring neighboring registers and the
322 traffic to and from them come to dominate, in the 4.7 series. */
323
324#endif /* defined (__CRIS__) */
24784465 325
e9b3e3c5 326#if defined (__hppa) && W_TYPE_SIZE == 32
28f540f4 327#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 328 __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
1da2d51a
UD
329 : "=r" ((USItype) (sh)), \
330 "=&r" ((USItype) (sl)) \
331 : "%rM" ((USItype) (ah)), \
332 "rM" ((USItype) (bh)), \
333 "%rM" ((USItype) (al)), \
334 "rM" ((USItype) (bl)))
28f540f4 335#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 336 __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
1da2d51a
UD
337 : "=r" ((USItype) (sh)), \
338 "=&r" ((USItype) (sl)) \
339 : "rM" ((USItype) (ah)), \
340 "rM" ((USItype) (bh)), \
341 "rM" ((USItype) (al)), \
342 "rM" ((USItype) (bl)))
28f540f4 343#if defined (_PA_RISC1_1)
1da2d51a 344#define umul_ppmm(w1, w0, u, v) \
28f540f4 345 do { \
1da2d51a
UD
346 union \
347 { \
348 UDItype __f; \
349 struct {USItype __w1, __w0;} __w1w0; \
350 } __t; \
28f540f4 351 __asm__ ("xmpyu %1,%2,%0" \
1da2d51a
UD
352 : "=x" (__t.__f) \
353 : "x" ((USItype) (u)), \
354 "x" ((USItype) (v))); \
355 (w1) = __t.__w1w0.__w1; \
356 (w0) = __t.__w1w0.__w0; \
357 } while (0)
28f540f4 358#define UMUL_TIME 8
28f540f4 359#else
1da2d51a 360#define UMUL_TIME 30
28f540f4 361#endif
1da2d51a 362#define UDIV_TIME 40
28f540f4 363#define count_leading_zeros(count, x) \
41b0afab
RM
364 do { \
365 USItype __tmp; \
366 __asm__ ( \
367 "ldi 1,%0\n" \
368" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
369" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\
370" ldo 16(%0),%0 ; Yes. Perform add.\n" \
371" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
372" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\
373" ldo 8(%0),%0 ; Yes. Perform add.\n" \
374" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
375" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\
376" ldo 4(%0),%0 ; Yes. Perform add.\n" \
377" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
378" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\
379" ldo 2(%0),%0 ; Yes. Perform add.\n" \
380" extru %1,30,1,%1 ; Extract bit 1.\n" \
381" sub %0,%1,%0 ; Subtract it.\n" \
382 : "=r" (count), "=r" (__tmp) : "1" (x)); \
28f540f4 383 } while (0)
28f540f4
RM
384#endif
385
f30070ae 386#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
def7fbd6 387#if !defined (__zarch__)
e9b3e3c5
UD
388#define smul_ppmm(xh, xl, m0, m1) \
389 do { \
390 union {DItype __ll; \
391 struct {USItype __h, __l;} __i; \
f30070ae
RM
392 } __x; \
393 __asm__ ("lr %N0,%1\n\tmr %0,%2" \
394 : "=&r" (__x.__ll) \
395 : "r" (m0), "r" (m1)); \
396 (xh) = __x.__i.__h; (xl) = __x.__i.__l; \
e9b3e3c5
UD
397 } while (0)
398#define sdiv_qrnnd(q, r, n1, n0, d) \
399 do { \
400 union {DItype __ll; \
401 struct {USItype __h, __l;} __i; \
f30070ae
RM
402 } __x; \
403 __x.__i.__h = n1; __x.__i.__l = n0; \
e9b3e3c5 404 __asm__ ("dr %0,%2" \
f30070ae
RM
405 : "=r" (__x.__ll) \
406 : "0" (__x.__ll), "r" (d)); \
407 (q) = __x.__i.__l; (r) = __x.__i.__h; \
e9b3e3c5 408 } while (0)
def7fbd6
AS
409#else
410#define smul_ppmm(xh, xl, m0, m1) \
411 do { \
48693bea
AK
412 register SItype __r0 __asm__ ("0"); \
413 register SItype __r1 __asm__ ("1") = (m0); \
414 \
def7fbd6 415 __asm__ ("mr\t%%r0,%3" \
48693bea
AK
416 : "=r" (__r0), "=r" (__r1) \
417 : "r" (__r1), "r" (m1)); \
418 (xh) = __r0; (xl) = __r1; \
def7fbd6 419 } while (0)
48693bea 420
def7fbd6
AS
421#define sdiv_qrnnd(q, r, n1, n0, d) \
422 do { \
48693bea
AK
423 register SItype __r0 __asm__ ("0") = (n1); \
424 register SItype __r1 __asm__ ("1") = (n0); \
425 \
426 __asm__ ("dr\t%%r0,%4" \
427 : "=r" (__r0), "=r" (__r1) \
428 : "r" (__r0), "r" (__r1), "r" (d)); \
429 (q) = __r1; (r) = __r0; \
def7fbd6
AS
430 } while (0)
431#endif /* __zarch__ */
e9b3e3c5
UD
432#endif
433
434#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
28f540f4 435#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
24784465 436 __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \
1da2d51a
UD
437 : "=r" ((USItype) (sh)), \
438 "=&r" ((USItype) (sl)) \
439 : "%0" ((USItype) (ah)), \
440 "g" ((USItype) (bh)), \
441 "%1" ((USItype) (al)), \
442 "g" ((USItype) (bl)))
28f540f4 443#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
24784465 444 __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \
1da2d51a
UD
445 : "=r" ((USItype) (sh)), \
446 "=&r" ((USItype) (sl)) \
447 : "0" ((USItype) (ah)), \
448 "g" ((USItype) (bh)), \
449 "1" ((USItype) (al)), \
450 "g" ((USItype) (bl)))
28f540f4 451#define umul_ppmm(w1, w0, u, v) \
24784465 452 __asm__ ("mul{l} %3" \
1da2d51a
UD
453 : "=a" ((USItype) (w0)), \
454 "=d" ((USItype) (w1)) \
455 : "%0" ((USItype) (u)), \
456 "rm" ((USItype) (v)))
41b0afab 457#define udiv_qrnnd(q, r, n1, n0, dv) \
24784465 458 __asm__ ("div{l} %4" \
1da2d51a
UD
459 : "=a" ((USItype) (q)), \
460 "=d" ((USItype) (r)) \
461 : "0" ((USItype) (n0)), \
462 "1" ((USItype) (n1)), \
41b0afab 463 "rm" ((USItype) (dv)))
24784465
RM
464#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
465#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
28f540f4
RM
466#define UMUL_TIME 40
467#define UDIV_TIME 40
468#endif /* 80x86 */
469
24784465
RM
/* 64-bit x86 primitives.  Every asm below uses quad-word ({q})
   instructions on UDItype operands, so the section is only usable when
   compiling for x86-64.  A 32-bit (__i386__) build that selects
   W_TYPE_SIZE == 64 must not pick these up; it gets the generic C
   macros instead.  */
#if defined (__x86_64__) && W_TYPE_SIZE == 64
/* (sh:sl) = (ah:al) + (bh:bl): add the low words, then add the high
   words with carry.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}"               \
           : "=r" ((UDItype) (sh)),                                     \
             "=&r" ((UDItype) (sl))                                     \
           : "%0" ((UDItype) (ah)),                                     \
             "rme" ((UDItype) (bh)),                                    \
             "%1" ((UDItype) (al)),                                     \
             "rme" ((UDItype) (bl)))
/* (sh:sl) = (ah:al) - (bh:bl): subtract the low words, then subtract
   the high words with borrow.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}"               \
           : "=r" ((UDItype) (sh)),                                     \
             "=&r" ((UDItype) (sl))                                     \
           : "0" ((UDItype) (ah)),                                      \
             "rme" ((UDItype) (bh)),                                    \
             "1" ((UDItype) (al)),                                      \
             "rme" ((UDItype) (bl)))
/* Two-word product of U and V: W0 is bound to the "a" register operand
   and W1 to the "d" register operand of the mul instruction.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mul{q} %3"                                                  \
           : "=a" ((UDItype) (w0)),                                     \
             "=d" ((UDItype) (w1))                                      \
           : "%0" ((UDItype) (u)),                                      \
             "rm" ((UDItype) (v)))
/* Divide (n1:n0) by DV: N0 goes in through the "a" operand and N1
   through "d"; Q and R come back through the same two operands.  */
#define udiv_qrnnd(q, r, n1, n0, dv) \
  __asm__ ("div{q} %4"                                                  \
           : "=a" ((UDItype) (q)),                                      \
             "=d" ((UDItype) (r))                                       \
           : "0" ((UDItype) (n0)),                                      \
             "1" ((UDItype) (n1)),                                      \
             "rm" ((UDItype) (dv)))
/* Bit counts are left to the compiler builtins.  COUNT_LEADING_ZEROS_0
   is not defined here, so a zero argument has no defined result.  */
#define count_leading_zeros(count, x)   ((count) = __builtin_clzll (x))
#define count_trailing_zeros(count, x)  ((count) = __builtin_ctzll (x))
#define UMUL_TIME 40
#define UDIV_TIME 40
#endif /* x86_64 */
505
e9b3e3c5 506#if defined (__i960__) && W_TYPE_SIZE == 32
28f540f4
RM
507#define umul_ppmm(w1, w0, u, v) \
508 ({union {UDItype __ll; \
509 struct {USItype __l, __h;} __i; \
510 } __xx; \
511 __asm__ ("emul %2,%1,%0" \
512 : "=d" (__xx.__ll) \
1da2d51a
UD
513 : "%dI" ((USItype) (u)), \
514 "dI" ((USItype) (v))); \
28f540f4
RM
515 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
516#define __umulsidi3(u, v) \
517 ({UDItype __w; \
518 __asm__ ("emul %2,%1,%0" \
519 : "=d" (__w) \
1da2d51a
UD
520 : "%dI" ((USItype) (u)), \
521 "dI" ((USItype) (v))); \
62818cfd 522 __w; })
1da2d51a 523#endif /* __i960__ */
28f540f4 524
def7fbd6
AS
525#if defined (__ia64) && W_TYPE_SIZE == 64
/* Two-word subtraction: (sh:sl) = (ah:al) - (bh:bl).  A borrow out of
   the low word is propagated into the high word; a borrow out of the
   high word is not recorded.

   This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.

   All four inputs are copied into locals first, so each macro argument
   is evaluated exactly once even when it has side effects.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __sub_ah = (ah), __sub_al = (al);                            \
    UWtype __sub_bh = (bh), __sub_bl = (bl);                            \
    UWtype __x = __sub_al - __sub_bl;                                   \
    if (__sub_al < __sub_bl)                                            \
      (sh) = __sub_ah - __sub_bh - 1;   /* borrow from the low word */  \
    else                                                                \
      (sh) = __sub_ah - __sub_bh;                                       \
    (sl) = __x;                                                         \
  } while (0)
540
541/* Do both product parts in assembly, since that gives better code with
542 all gcc versions. Some callers will just use the upper part, and in
543 that situation we waste an instruction, but not any cycles. */
544#define umul_ppmm(ph, pl, m0, m1) \
545 __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
546 : "=&f" (ph), "=f" (pl) \
547 : "f" (m0), "f" (m1))
548#define count_leading_zeros(count, x) \
549 do { \
550 UWtype _x = (x), _y, _a, _c; \
551 __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
552 __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
553 _c = (_a - 1) << 3; \
554 _x >>= _c; \
555 if (_x >= 1 << 4) \
556 _x >>= 4, _c += 4; \
557 if (_x >= 1 << 2) \
558 _x >>= 2, _c += 2; \
559 _c += _x >> 1; \
560 (count) = W_TYPE_SIZE - 1 - _c; \
561 } while (0)
562/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
563 based, and we don't need a special case for x==0 here */
564#define count_trailing_zeros(count, x) \
565 do { \
566 UWtype __ctz_x = (x); \
567 __asm__ ("popcnt %0 = %1" \
568 : "=r" (count) \
569 : "r" ((__ctz_x-1) & ~__ctz_x)); \
570 } while (0)
571#define UMUL_TIME 14
572#endif
573
e9b3e3c5 574#if defined (__M32R__) && W_TYPE_SIZE == 32
1da2d51a
UD
575#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
576 /* The cmp clears the condition bit. */ \
181742f8 577 __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \
1da2d51a
UD
578 : "=r" ((USItype) (sh)), \
579 "=&r" ((USItype) (sl)) \
181742f8 580 : "0" ((USItype) (ah)), \
1da2d51a 581 "r" ((USItype) (bh)), \
181742f8 582 "1" ((USItype) (al)), \
1da2d51a
UD
583 "r" ((USItype) (bl)) \
584 : "cbit")
585#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
586 /* The cmp clears the condition bit. */ \
181742f8 587 __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \
1da2d51a
UD
588 : "=r" ((USItype) (sh)), \
589 "=&r" ((USItype) (sl)) \
590 : "0" ((USItype) (ah)), \
591 "r" ((USItype) (bh)), \
592 "1" ((USItype) (al)), \
593 "r" ((USItype) (bl)) \
594 : "cbit")
595#endif /* __M32R__ */
596
e9b3e3c5 597#if defined (__mc68000__) && W_TYPE_SIZE == 32
28f540f4 598#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 599 __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
1da2d51a
UD
600 : "=d" ((USItype) (sh)), \
601 "=&d" ((USItype) (sl)) \
602 : "%0" ((USItype) (ah)), \
603 "d" ((USItype) (bh)), \
604 "%1" ((USItype) (al)), \
605 "g" ((USItype) (bl)))
28f540f4 606#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 607 __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
1da2d51a
UD
608 : "=d" ((USItype) (sh)), \
609 "=&d" ((USItype) (sl)) \
610 : "0" ((USItype) (ah)), \
611 "d" ((USItype) (bh)), \
612 "1" ((USItype) (al)), \
613 "g" ((USItype) (bl)))
614
f30070ae
RM
615/* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r. */
616#if (defined (__mc68020__) && !defined (__mc68060__))
28f540f4
RM
617#define umul_ppmm(w1, w0, u, v) \
618 __asm__ ("mulu%.l %3,%1:%0" \
1da2d51a
UD
619 : "=d" ((USItype) (w0)), \
620 "=d" ((USItype) (w1)) \
621 : "%0" ((USItype) (u)), \
622 "dmi" ((USItype) (v)))
28f540f4
RM
623#define UMUL_TIME 45
624#define udiv_qrnnd(q, r, n1, n0, d) \
625 __asm__ ("divu%.l %4,%1:%0" \
1da2d51a
UD
626 : "=d" ((USItype) (q)), \
627 "=d" ((USItype) (r)) \
628 : "0" ((USItype) (n0)), \
629 "1" ((USItype) (n1)), \
630 "dmi" ((USItype) (d)))
28f540f4
RM
631#define UDIV_TIME 90
632#define sdiv_qrnnd(q, r, n1, n0, d) \
633 __asm__ ("divs%.l %4,%1:%0" \
1da2d51a
UD
634 : "=d" ((USItype) (q)), \
635 "=d" ((USItype) (r)) \
636 : "0" ((USItype) (n0)), \
637 "1" ((USItype) (n1)), \
638 "dmi" ((USItype) (d)))
639
f30070ae
RM
640#elif defined (__mcoldfire__) /* not mc68020 */
641
642#define umul_ppmm(xh, xl, a, b) \
643 __asm__ ("| Inlined umul_ppmm\n" \
644 " move%.l %2,%/d0\n" \
645 " move%.l %3,%/d1\n" \
646 " move%.l %/d0,%/d2\n" \
647 " swap %/d0\n" \
648 " move%.l %/d1,%/d3\n" \
649 " swap %/d1\n" \
650 " move%.w %/d2,%/d4\n" \
651 " mulu %/d3,%/d4\n" \
652 " mulu %/d1,%/d2\n" \
653 " mulu %/d0,%/d3\n" \
654 " mulu %/d0,%/d1\n" \
655 " move%.l %/d4,%/d0\n" \
656 " clr%.w %/d0\n" \
657 " swap %/d0\n" \
658 " add%.l %/d0,%/d2\n" \
659 " add%.l %/d3,%/d2\n" \
660 " jcc 1f\n" \
661 " add%.l %#65536,%/d1\n" \
662 "1: swap %/d2\n" \
663 " moveq %#0,%/d0\n" \
664 " move%.w %/d2,%/d0\n" \
665 " move%.w %/d4,%/d2\n" \
666 " move%.l %/d2,%1\n" \
667 " add%.l %/d1,%/d0\n" \
668 " move%.l %/d0,%0" \
669 : "=g" ((USItype) (xh)), \
670 "=g" ((USItype) (xl)) \
671 : "g" ((USItype) (a)), \
672 "g" ((USItype) (b)) \
673 : "d0", "d1", "d2", "d3", "d4")
674#define UMUL_TIME 100
675#define UDIV_TIME 400
676#else /* not ColdFire */
1da2d51a 677/* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */
ba848785 678#define umul_ppmm(xh, xl, a, b) \
772d0e1a 679 __asm__ ("| Inlined umul_ppmm\n" \
41b0afab
RM
680 " move%.l %2,%/d0\n" \
681 " move%.l %3,%/d1\n" \
682 " move%.l %/d0,%/d2\n" \
683 " swap %/d0\n" \
684 " move%.l %/d1,%/d3\n" \
685 " swap %/d1\n" \
686 " move%.w %/d2,%/d4\n" \
687 " mulu %/d3,%/d4\n" \
688 " mulu %/d1,%/d2\n" \
689 " mulu %/d0,%/d3\n" \
690 " mulu %/d0,%/d1\n" \
691 " move%.l %/d4,%/d0\n" \
692 " eor%.w %/d0,%/d0\n" \
693 " swap %/d0\n" \
694 " add%.l %/d0,%/d2\n" \
695 " add%.l %/d3,%/d2\n" \
696 " jcc 1f\n" \
697 " add%.l %#65536,%/d1\n" \
698 "1: swap %/d2\n" \
699 " moveq %#0,%/d0\n" \
700 " move%.w %/d2,%/d0\n" \
701 " move%.w %/d4,%/d2\n" \
702 " move%.l %/d2,%1\n" \
703 " add%.l %/d1,%/d0\n" \
704 " move%.l %/d0,%0" \
1da2d51a
UD
705 : "=g" ((USItype) (xh)), \
706 "=g" ((USItype) (xl)) \
707 : "g" ((USItype) (a)), \
708 "g" ((USItype) (b)) \
709 : "d0", "d1", "d2", "d3", "d4")
28f540f4
RM
710#define UMUL_TIME 100
711#define UDIV_TIME 400
f30070ae 712
28f540f4 713#endif /* not mc68020 */
1da2d51a 714
f30070ae
RM
715/* The '020, '030, '040 and '060 have bitfield insns.
716 cpu32 disguises as a 68020, but lacks them. */
717#if defined (__mc68020__) && !defined (__mcpu32__)
1da2d51a
UD
718#define count_leading_zeros(count, x) \
719 __asm__ ("bfffo %1{%b2:%b2},%0" \
720 : "=d" ((USItype) (count)) \
721 : "od" ((USItype) (x)), "n" (0))
24784465
RM
722/* Some ColdFire architectures have a ff1 instruction supported via
723 __builtin_clz. */
724#elif defined (__mcfisaaplus__) || defined (__mcfisac__)
725#define count_leading_zeros(count,x) ((count) = __builtin_clz (x))
726#define COUNT_LEADING_ZEROS_0 32
1da2d51a 727#endif
28f540f4
RM
728#endif /* mc68000 */
729
e9b3e3c5 730#if defined (__m88000__) && W_TYPE_SIZE == 32
28f540f4 731#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 732 __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
1da2d51a
UD
733 : "=r" ((USItype) (sh)), \
734 "=&r" ((USItype) (sl)) \
735 : "%rJ" ((USItype) (ah)), \
736 "rJ" ((USItype) (bh)), \
737 "%rJ" ((USItype) (al)), \
738 "rJ" ((USItype) (bl)))
28f540f4 739#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 740 __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
1da2d51a
UD
741 : "=r" ((USItype) (sh)), \
742 "=&r" ((USItype) (sl)) \
743 : "rJ" ((USItype) (ah)), \
744 "rJ" ((USItype) (bh)), \
745 "rJ" ((USItype) (al)), \
746 "rJ" ((USItype) (bl)))
28f540f4
RM
747#define count_leading_zeros(count, x) \
748 do { \
749 USItype __cbtmp; \
750 __asm__ ("ff1 %0,%1" \
751 : "=r" (__cbtmp) \
1da2d51a 752 : "r" ((USItype) (x))); \
28f540f4
RM
753 (count) = __cbtmp ^ 31; \
754 } while (0)
e9b3e3c5 755#define COUNT_LEADING_ZEROS_0 63 /* sic */
1da2d51a 756#if defined (__mc88110__)
28f540f4
RM
757#define umul_ppmm(wh, wl, u, v) \
758 do { \
759 union {UDItype __ll; \
760 struct {USItype __h, __l;} __i; \
761 } __xx; \
762 __asm__ ("mulu.d %0,%1,%2" \
763 : "=r" (__xx.__ll) \
1da2d51a
UD
764 : "r" ((USItype) (u)), \
765 "r" ((USItype) (v))); \
28f540f4
RM
766 (wh) = __xx.__i.__h; \
767 (wl) = __xx.__i.__l; \
768 } while (0)
/* Divide the two-word numerator (n1:n0) by D.  The numerator is assembled
   in a UDItype union and handed to the divu.d instruction, whose result
   is taken as the quotient Q; the remainder R is then computed as
   n0 - q * d.  N0 and D are captured in locals so that each macro
   argument is evaluated only once.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  ({union {UDItype __ll;                                                \
           struct {USItype __h, __l;} __i;                              \
          } __xx;                                                       \
  USItype __q;                                                          \
  USItype __n0 = (n0), __d = (d);                                       \
  __xx.__i.__h = (n1); __xx.__i.__l = __n0;                             \
  __asm__ ("divu.d %0,%1,%2"                                            \
           : "=r" (__q)                                                 \
           : "r" (__xx.__ll),                                           \
             "r" (__d));                                                \
  (r) = __n0 - __q * __d; (q) = __q; })
780#define UMUL_TIME 5
781#define UDIV_TIME 25
782#else
783#define UMUL_TIME 17
784#define UDIV_TIME 150
1da2d51a 785#endif /* __mc88110__ */
28f540f4
RM
786#endif /* __m88000__ */
787
def7fbd6
AS
788#if defined (__mn10300__)
/* Multiply primitives.  Both variants bind W0 to operand 0 and W1 to
   operand 1 of the multiply instruction.  They are spelled with
   __asm__ rather than asm so that the header still compiles when the
   compiler is run in a strict ISO mode, where `asm' is not a keyword.  */
# if defined (__AM33__)
#  define count_leading_zeros(COUNT,X)  ((COUNT) = __builtin_clz (X))
#  define umul_ppmm(w1, w0, u, v)       \
  __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
#  define smul_ppmm(w1, w0, u, v)       \
  __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
#  define umul_ppmm(w1, w0, u, v)       \
  __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
#  define smul_ppmm(w1, w0, u, v)       \
  __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* Two-word addition: (sh:sl) = (ah:al) + (bh:bl).  Each operand pair is
   packed into a DWunion and the sum is formed with a single addition on
   its `ll' member, then unpacked again.  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do                                                                    \
    {                                                                   \
      DWunion __sum, __lhs, __rhs;                                      \
      __lhs.s.low = (al);                                               \
      __lhs.s.high = (ah);                                              \
      __rhs.s.low = (bl);                                               \
      __rhs.s.high = (bh);                                              \
      __sum.ll = __lhs.ll + __rhs.ll;                                   \
      (sl) = __sum.s.low;                                               \
      (sh) = __sum.s.high;                                              \
    }                                                                   \
  while (0)
/* Two-word subtraction: (sh:sl) = (ah:al) - (bh:bl).  Each operand pair
   is packed into a DWunion and the difference is formed with a single
   subtraction on its `ll' member, then unpacked again.  */
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do                                                                    \
    {                                                                   \
      DWunion __diff, __lhs, __rhs;                                     \
      __lhs.s.low = (al);                                               \
      __lhs.s.high = (ah);                                              \
      __rhs.s.low = (bl);                                               \
      __rhs.s.high = (bh);                                              \
      __diff.ll = __lhs.ll - __rhs.ll;                                  \
      (sl) = __diff.s.low;                                              \
      (sh) = __diff.s.high;                                             \
    }                                                                   \
  while (0)
/* Unsigned and signed division of the two-word numerator (nh:nl) by D.
   NL is tied to the Q operand and NH to the R operand of the divide
   instruction.  __asm__ is used instead of asm so that the header still
   compiles in strict ISO modes, where `asm' is not a keyword.  */
# define udiv_qrnnd(q, r, nh, nl, d)    \
  __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d)    \
  __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
821# define UMUL_TIME 3
822# define UDIV_TIME 38
823#endif
824
e9b3e3c5 825#if defined (__mips__) && W_TYPE_SIZE == 32
24784465
RM
/* Unsigned 32x32->64 bit multiply in plain C: W1 receives the high word
   of U * V and W0 the low word.  */
#define umul_ppmm(w1, w0, u, v) \
  do                                                                    \
    {                                                                   \
      UDItype __prod = (UDItype) (USItype) (u) * (USItype) (v);         \
      (w1) = (USItype) (__prod >> 32);                                  \
      (w0) = (USItype) (__prod);                                        \
    }                                                                   \
  while (0)
28f540f4
RM
832#define UMUL_TIME 10
833#define UDIV_TIME 100
24784465
RM
834
835#if (__mips == 32 || __mips == 64) && ! __mips16
836#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
837#define COUNT_LEADING_ZEROS_0 32
838#endif
28f540f4
RM
839#endif /* __mips__ */
840
e9b3e3c5 841#if defined (__ns32000__) && W_TYPE_SIZE == 32
28f540f4
RM
842#define umul_ppmm(w1, w0, u, v) \
843 ({union {UDItype __ll; \
844 struct {USItype __l, __h;} __i; \
845 } __xx; \
846 __asm__ ("meid %2,%0" \
847 : "=g" (__xx.__ll) \
1da2d51a
UD
848 : "%0" ((USItype) (u)), \
849 "g" ((USItype) (v))); \
28f540f4
RM
850 (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
851#define __umulsidi3(u, v) \
852 ({UDItype __w; \
853 __asm__ ("meid %2,%0" \
854 : "=g" (__w) \
1da2d51a
UD
855 : "%0" ((USItype) (u)), \
856 "g" ((USItype) (v))); \
28f540f4
RM
857 __w; })
858#define udiv_qrnnd(q, r, n1, n0, d) \
859 ({union {UDItype __ll; \
860 struct {USItype __l, __h;} __i; \
861 } __xx; \
862 __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
863 __asm__ ("deid %2,%0" \
864 : "=g" (__xx.__ll) \
865 : "0" (__xx.__ll), \
1da2d51a 866 "g" ((USItype) (d))); \
28f540f4 867 (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
62818cfd 868#define count_trailing_zeros(count,x) \
41b0afab
RM
869 do { \
870 __asm__ ("ffsd %2,%0" \
48693bea
AK
871 : "=r" ((USItype) (count)) \
872 : "0" ((USItype) 0), \
873 "r" ((USItype) (x))); \
62818cfd 874 } while (0)
28f540f4
RM
875#endif /* __ns32000__ */
876
41b0afab
RM
877/* FIXME: We should test _IBMR2 here when we add assembly support for the
878 system vendor compilers.
879 FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good
880 enough, since that hits ARM and m68k too. */
881#if (defined (_ARCH_PPC) /* AIX */ \
41b0afab
RM
882 || defined (__powerpc__) /* gcc */ \
883 || defined (__POWERPC__) /* BEOS */ \
884 || defined (__ppc__) /* Darwin */ \
24784465
RM
885 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
886 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
48693bea 887 && CPU_FAMILY == PPC) \
41b0afab 888 ) && W_TYPE_SIZE == 32
28f540f4
RM
889#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
890 do { \
891 if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 892 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
41b0afab
RM
893 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
894 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
c3c8283c 895 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
41b0afab 896 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
28f540f4 897 else \
c3c8283c 898 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
41b0afab
RM
899 : "=r" (sh), "=&r" (sl) \
900 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
28f540f4
RM
901 } while (0)
902#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
903 do { \
904 if (__builtin_constant_p (ah) && (ah) == 0) \
c3c8283c 905 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
41b0afab
RM
906 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
907 else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
c3c8283c 908 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
41b0afab 909 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
28f540f4 910 else if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 911 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
41b0afab
RM
912 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
913 else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
c3c8283c 914 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
41b0afab 915 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
28f540f4 916 else \
c3c8283c 917 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
41b0afab
RM
918 : "=r" (sh), "=&r" (sl) \
919 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
28f540f4
RM
920 } while (0)
921#define count_leading_zeros(count, x) \
c3c8283c 922 __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
e9b3e3c5 923#define COUNT_LEADING_ZEROS_0 32
41b0afab 924#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
24784465
RM
925 || defined (__ppc__) \
926 || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
927 || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
48693bea 928 && CPU_FAMILY == PPC)
28f540f4
RM
/* (ph, pl) = m0 * m1, unsigned.  The operands are copied into locals
   first and only the copies are used afterwards, so each macro argument
   is evaluated exactly once: the copies feed both the mulhwu that
   produces the high word and the C multiplication that produces the
   low word.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    USItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
935#define UMUL_TIME 15
/* (ph, pl) = m0 * m1, signed.  The operands are converted to SItype
   locals once; the asm and the low-word multiplication both use those
   copies, so the macro arguments are not evaluated a second time and
   the mulhw sees the same converted values as the C multiply.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {									\
    SItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
942#define SMUL_TIME 14
943#define UDIV_TIME 120
28f540f4 944#endif
41b0afab
RM
945#endif /* 32-bit POWER architecture variants. */
946
947/* We should test _IBMR2 here when we add assembly support for the system
948 vendor compilers. */
949#if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64
09af82c9
RM
950#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
951 do { \
952 if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 953 __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
41b0afab
RM
954 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
955 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
c3c8283c 956 __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
41b0afab 957 : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
09af82c9 958 else \
c3c8283c 959 __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
41b0afab
RM
960 : "=r" (sh), "=&r" (sl) \
961 : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
09af82c9
RM
962 } while (0)
963#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
964 do { \
965 if (__builtin_constant_p (ah) && (ah) == 0) \
c3c8283c 966 __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
41b0afab
RM
967 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
968 else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
c3c8283c 969 __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
41b0afab 970 : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
09af82c9 971 else if (__builtin_constant_p (bh) && (bh) == 0) \
c3c8283c 972 __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
41b0afab
RM
973 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
974 else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
c3c8283c 975 __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
41b0afab 976 : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
09af82c9 977 else \
c3c8283c 978 __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
41b0afab
RM
979 : "=r" (sh), "=&r" (sl) \
980 : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
09af82c9 981 } while (0)
09af82c9 982#define count_leading_zeros(count, x) \
41b0afab 983 __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
09af82c9 984#define COUNT_LEADING_ZEROS_0 64
09af82c9
RM
/* (ph, pl) = m0 * m1, unsigned, 64-bit words.  Both the mulhdu for the
   high word and the C multiplication for the low word read the local
   copies, so each macro argument is evaluated exactly once.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do {									\
    UDItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
41b0afab 991#define UMUL_TIME 15
09af82c9
RM
/* (ph, pl) = m0 * m1, signed, 64-bit words.  The operands are converted
   to DItype locals once and only those copies are used, so the macro
   arguments are not evaluated a second time by the asm.  */
#define smul_ppmm(ph, pl, m0, m1) \
  do {									\
    DItype __m0 = (m0), __m1 = (m1);					\
    __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1));	\
    (pl) = __m0 * __m1;							\
  } while (0)
41b0afab
RM
998#define SMUL_TIME 14 /* ??? */
999#define UDIV_TIME 120 /* ??? */
1000#endif /* 64-bit PowerPC. */
28f540f4 1001
e9b3e3c5 1002#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
28f540f4 1003#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 1004 __asm__ ("a %1,%5\n\tae %0,%3" \
1da2d51a
UD
1005 : "=r" ((USItype) (sh)), \
1006 "=&r" ((USItype) (sl)) \
1007 : "%0" ((USItype) (ah)), \
1008 "r" ((USItype) (bh)), \
1009 "%1" ((USItype) (al)), \
1010 "r" ((USItype) (bl)))
28f540f4 1011#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 1012 __asm__ ("s %1,%5\n\tse %0,%3" \
1da2d51a
UD
1013 : "=r" ((USItype) (sh)), \
1014 "=&r" ((USItype) (sl)) \
1015 : "0" ((USItype) (ah)), \
1016 "r" ((USItype) (bh)), \
1017 "1" ((USItype) (al)), \
1018 "r" ((USItype) (bl)))
28f540f4
RM
1019#define umul_ppmm(ph, pl, m0, m1) \
1020 do { \
1021 USItype __m0 = (m0), __m1 = (m1); \
1022 __asm__ ( \
41b0afab
RM
1023 "s r2,r2\n" \
1024" mts r10,%2\n" \
1025" m r2,%3\n" \
1026" m r2,%3\n" \
1027" m r2,%3\n" \
1028" m r2,%3\n" \
1029" m r2,%3\n" \
1030" m r2,%3\n" \
1031" m r2,%3\n" \
1032" m r2,%3\n" \
1033" m r2,%3\n" \
1034" m r2,%3\n" \
1035" m r2,%3\n" \
1036" m r2,%3\n" \
1037" m r2,%3\n" \
1038" m r2,%3\n" \
1039" m r2,%3\n" \
1040" m r2,%3\n" \
1041" cas %0,r2,r0\n" \
1042" mfs r10,%1" \
1da2d51a
UD
1043 : "=r" ((USItype) (ph)), \
1044 "=r" ((USItype) (pl)) \
28f540f4
RM
1045 : "%r" (__m0), \
1046 "r" (__m1) \
1047 : "r2"); \
1048 (ph) += ((((SItype) __m0 >> 31) & __m1) \
1049 + (((SItype) __m1 >> 31) & __m0)); \
1050 } while (0)
1051#define UMUL_TIME 20
1052#define UDIV_TIME 200
1053#define count_leading_zeros(count, x) \
1054 do { \
1055 if ((x) >= 0x10000) \
1056 __asm__ ("clz %0,%1" \
1da2d51a
UD
1057 : "=r" ((USItype) (count)) \
1058 : "r" ((USItype) (x) >> 16)); \
28f540f4
RM
1059 else \
1060 { \
1061 __asm__ ("clz %0,%1" \
1da2d51a
UD
1062 : "=r" ((USItype) (count)) \
1063 : "r" ((USItype) (x))); \
28f540f4
RM
1064 (count) += 16; \
1065 } \
1066 } while (0)
8f5ca04b
RM
1067#endif
1068
24784465
RM
1069#if defined(__sh__) && !__SHMEDIA__ && W_TYPE_SIZE == 32
1070#ifndef __sh1__
e9b3e3c5
UD
1071#define umul_ppmm(w1, w0, u, v) \
1072 __asm__ ( \
24784465
RM
1073 "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \
1074 : "=r<" ((USItype)(w1)), \
1075 "=r<" ((USItype)(w0)) \
e9b3e3c5
UD
1076 : "r" ((USItype)(u)), \
1077 "r" ((USItype)(v)) \
1078 : "macl", "mach")
1079#define UMUL_TIME 5
1080#endif
1081
24784465
RM
1082/* This is the same algorithm as __udiv_qrnnd_c. */
1083#define UDIV_NEEDS_NORMALIZATION 1
1084
1085#define udiv_qrnnd(q, r, n1, n0, d) \
1086 do { \
1087 extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \
48693bea 1088 __attribute__ ((visibility ("hidden"))); \
24784465
RM
1089 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \
1090 __asm__ ( \
1091 "mov%M4 %4,r5\n" \
1092" swap.w %3,r4\n" \
1093" swap.w r5,r6\n" \
1094" jsr @%5\n" \
1095" shll16 r6\n" \
1096" swap.w r4,r4\n" \
1097" jsr @%5\n" \
1098" swap.w r1,%0\n" \
1099" or r1,%0" \
1100 : "=r" (q), "=&z" (r) \
1101 : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \
78fd882a 1102 : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \
24784465
RM
1103 } while (0)
1104
1105#define UDIV_TIME 80
1106
1107#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1108 __asm__ ("clrt;subc %5,%1; subc %4,%0" \
1109 : "=r" (sh), "=r" (sl) \
def7fbd6 1110 : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t")
24784465
RM
1111
1112#endif /* __sh__ */
1113
41b0afab
RM
1114#if defined (__SH5__) && __SHMEDIA__ && W_TYPE_SIZE == 32
/* Widening 32x32->64 multiply.  Both arguments are parenthesized so
   that an expression argument such as `a + b' is converted and
   multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
1116#define count_leading_zeros(count, x) \
1117 do \
1118 { \
1119 UDItype x_ = (USItype)(x); \
1120 SItype c_; \
1121 \
1122 __asm__ ("nsb %1, %0" : "=r" (c_) : "r" (x_)); \
1123 (count) = c_ - 31; \
1124 } \
1125 while (0)
1126#define COUNT_LEADING_ZEROS_0 32
1127#endif
1128
1129#if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \
1130 && W_TYPE_SIZE == 32
28f540f4 1131#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 1132 __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
1da2d51a
UD
1133 : "=r" ((USItype) (sh)), \
1134 "=&r" ((USItype) (sl)) \
1135 : "%rJ" ((USItype) (ah)), \
1136 "rI" ((USItype) (bh)), \
1137 "%rJ" ((USItype) (al)), \
1138 "rI" ((USItype) (bl)) \
28f540f4
RM
1139 __CLOBBER_CC)
1140#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 1141 __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
1da2d51a
UD
1142 : "=r" ((USItype) (sh)), \
1143 "=&r" ((USItype) (sl)) \
1144 : "rJ" ((USItype) (ah)), \
1145 "rI" ((USItype) (bh)), \
1146 "rJ" ((USItype) (al)), \
1147 "rI" ((USItype) (bl)) \
28f540f4 1148 __CLOBBER_CC)
402fe938
DM
1149#if defined (__sparc_v9__)
1150#define umul_ppmm(w1, w0, u, v) \
1151 do { \
1152 register USItype __g1 asm ("g1"); \
1153 __asm__ ("umul\t%2,%3,%1\n\t" \
1154 "srlx\t%1, 32, %0" \
1155 : "=r" ((USItype) (w1)), \
1156 "=r" (__g1) \
1157 : "r" ((USItype) (u)), \
1158 "r" ((USItype) (v))); \
1159 (w0) = __g1; \
1160 } while (0)
1161#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1162 __asm__ ("mov\t%2,%%y\n\t" \
1163 "udiv\t%3,%4,%0\n\t" \
1164 "umul\t%0,%4,%1\n\t" \
1165 "sub\t%3,%1,%1" \
1166 : "=&r" ((USItype) (__q)), \
1167 "=&r" ((USItype) (__r)) \
1168 : "r" ((USItype) (__n1)), \
1169 "r" ((USItype) (__n0)), \
1170 "r" ((USItype) (__d)))
1171#else
28f540f4 1172#if defined (__sparc_v8__)
28f540f4
RM
1173#define umul_ppmm(w1, w0, u, v) \
1174 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
1da2d51a
UD
1175 : "=r" ((USItype) (w1)), \
1176 "=r" ((USItype) (w0)) \
1177 : "r" ((USItype) (u)), \
1178 "r" ((USItype) (v)))
41b0afab 1179#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
1da2d51a 1180 __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\
41b0afab
RM
1181 : "=&r" ((USItype) (__q)), \
1182 "=&r" ((USItype) (__r)) \
1183 : "r" ((USItype) (__n1)), \
1184 "r" ((USItype) (__n0)), \
1185 "r" ((USItype) (__d)))
1da2d51a 1186#else
28f540f4
RM
1187#if defined (__sparclite__)
1188/* This has hardware multiply but not divide. It also has two additional
1189 instructions scan (ffs from high bit) and divscc. */
1190#define umul_ppmm(w1, w0, u, v) \
1191 __asm__ ("umul %2,%3,%1;rd %%y,%0" \
1da2d51a
UD
1192 : "=r" ((USItype) (w1)), \
1193 "=r" ((USItype) (w0)) \
1194 : "r" ((USItype) (u)), \
1195 "r" ((USItype) (v)))
28f540f4 1196#define udiv_qrnnd(q, r, n1, n0, d) \
772d0e1a 1197 __asm__ ("! Inlined udiv_qrnnd\n" \
41b0afab
RM
1198" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
1199" tst %%g0\n" \
1200" divscc %3,%4,%%g1\n" \
1201" divscc %%g1,%4,%%g1\n" \
1202" divscc %%g1,%4,%%g1\n" \
1203" divscc %%g1,%4,%%g1\n" \
1204" divscc %%g1,%4,%%g1\n" \
1205" divscc %%g1,%4,%%g1\n" \
1206" divscc %%g1,%4,%%g1\n" \
1207" divscc %%g1,%4,%%g1\n" \
1208" divscc %%g1,%4,%%g1\n" \
1209" divscc %%g1,%4,%%g1\n" \
1210" divscc %%g1,%4,%%g1\n" \
1211" divscc %%g1,%4,%%g1\n" \
1212" divscc %%g1,%4,%%g1\n" \
1213" divscc %%g1,%4,%%g1\n" \
1214" divscc %%g1,%4,%%g1\n" \
1215" divscc %%g1,%4,%%g1\n" \
1216" divscc %%g1,%4,%%g1\n" \
1217" divscc %%g1,%4,%%g1\n" \
1218" divscc %%g1,%4,%%g1\n" \
1219" divscc %%g1,%4,%%g1\n" \
1220" divscc %%g1,%4,%%g1\n" \
1221" divscc %%g1,%4,%%g1\n" \
1222" divscc %%g1,%4,%%g1\n" \
1223" divscc %%g1,%4,%%g1\n" \
1224" divscc %%g1,%4,%%g1\n" \
1225" divscc %%g1,%4,%%g1\n" \
1226" divscc %%g1,%4,%%g1\n" \
1227" divscc %%g1,%4,%%g1\n" \
1228" divscc %%g1,%4,%%g1\n" \
1229" divscc %%g1,%4,%%g1\n" \
1230" divscc %%g1,%4,%%g1\n" \
1231" divscc %%g1,%4,%0\n" \
1232" rd %%y,%1\n" \
1233" bl,a 1f\n" \
1234" add %1,%4,%1\n" \
772d0e1a 1235"1: ! End of inline udiv_qrnnd" \
1da2d51a
UD
1236 : "=r" ((USItype) (q)), \
1237 "=r" ((USItype) (r)) \
1238 : "r" ((USItype) (n1)), \
1239 "r" ((USItype) (n0)), \
1240 "rI" ((USItype) (d)) \
e9b3e3c5 1241 : "g1" __AND_CLOBBER_CC)
28f540f4
RM
1242#define UDIV_TIME 37
1243#define count_leading_zeros(count, x) \
41b0afab
RM
1244 do { \
1245 __asm__ ("scan %1,1,%0" \
48693bea
AK
1246 : "=r" ((USItype) (count)) \
1247 : "r" ((USItype) (x))); \
62818cfd 1248 } while (0)
e9b3e3c5
UD
1249/* Early sparclites return 63 for an argument of 0, but they warn that future
1250 implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
1251 undefined. */
1da2d51a
UD
1252#else
1253/* SPARC without integer multiplication and divide instructions.
1254 (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */
28f540f4 1255#define umul_ppmm(w1, w0, u, v) \
772d0e1a 1256 __asm__ ("! Inlined umul_ppmm\n" \
41b0afab
RM
1257" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\
1258" sra %3,31,%%o5 ! Don't move this insn\n" \
1259" and %2,%%o5,%%o5 ! Don't move this insn\n" \
1260" andcc %%g0,0,%%g1 ! Don't move this insn\n" \
1261" mulscc %%g1,%3,%%g1\n" \
1262" mulscc %%g1,%3,%%g1\n" \
1263" mulscc %%g1,%3,%%g1\n" \
1264" mulscc %%g1,%3,%%g1\n" \
1265" mulscc %%g1,%3,%%g1\n" \
1266" mulscc %%g1,%3,%%g1\n" \
1267" mulscc %%g1,%3,%%g1\n" \
1268" mulscc %%g1,%3,%%g1\n" \
1269" mulscc %%g1,%3,%%g1\n" \
1270" mulscc %%g1,%3,%%g1\n" \
1271" mulscc %%g1,%3,%%g1\n" \
1272" mulscc %%g1,%3,%%g1\n" \
1273" mulscc %%g1,%3,%%g1\n" \
1274" mulscc %%g1,%3,%%g1\n" \
1275" mulscc %%g1,%3,%%g1\n" \
1276" mulscc %%g1,%3,%%g1\n" \
1277" mulscc %%g1,%3,%%g1\n" \
1278" mulscc %%g1,%3,%%g1\n" \
1279" mulscc %%g1,%3,%%g1\n" \
1280" mulscc %%g1,%3,%%g1\n" \
1281" mulscc %%g1,%3,%%g1\n" \
1282" mulscc %%g1,%3,%%g1\n" \
1283" mulscc %%g1,%3,%%g1\n" \
1284" mulscc %%g1,%3,%%g1\n" \
1285" mulscc %%g1,%3,%%g1\n" \
1286" mulscc %%g1,%3,%%g1\n" \
1287" mulscc %%g1,%3,%%g1\n" \
1288" mulscc %%g1,%3,%%g1\n" \
1289" mulscc %%g1,%3,%%g1\n" \
1290" mulscc %%g1,%3,%%g1\n" \
1291" mulscc %%g1,%3,%%g1\n" \
1292" mulscc %%g1,%3,%%g1\n" \
1293" mulscc %%g1,0,%%g1\n" \
1294" add %%g1,%%o5,%0\n" \
1295" rd %%y,%1" \
1da2d51a
UD
1296 : "=r" ((USItype) (w1)), \
1297 "=r" ((USItype) (w0)) \
1298 : "%rI" ((USItype) (u)), \
1299 "r" ((USItype) (v)) \
e9b3e3c5 1300 : "g1", "o5" __AND_CLOBBER_CC)
28f540f4 1301#define UMUL_TIME 39 /* 39 instructions */
390a4882 1302/* It's quite necessary to add this much assembler for the sparc.
41b0afab
RM
1303 The default udiv_qrnnd (in C) is more than 10 times slower! */
1304#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
772d0e1a
AJ
1305 __asm__ ("! Inlined udiv_qrnnd\n" \
1306" mov 32,%%g1\n" \
1307" subcc %1,%2,%%g0\n" \
1308"1: bcs 5f\n" \
41b0afab 1309" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
772d0e1a
AJ
1310" sub %1,%2,%1 ! this kills msb of n\n" \
1311" addx %1,%1,%1 ! so this can't give carry\n" \
1312" subcc %%g1,1,%%g1\n" \
1313"2: bne 1b\n" \
41b0afab 1314" subcc %1,%2,%%g0\n" \
772d0e1a 1315" bcs 3f\n" \
41b0afab 1316" addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \
772d0e1a 1317" b 3f\n" \
41b0afab 1318" sub %1,%2,%1 ! this kills msb of n\n" \
772d0e1a
AJ
1319"4: sub %1,%2,%1\n" \
1320"5: addxcc %1,%1,%1\n" \
1321" bcc 2b\n" \
41b0afab 1322" subcc %%g1,1,%%g1\n" \
772d0e1a
AJ
1323"! Got carry from n. Subtract next step to cancel this carry.\n" \
1324" bne 4b\n" \
41b0afab 1325" addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \
772d0e1a
AJ
1326" sub %1,%2,%1\n" \
1327"3: xnor %0,0,%0\n" \
41b0afab
RM
1328" ! End of inline udiv_qrnnd" \
1329 : "=&r" ((USItype) (__q)), \
1330 "=&r" ((USItype) (__r)) \
1331 : "r" ((USItype) (__d)), \
1332 "1" ((USItype) (__n1)), \
1333 "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC)
1334#define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */
1da2d51a
UD
1335#endif /* __sparclite__ */
1336#endif /* __sparc_v8__ */
402fe938 1337#endif /* __sparc_v9__ */
41b0afab 1338#endif /* sparc32 */
28f540f4 1339
41b0afab
RM
1340#if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \
1341 && W_TYPE_SIZE == 64
e9b3e3c5 1342#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
402fe938
DM
1343 do { \
1344 UDItype __carry = 0; \
1345 __asm__ ("addcc\t%r5,%6,%1\n\t" \
1346 "add\t%r3,%4,%0\n\t" \
1347 "movcs\t%%xcc, 1, %2\n\t" \
1348 "add\t%0, %2, %0" \
1349 : "=r" ((UDItype)(sh)), \
1350 "=&r" ((UDItype)(sl)), \
1351 "+r" (__carry) \
1352 : "%rJ" ((UDItype)(ah)), \
1353 "rI" ((UDItype)(bh)), \
1354 "%rJ" ((UDItype)(al)), \
1355 "rI" ((UDItype)(bl)) \
1356 __CLOBBER_CC); \
1357 } while (0)
e9b3e3c5 1358
402fe938
DM
1359#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1360 do { \
1361 UDItype __carry = 0; \
1362 __asm__ ("subcc\t%r5,%6,%1\n\t" \
1363 "sub\t%r3,%4,%0\n\t" \
1364 "movcs\t%%xcc, 1, %2\n\t" \
2fd6ff13 1365 "sub\t%0, %2, %0" \
402fe938
DM
1366 : "=r" ((UDItype)(sh)), \
1367 "=&r" ((UDItype)(sl)), \
1368 "+r" (__carry) \
1369 : "%rJ" ((UDItype)(ah)), \
1370 "rI" ((UDItype)(bh)), \
1371 "%rJ" ((UDItype)(al)), \
1372 "rI" ((UDItype)(bl)) \
1373 __CLOBBER_CC); \
1374 } while (0)
e9b3e3c5
UD
1375
1376#define umul_ppmm(wh, wl, u, v) \
1377 do { \
1378 UDItype tmp1, tmp2, tmp3, tmp4; \
1379 __asm__ __volatile__ ( \
41b0afab
RM
1380 "srl %7,0,%3\n\t" \
1381 "mulx %3,%6,%1\n\t" \
1382 "srlx %6,32,%2\n\t" \
1383 "mulx %2,%3,%4\n\t" \
1384 "sllx %4,32,%5\n\t" \
1385 "srl %6,0,%3\n\t" \
1386 "sub %1,%5,%5\n\t" \
1387 "srlx %5,32,%5\n\t" \
1388 "addcc %4,%5,%4\n\t" \
1389 "srlx %7,32,%5\n\t" \
1390 "mulx %3,%5,%3\n\t" \
1391 "mulx %2,%5,%5\n\t" \
1392 "sethi %%hi(0x80000000),%2\n\t" \
1393 "addcc %4,%3,%4\n\t" \
1394 "srlx %4,32,%4\n\t" \
1395 "add %2,%2,%2\n\t" \
1396 "movcc %%xcc,%%g0,%2\n\t" \
1397 "addcc %5,%4,%5\n\t" \
1398 "sllx %3,32,%3\n\t" \
1399 "add %1,%3,%1\n\t" \
772d0e1a 1400 "add %5,%2,%0" \
e9b3e3c5
UD
1401 : "=r" ((UDItype)(wh)), \
1402 "=&r" ((UDItype)(wl)), \
1403 "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
1404 : "r" ((UDItype)(u)), \
1405 "r" ((UDItype)(v)) \
1406 __CLOBBER_CC); \
1407 } while (0)
1408#define UMUL_TIME 96
1409#define UDIV_TIME 230
313fed01 1410#endif /* sparc64 */
e9b3e3c5
UD
1411
1412#if defined (__vax__) && W_TYPE_SIZE == 32
28f540f4 1413#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
41b0afab 1414 __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
1da2d51a
UD
1415 : "=g" ((USItype) (sh)), \
1416 "=&g" ((USItype) (sl)) \
1417 : "%0" ((USItype) (ah)), \
1418 "g" ((USItype) (bh)), \
1419 "%1" ((USItype) (al)), \
1420 "g" ((USItype) (bl)))
28f540f4 1421#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
41b0afab 1422 __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
1da2d51a
UD
1423 : "=g" ((USItype) (sh)), \
1424 "=&g" ((USItype) (sl)) \
1425 : "0" ((USItype) (ah)), \
1426 "g" ((USItype) (bh)), \
1427 "1" ((USItype) (al)), \
1428 "g" ((USItype) (bl)))
28f540f4
RM
1429#define umul_ppmm(xh, xl, m0, m1) \
1430 do { \
1da2d51a
UD
1431 union { \
1432 UDItype __ll; \
1433 struct {USItype __l, __h;} __i; \
1434 } __xx; \
28f540f4
RM
1435 USItype __m0 = (m0), __m1 = (m1); \
1436 __asm__ ("emul %1,%2,$0,%0" \
1da2d51a 1437 : "=r" (__xx.__ll) \
28f540f4
RM
1438 : "g" (__m0), \
1439 "g" (__m1)); \
1da2d51a
UD
1440 (xh) = __xx.__i.__h; \
1441 (xl) = __xx.__i.__l; \
28f540f4
RM
1442 (xh) += ((((SItype) __m0 >> 31) & __m1) \
1443 + (((SItype) __m1 >> 31) & __m0)); \
1444 } while (0)
1445#define sdiv_qrnnd(q, r, n1, n0, d) \
1446 do { \
1447 union {DItype __ll; \
1448 struct {SItype __l, __h;} __i; \
1449 } __xx; \
1450 __xx.__i.__h = n1; __xx.__i.__l = n0; \
1451 __asm__ ("ediv %3,%2,%0,%1" \
1452 : "=g" (q), "=g" (r) \
1da2d51a 1453 : "g" (__xx.__ll), "g" (d)); \
28f540f4
RM
1454 } while (0)
1455#endif /* __vax__ */
1456
8115f29b
L
1457#ifdef _TMS320C6X
1458#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1459 do \
1460 { \
1461 UDItype __ll; \
1462 __asm__ ("addu .l1 %1, %2, %0" \
1463 : "=a" (__ll) : "a" (al), "a" (bl)); \
1464 (sl) = (USItype)__ll; \
1465 (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \
1466 } \
1467 while (0)
1468
1469#ifdef _TMS320C6400_PLUS
/* Widening 32x32->64 multiply.  Both arguments are parenthesized so
   that an expression argument such as `a + b' is converted and
   multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v))
/* (w1, w0) = u * v: form the product in a double-word C multiplication
   and split it into its high and low 32-bit halves.  */
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    UDItype __x = (UDItype) (USItype) (u) * (USItype) (v);		\
    (w1) = (USItype) (__x >> 32);					\
    (w0) = (USItype) (__x);						\
  } while (0)
1477#endif /* _TMS320C6400_PLUS */
1478
1479#define count_leading_zeros(count, x) ((count) = __builtin_clz (x))
1480#ifdef _TMS320C6400
1481#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x))
1482#endif
1483#define UMUL_TIME 4
1484#define UDIV_TIME 40
1485#endif /* _TMS320C6X */
1486
24784465
RM
1487#if defined (__xtensa__) && W_TYPE_SIZE == 32
1488/* This code is not Xtensa-configuration-specific, so rely on the compiler
1489 to expand builtin functions depending on what configuration features
1490 are available. This avoids library calls when the operation can be
1491 performed in-line. */
1492#define umul_ppmm(w1, w0, u, v) \
1493 do { \
1494 DWunion __w; \
1495 __w.ll = __builtin_umulsidi3 (u, v); \
1496 w1 = __w.s.high; \
1497 w0 = __w.s.low; \
1498 } while (0)
1499#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
1500#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
1501#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
1502#endif /* __xtensa__ */
1503
def7fbd6
AS
1504#if defined xstormy16
1505extern UHItype __stormy16_count_leading_zeros (UHItype);
1506#define count_leading_zeros(count, x) \
1507 do \
1508 { \
1509 UHItype size; \
1510 \
1511 /* We assume that W_TYPE_SIZE is a multiple of 16... */ \
1512 for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \
1513 { \
1514 UHItype c; \
1515 \
1516 c = __clzhi2 ((x) >> (size - 16)); \
1517 (count) += c; \
1518 if (c != 16) \
1519 break; \
1520 } \
1521 } \
1522 while (0)
1523#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
1524#endif
1525
e9b3e3c5
UD
1526#if defined (__z8000__) && W_TYPE_SIZE == 16
1527#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1528 __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
1529 : "=r" ((unsigned int)(sh)), \
1530 "=&r" ((unsigned int)(sl)) \
1531 : "%0" ((unsigned int)(ah)), \
1532 "r" ((unsigned int)(bh)), \
1533 "%1" ((unsigned int)(al)), \
1534 "rQR" ((unsigned int)(bl)))
1535#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1536 __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
1537 : "=r" ((unsigned int)(sh)), \
1538 "=&r" ((unsigned int)(sl)) \
1539 : "0" ((unsigned int)(ah)), \
1540 "r" ((unsigned int)(bh)), \
1541 "1" ((unsigned int)(al)), \
1542 "rQR" ((unsigned int)(bl)))
1543#define umul_ppmm(xh, xl, m0, m1) \
1544 do { \
1545 union {long int __ll; \
1546 struct {unsigned int __h, __l;} __i; \
1547 } __xx; \
1548 unsigned int __m0 = (m0), __m1 = (m1); \
1549 __asm__ ("mult %S0,%H3" \
1550 : "=r" (__xx.__i.__h), \
1551 "=r" (__xx.__i.__l) \
1552 : "%1" (__m0), \
1553 "rQR" (__m1)); \
1554 (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
1555 (xh) += ((((signed int) __m0 >> 15) & __m1) \
1556 + (((signed int) __m1 >> 15) & __m0)); \
1557 } while (0)
1558#endif /* __z8000__ */
1559
28f540f4
RM
1560#endif /* __GNUC__ */
1561
28f540f4
RM
1562/* If this machine has no inline assembler, use C macros. */
1563
#if !defined (add_ssaaaa)
/* (sh, sl) = (ah, al) + (bh, bl).  The low words are added first; the
   sum wrapped around exactly when it is smaller than the low addend,
   and that carry is added into the high word.  AL is copied into a
   UWtype local so that it is evaluated only once and the carry test is
   always done in UWtype.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __al = (al);							\
    UWtype __x = __al + (bl);						\
    (sh) = (ah) + (bh) + (__x < __al);					\
    (sl) = __x;								\
  } while (0)
#endif
1573
#if !defined (sub_ddmmss)
/* (sh, sl) = (ah, al) - (bh, bl).  The low words are subtracted first;
   the difference wrapped around exactly when it is larger than the low
   minuend, and that borrow is taken from the high word.  AL is copied
   into a UWtype local so that it is evaluated only once and the borrow
   test is always done in UWtype.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    UWtype __al = (al);							\
    UWtype __x = __al - (bl);						\
    (sh) = (ah) - (bh) - (__x > __al);					\
    (sl) = __x;								\
  } while (0)
#endif
1583
f30070ae
RM
/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
   smul_ppmm.  The signed high word is corrected by adding the other
   operand once for each operand whose top bit is set; the masks are
   all-ones or zero depending on that top bit.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    UWtype __w1;							\
    UWtype __xm0 = (u), __xm1 = (v);					\
    smul_ppmm (__w1, w0, __xm0, __xm1);					\
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)		\
		+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);		\
  } while (0)
#endif
1596
/* If we still don't have umul_ppmm, define it using plain C.
   Each operand is split into half-words, the four partial products are
   formed, and the two cross products are folded into the high and low
   result words with explicit carry handling.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do {									\
    UWtype __x0, __x1, __x2, __x3;					\
    UHWtype __ul, __vl, __uh, __vh;					\
									\
    __ul = __ll_lowpart (u);						\
    __uh = __ll_highpart (u);						\
    __vl = __ll_lowpart (v);						\
    __vh = __ll_highpart (v);						\
									\
    __x0 = (UWtype) __ul * __vl;					\
    __x1 = (UWtype) __ul * __vh;					\
    __x2 = (UWtype) __uh * __vl;					\
    __x3 = (UWtype) __uh * __vh;					\
									\
    __x1 += __ll_highpart (__x0);/* this can't give carry */		\
    __x1 += __x2;		/* but this indeed can */		\
    if (__x1 < __x2)		/* did we get it? */			\
      __x3 += __ll_B;		/* yes, add it in the proper pos.  */	\
									\
    (w1) = __x3 + __ll_highpart (__x1);					\
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);		\
  } while (0)
#endif
1623
1da2d51a
UD
#if !defined (__umulsidi3)
/* Double-word product of two words, built from umul_ppmm: the high and
   low halves are written into a DWunion, which is then read back as a
   single double-word value.  */
#define __umulsidi3(u, v) \
  ({DWunion __w;							\
    umul_ppmm (__w.s.high, __w.s.low, u, v);				\
    __w.ll; })
#endif
1630
28f540f4
RM
/* Define this unconditionally, so it can be used for debugging.

   Divide the two-word value (n1, n0) by the word d, storing the
   quotient in q and the remainder in r.  The divisor is split into
   half-words __d1/__d0.  Each of the two quotient half-words is first
   estimated by dividing by __d1 alone and is then decremented (at most
   twice) while the estimate times __d0 exceeds the running remainder.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {									\
    UWtype __d1, __d0, __q1, __q0;					\
    UWtype __r1, __r0, __m;						\
    __d1 = __ll_highpart (d);						\
    __d0 = __ll_lowpart (d);						\
									\
    /* High half of the quotient.  */					\
    __r1 = (n1) % __d1;							\
    __q1 = (n1) / __d1;							\
    __m = (UWtype) __q1 * __d0;						\
    __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
    if (__r1 < __m)							\
      {									\
	__q1--, __r1 += (d);						\
	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
	  if (__r1 < __m)						\
	    __q1--, __r1 += (d);					\
      }									\
    __r1 -= __m;							\
									\
    /* Low half of the quotient, same scheme on the new remainder.  */	\
    __r0 = __r1 % __d1;							\
    __q0 = __r1 / __d1;							\
    __m = (UWtype) __q0 * __d0;						\
    __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
    if (__r0 < __m)							\
      {									\
	__q0--, __r0 += (d);						\
	if (__r0 >= (d))						\
	  if (__r0 < __m)						\
	    __q0--, __r0 += (d);					\
      }									\
    __r0 -= __m;							\
									\
    (q) = (UWtype) __q1 * __ll_B | __q0;				\
    (r) = __r0;								\
  } while (0)
1668
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  The remainder
   temporary is a full UWtype word, matching the word type used by the
   rest of this file, so it is not narrowed when a word is not SImode.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {									\
    UWtype __r;								\
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);				\
    (r) = __r;								\
  } while (0)
#endif
1679
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c,
   and set UDIV_NEEDS_NORMALIZATION to 1 alongside it.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif
1685
#if !defined (count_leading_zeros)
/* Table-driven count of leading zero bits.  First find a shift __a such
   that (x >> __a) indexes __clz_tab: by a four-way comparison for words
   of at most 32 bits, otherwise by scanning down from the top in
   byte steps for the highest non-zero byte.  The table entry plus the
   shift is then subtracted from the word size.  */
#define count_leading_zeros(count, x) \
  do {									\
    UWtype __xr = (x);							\
    UWtype __a;								\
									\
    if (W_TYPE_SIZE <= 32)						\
      {									\
	__a = __xr < ((UWtype)1<<2*__BITS4)				\
	  ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)			\
	  : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);	\
      }									\
    else								\
      {									\
	for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)			\
	  if (((__xr >> __a) & 0xff) != 0)				\
	    break;							\
      }									\
									\
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);		\
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif
1709
62818cfd
UD
#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.
   x & -x isolates the lowest set bit; its leading-zero count is then
   converted into a bit position counted from the bottom.  */
#define count_trailing_zeros(count, x) \
  do {									\
    UWtype __ctz_x = (x);						\
    UWtype __ctz_c;							\
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);			\
    (count) = W_TYPE_SIZE - 1 - __ctz_c;				\
  } while (0)
#endif
1721
28f540f4
RM
/* Default to 0 when nothing earlier has defined
   UDIV_NEEDS_NORMALIZATION.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif