1 ; libgcc1 routines for Synopsys DesignWare ARC cpu.
3 /* Copyright (C) 1995-2021 Free Software Foundation, Inc.
4 Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
5 on behalf of Synopsys Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 Under Section 7 of GPL version 3, you are granted additional
20 permissions described in the GCC Runtime Library Exception, version
21 3.1, as published by the Free Software Foundation.
23 You should have received a copy of the GNU General Public License and
24 a copy of the GCC Runtime Library Exception along with this program;
25 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
26 <http://www.gnu.org/licenses/>. */
28 /* As a special exception, if you link this library with other files,
29 some of which are compiled with GCC, to produce an executable,
30 this library does not by itself cause the resulting executable
31 to be covered by the GNU General Public License.
32 This exception does not however invalidate any other reasons why
33 the executable file might be covered by the GNU General Public License. */
36 /* ANSI concatenation macros.
   CONCAT1 forwards to CONCAT2 so that its arguments are macro-expanded
   before CONCAT2 pastes them together with ##.  */
38 #define CONCAT1(a, b) CONCAT2(a, b)
39 #define CONCAT2(a, b) a ## b
41 /* Use the right prefix for global labels.
   SYM(x) yields x with __USER_LABEL_PREFIX__ pasted in front of it.  */
43 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
45 #ifndef WORKING_ASSEMBLER
/* Symbol bookkeeping helpers for the routines in this file.

   FUNC(X)         marks SYM(X) as a function symbol (.type ...,@function).
   HIDDEN_FUNC(X)  does the same and additionally gives SYM(X) hidden
                   visibility; the backquote is used here as a statement
                   separator so both directives fit on one line.
   ENDFUNC(X)      defines the local label .Lfe_X at the point of use and
                   records the size of SYM(X) as .Lfe_X - SYM(X).

   Every directive names the symbol through SYM(), exactly like the
   `SYM(name):' label definitions do, so that the .type, .hidden and .size
   directives all refer to the symbol that is actually defined even when
   __USER_LABEL_PREFIX__ is not empty.  With an empty prefix the expansion
   is the same as naming X directly.

   ENDFUNC forwards to ENDFUNC0 so that its argument is macro-expanded
   before ENDFUNC0 pastes it onto .Lfe_ with ##.  */
51 #define FUNC(X) .type SYM(X),@function
52 #define HIDDEN_FUNC(X) FUNC(X)` .hidden SYM(X)
53 #define ENDFUNC0(X) .Lfe_##X: .size SYM(X),.Lfe_##X-SYM(X)
54 #define ENDFUNC(X) ENDFUNC0(X)
57 /* Use object attributes to inform other tools this file is
58 safe for RF16 configuration. */
59 .arc_attribute Tag_ARC_ABI_rf16, 1
69 /* This is the simple version.
80 #if defined (__ARC_MUL64__)
86 #elif defined (__ARC_MPY__)
92 #elif defined (__ARC_NORM__)
99 lpnz @.Lend ; loop is aligned
105 #elif !defined (__OPTIMIZE_SIZE__) && defined (__ARC_BARREL_SHIFTER__)
106 /* Up to 3.5 times faster than the simpler code below, but larger. */
126 #elif !defined (__OPTIMIZE_SIZE__) /* __ARC601__ */
144 /********************************************************/
146 mov_s r2,0 ; Accumulate result here.
149 add_s r2,r2,r1 ; r += b
151 lsr_s r0,r0 ; a >>= 1
152 asl_s r1,r1 ; b <<= 1
158 /********************************************************/
161 #endif /* L_mulsi3 */
167 .global SYM(__umulsidi3)
169 HIDDEN_FUNC(__umulsidi3)
170 /* We need ARC700 /ARC_MUL64 definitions of __umulsidi3 / __umulsi3_highpart
171 in case some code has been compiled without multiply support enabled,
172 but linked with the multiply-support enabled libraries.
173 For ARC601 (i.e. without a barrel shifter), we also use umulsidi3 as our
174 umulsi3_highpart implementation; the use of the latter label doesn't
175 actually benefit ARC601 platforms, but is useful when ARC601 code is linked
176 against other libraries. */
177 #if defined (__ARC_MPY__) || defined (__ARC_MUL64__) \
178 || !defined (__ARC_BARREL_SHIFTER__)
179 .global SYM(__umulsi3_highpart)
180 SYM(__umulsi3_highpart):
181 HIDDEN_FUNC(__umulsi3_highpart)
184 /* This is the simple version.
194 #include "ieee-754/arc-ieee-754.h"
200 MPYHU DBL0H,r12,DBL0H
201 #elif defined (__ARC_MUL64__)
202 /* Likewise for __ARC_MUL64__ */
207 #else /* !__ARC_MPY__ && !__ARC_MUL64__ */
208 /* Although it might look tempting to extend this to handle muldi3,
209 using mulsi3 twice with 2.25 cycles per 32 bit add is faster
210 than one loop with 3 or four cycles per 32 bit add. */
211 asl.f r12,0 ; Top part of b.
212 mov_s r2,0 ; Accumulate result here.
217 breq r0,0,@.Ldone ; while (a)
219 asl.f r1,r1 ; b <<= 1
220 bbit0.d r0,1,@.Llooptst
224 add.f r3,r3,r1 ; r += b
225 brne.d r0,0,@.Lloop ; while (a);
231 #endif /* !__ARC_MPY__*/
233 #if defined (__ARC_MPY__) || defined (__ARC_MUL64__) \
234 || !defined (__ARC_BARREL_SHIFTER__)
235 ENDFUNC(__umulsi3_highpart)
237 #endif /* L_umulsidi3 */
243 .global SYM(__muldi3)
245 #ifdef __LITTLE_ENDIAN__
290 #endif /* __LITTLE_ENDIAN__ */
292 #endif /* L_muldi3 */
293 #endif /* !__ARC_RF16__ */
295 #ifdef L_umulsi3_highpart
296 #include "ieee-754/arc-ieee-754.h"
297 /* For use without a barrel shifter, and for ARC700 / ARC_MUL64, the
298 mulsidi3 algorithms above look better, so for these, there is an
299 extra label up there. */
300 #if !defined (__ARC_MPY__) && !defined (__ARC_MUL64__) \
301 && defined (__ARC_BARREL_SHIFTER__)
302 .global SYM(__umulsi3_highpart)
303 SYM(__umulsi3_highpart):
304 HIDDEN_FUNC(__umulsi3_highpart)
314 /* Make the result register peephole-compatible with mulsidi3. */
316 ENDFUNC(__umulsi3_highpart)
317 #endif /* !__ARC_MPY__ && __ARC_BARREL_SHIFTER__ */
318 #endif /* L_umulsi3_highpart */
320 #ifdef L_divmod_tools
322 ; Utilities used by all routines.
328 udivmodsi4(int modwanted, unsigned long num, unsigned long den)
330 unsigned long bit = 1;
331 unsigned long res = 0;
333 while (den < num && bit && !(den & (1L<<31)))
348 if (modwanted) return num;
353 ; inputs: r0 = numerator, r1 = denominator
354 ; outputs: r0 = quotient, r1 = remainder, r2/r3 trashed
357 .global SYM(__udivmodsi4)
361 #if defined (__ARC_EA__)
362 /* Normalize divisor and dividend, and then use the appropriate number of
363 divaw (the number of result bits, or one more) to produce the result.
364 There are some special conditions that need to be tested:
365 - We can only directly normalize unsigned numbers that fit in 31 bit. For
366 the divisor, we test early on that it is not 'negative'.
367 - divaw can't correctly process a dividend that is larger than the divisor.
368 We handle this by checking that the dividend prior to normalization is
369 not larger than the normalized divisor. As we then already know
370 that the divisor fits 31 bit, this check also makes sure that the
372 - ordinary normalization of the dividend could make it larger than the
373 normalized divisor, which again would be unsuitable for divaw.
374 Thus, we want to shift left the dividend by one less, except that we
375 want to leave it alone if it is already 31 bit. To this end, we
376 double the input to norm with adds.
377 - If the dividend has fewer bits than the divisor, that would leave us
378 with a negative number of divaw to execute. Although we could use a
379 conditional loop to avoid excess divaw, and then the quotient could
380 be extracted correctly as there'd be more than enough zero bits, the
381 remainder would be shifted left too far, requiring a conditional shift
382 right. The cost of that shift and the possible mispredict on the
383 conditional loop cost as much as putting in an early check for a zero
386 brne.d r3,r0,.Large_dividend
392 asl_l r0,r0,r3 ; not short to keep loop aligned
396 .Ldiv_end:sub_s r3,r2,1
415 .Ldiv_end2:asl r0,r3,r2
433 #elif !defined (__OPTIMIZE_SIZE__) && !defined (__ARC_RF16__)
434 #if defined (__ARC_NORM__) && defined (__ARC_BARREL_SHIFTER__)
436 brhs.d r1,r2,.Lret0_3
447 #else /* ! __ARC_NORM__ */
449 brhs.d r1,r2,.Lret0_3
452 asl_s r1,r1 ; den <<= 1
453 brls.d r1,r2,@.Lloop1
454 sub lp_count,lp_count,1
459 #if !defined (__ARCEM__) && !defined (__ARCHS__)
464 #endif /* !__ARCEM__ && !__ARCHS__ */
465 #endif /* !__ARC_NORM__ */
469 #if defined (__ARC_BARREL_SHIFTER__)
497 #if 0 /* Slightly shorter, but slower. */
499 brhi.d r1,r0,.Loop3_end
504 rsub r0,lp_count,32-1
516 #else /* Arctangent-A5 */
517 breq_s r1,0,@.Ldivmodend
523 asl_s r1,r1 ; den <<= 1
525 asl_s r2,r2 ; bit <<= 1
527 brlo r0,r1,@.Lshiftdown
528 sub_s r0,r0,r1 ; num -= den
529 or_s r3,r3,r2 ; res |= bit
531 lsr_s r2,r2 ; bit >>= 1
532 lsr_s r1,r1 ; den >>= 1
535 mov_s r1,r0 ; r1 = mod
537 mov_s r0,r3 ; r0 = res
538 /******************************************************/
540 ENDFUNC(__udivmodsi4)
548 .global SYM(__udivsi3)
554 #endif /* L_udivsi3 */
560 .global SYM(__divsi3)
569 bl.d @SYM(__udivmodsi4)
574 #else /* !ifndef __ARC_EA__ */
575 ;; We can use the abs, norm, divaw and mpy instructions for ARC700
578 /* This table has been generated by divtab-arc700.c. */
579 /* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.
580 For powers of two, we list unnormalized numbers instead. The values
581 for powers of 2 are loaded, but not used. The value for 1 is actually
582 the first instruction after .Lmuldiv. */
849 ; write port allocation stall
871 .Ldivstart:divaw r12,r12,r2
872 .Ldivend:xor_s r1,r1,r0
882 sub1.f 0,r12,r2 ; special case: -2**(n+1) / 2**n
889 ; Need to handle special cases involving negative powers of two:
890 ; r12,r2 are normalized dividend / divisor;
891 ; divide anything by 0x80000000, or divide 0x80000000 by 0x40000000
903 /* This version requires that divaw works with a divisor of 0x80000000U */
916 .Ldivstart:divaw r12,r12,r2
917 .Ldivend:xor_s r1,r1,r0
933 #endif /* ifndef __ARC700__ */
937 #endif /* L_divsi3 */
943 .global SYM(__umodsi3)
947 bl.nd @SYM(__udivmodsi4)
952 #endif /* L_umodsi3 */
958 .global SYM (__modsi3)
966 bl.d @SYM(__udivmodsi4)
972 #else /* __ARC_EA__ */
986 .Ldivstart:divaw r12,r12,r2
992 .Lonebit:neg.pl r5,r5
996 #endif /* !__ARC_EA__ */
999 #endif /* L_modsi3 */
1004 .global SYM (__clzsi2)
1007 HIDDEN_FUNC(__clzsi2)
1013 #elif !defined (__ARC_BARREL_SHIFTER__)
1019 brhs r0,r2,.Loop_end
1023 sub2 r0,lp_count,lp_count
1034 bbit1.d r0,31,.Ldone
1051 #endif /* L_clzsi2 */
1055 ;;; MILLICODE THUNK LIB ;***************
1057 ;;; .macro push_regs from, to, offset
1058 ;;; st_s "\from", [sp, \offset]
1060 ;;; push_regs "(\from+1)", \to, "(\offset+4)"
1063 ;;; push_regs 13, 18, 0
1066 ;;;; .macro sum from, to, three
1070 ;;;; .set regno, \from+1
1072 ;;;; .set shift, shift - 1
1073 ;;;; # st_s %shift @3 lsl #shift
1075 ;;;; sum "(\from+1)", \to, "(\three)"
1082 ;; .macro push_regs from=0, to=3, offset
1083 ;; st_s r\from, [sp, \offset]
1085 ;; push_regs "\from+1 ",\to,"(\offset+4)"
1089 ;; .macro expand_to_push from=13, to
1095 ;; ; push_regs \from, \to, 0
1099 ;; expand_to_push 13,18
1103 #ifndef __ARC_RF16__
1104 #ifdef L_millicodethunk_st
/* Millicode store thunks.
   Entry points __st_r13_to_rN are provided for N = 15 .. 25.  Each one is
   exported with .global and then tagged through HIDDEN_FUNC.  */
1107 .global SYM(__st_r13_to_r15)
1108 .global SYM(__st_r13_to_r16)
1109 .global SYM(__st_r13_to_r17)
1110 .global SYM(__st_r13_to_r18)
1111 .global SYM(__st_r13_to_r19)
1112 .global SYM(__st_r13_to_r20)
1113 .global SYM(__st_r13_to_r21)
1114 .global SYM(__st_r13_to_r22)
1115 .global SYM(__st_r13_to_r23)
1116 .global SYM(__st_r13_to_r24)
1117 .global SYM(__st_r13_to_r25)
1118 HIDDEN_FUNC(__st_r13_to_r15)
1119 HIDDEN_FUNC(__st_r13_to_r16)
1120 HIDDEN_FUNC(__st_r13_to_r17)
1121 HIDDEN_FUNC(__st_r13_to_r18)
1122 HIDDEN_FUNC(__st_r13_to_r19)
1123 HIDDEN_FUNC(__st_r13_to_r20)
1124 HIDDEN_FUNC(__st_r13_to_r21)
1125 HIDDEN_FUNC(__st_r13_to_r22)
1126 HIDDEN_FUNC(__st_r13_to_r23)
1127 HIDDEN_FUNC(__st_r13_to_r24)
1128 HIDDEN_FUNC(__st_r13_to_r25)
/* The entry labels are laid out in descending order of N and end at
   __st_r13_to_r15.
   NOTE(review): this looks like a fall-through chain in which entering at
   __st_r13_to_rN stores rN and then runs on into the lower-numbered
   entries; confirm against the instructions between the labels.  */
1130 SYM(__st_r13_to_r25):
1132 SYM(__st_r13_to_r24):
1134 SYM(__st_r13_to_r23):
1136 SYM(__st_r13_to_r22):
1138 SYM(__st_r13_to_r21):
1140 SYM(__st_r13_to_r20):
1142 SYM(__st_r13_to_r19):
1144 SYM(__st_r13_to_r18):
1146 SYM(__st_r13_to_r17):
1148 SYM(__st_r13_to_r16):
1150 SYM(__st_r13_to_r15):
/* r15 is saved at [sp,8].
   NOTE(review): presumably r13 and r14 go to [sp,0] and [sp,4]; confirm.  */
1152 st r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
/* One ENDFUNC per entry point.  All of them are placed at the same spot, so
   every entry gets a .size measured from its own label to this common end.  */
1159 ENDFUNC(__st_r13_to_r15)
1160 ENDFUNC(__st_r13_to_r16)
1161 ENDFUNC(__st_r13_to_r17)
1162 ENDFUNC(__st_r13_to_r18)
1163 ENDFUNC(__st_r13_to_r19)
1164 ENDFUNC(__st_r13_to_r20)
1165 ENDFUNC(__st_r13_to_r21)
1166 ENDFUNC(__st_r13_to_r22)
1167 ENDFUNC(__st_r13_to_r23)
1168 ENDFUNC(__st_r13_to_r24)
1169 ENDFUNC(__st_r13_to_r25)
1170 #endif /* L_millicodethunk_st */
1173 #ifdef L_millicodethunk_ld
1176 ; ==================================
/* Millicode load thunks.
   Entry points __ld_r13_to_rN are provided for N = 15 .. 25.  Each one is
   exported with .global and then tagged through HIDDEN_FUNC.  */
1179 .global SYM(__ld_r13_to_r15)
1180 .global SYM(__ld_r13_to_r16)
1181 .global SYM(__ld_r13_to_r17)
1182 .global SYM(__ld_r13_to_r18)
1183 .global SYM(__ld_r13_to_r19)
1184 .global SYM(__ld_r13_to_r20)
1185 .global SYM(__ld_r13_to_r21)
1186 .global SYM(__ld_r13_to_r22)
1187 .global SYM(__ld_r13_to_r23)
1188 .global SYM(__ld_r13_to_r24)
1189 .global SYM(__ld_r13_to_r25)
1190 HIDDEN_FUNC(__ld_r13_to_r15)
1191 HIDDEN_FUNC(__ld_r13_to_r16)
1192 HIDDEN_FUNC(__ld_r13_to_r17)
1193 HIDDEN_FUNC(__ld_r13_to_r18)
1194 HIDDEN_FUNC(__ld_r13_to_r19)
1195 HIDDEN_FUNC(__ld_r13_to_r20)
1196 HIDDEN_FUNC(__ld_r13_to_r21)
1197 HIDDEN_FUNC(__ld_r13_to_r22)
1198 HIDDEN_FUNC(__ld_r13_to_r23)
1199 HIDDEN_FUNC(__ld_r13_to_r24)
1200 HIDDEN_FUNC(__ld_r13_to_r25)
/* The entry labels are laid out in descending order of N and end at
   __ld_r13_to_r15.
   NOTE(review): this looks like a fall-through chain in which entering at
   __ld_r13_to_rN reloads rN and then runs on into the lower-numbered
   entries; confirm against the instructions between the labels.  */
1201 SYM(__ld_r13_to_r25):
1203 SYM(__ld_r13_to_r24):
1205 SYM(__ld_r13_to_r23):
1207 SYM(__ld_r13_to_r22):
1209 SYM(__ld_r13_to_r21):
1211 SYM(__ld_r13_to_r20):
1213 SYM(__ld_r13_to_r19):
1215 SYM(__ld_r13_to_r18):
1217 SYM(__ld_r13_to_r17):
1219 SYM(__ld_r13_to_r16):
1221 SYM(__ld_r13_to_r15):
/* r15 is reloaded from [sp,8].
   NOTE(review): presumably r13 and r14 come from [sp,0] and [sp,4];
   confirm.  */
1223 ld r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
/* One ENDFUNC per entry point.  All of them are placed at the same spot, so
   every entry gets a .size measured from its own label to this common end.  */
1230 ENDFUNC(__ld_r13_to_r15)
1231 ENDFUNC(__ld_r13_to_r16)
1232 ENDFUNC(__ld_r13_to_r17)
1233 ENDFUNC(__ld_r13_to_r18)
1234 ENDFUNC(__ld_r13_to_r19)
1235 ENDFUNC(__ld_r13_to_r20)
1236 ENDFUNC(__ld_r13_to_r21)
1237 ENDFUNC(__ld_r13_to_r22)
1238 ENDFUNC(__ld_r13_to_r23)
1239 ENDFUNC(__ld_r13_to_r24)
1240 ENDFUNC(__ld_r13_to_r25)
1242 #endif /* L_millicodethunk_ld */
1243 #ifdef L_millicodethunk_ret
/* Millicode "_ret" thunks.
   Entry points __ld_r13_to_rN_ret are provided for N = 14 .. 25.  Each one
   is exported with .global and then tagged through HIDDEN_FUNC.
   NOTE(review): going by the names these reload r13..rN and then return;
   the instructions themselves are not visible here, so confirm.  */
1244 .global SYM(__ld_r13_to_r14_ret)
1245 .global SYM(__ld_r13_to_r15_ret)
1246 .global SYM(__ld_r13_to_r16_ret)
1247 .global SYM(__ld_r13_to_r17_ret)
1248 .global SYM(__ld_r13_to_r18_ret)
1249 .global SYM(__ld_r13_to_r19_ret)
1250 .global SYM(__ld_r13_to_r20_ret)
1251 .global SYM(__ld_r13_to_r21_ret)
1252 .global SYM(__ld_r13_to_r22_ret)
1253 .global SYM(__ld_r13_to_r23_ret)
1254 .global SYM(__ld_r13_to_r24_ret)
1255 .global SYM(__ld_r13_to_r25_ret)
1256 HIDDEN_FUNC(__ld_r13_to_r14_ret)
1257 HIDDEN_FUNC(__ld_r13_to_r15_ret)
1258 HIDDEN_FUNC(__ld_r13_to_r16_ret)
1259 HIDDEN_FUNC(__ld_r13_to_r17_ret)
1260 HIDDEN_FUNC(__ld_r13_to_r18_ret)
1261 HIDDEN_FUNC(__ld_r13_to_r19_ret)
1262 HIDDEN_FUNC(__ld_r13_to_r20_ret)
1263 HIDDEN_FUNC(__ld_r13_to_r21_ret)
1264 HIDDEN_FUNC(__ld_r13_to_r22_ret)
1265 HIDDEN_FUNC(__ld_r13_to_r23_ret)
1266 HIDDEN_FUNC(__ld_r13_to_r24_ret)
1267 HIDDEN_FUNC(__ld_r13_to_r25_ret)
/* The entry labels are laid out in descending order of N and end at
   __ld_r13_to_r14_ret.
   NOTE(review): this looks like a fall-through chain from the highest
   register down to the shared tail; confirm against the instructions
   between the labels.  */
1270 SYM(__ld_r13_to_r25_ret):
1272 SYM(__ld_r13_to_r24_ret):
1274 SYM(__ld_r13_to_r23_ret):
1276 SYM(__ld_r13_to_r22_ret):
1278 SYM(__ld_r13_to_r21_ret):
1280 SYM(__ld_r13_to_r20_ret):
1282 SYM(__ld_r13_to_r19_ret):
1284 SYM(__ld_r13_to_r18_ret):
1286 SYM(__ld_r13_to_r17_ret):
1288 SYM(__ld_r13_to_r16_ret):
1290 SYM(__ld_r13_to_r15_ret):
1292 SYM(__ld_r13_to_r14_ret):
/* One ENDFUNC per entry point.  All of them are placed at the same spot, so
   every entry gets a .size measured from its own label to this common end.  */
1298 ENDFUNC(__ld_r13_to_r14_ret)
1299 ENDFUNC(__ld_r13_to_r15_ret)
1300 ENDFUNC(__ld_r13_to_r16_ret)
1301 ENDFUNC(__ld_r13_to_r17_ret)
1302 ENDFUNC(__ld_r13_to_r18_ret)
1303 ENDFUNC(__ld_r13_to_r19_ret)
1304 ENDFUNC(__ld_r13_to_r20_ret)
1305 ENDFUNC(__ld_r13_to_r21_ret)
1306 ENDFUNC(__ld_r13_to_r22_ret)
1307 ENDFUNC(__ld_r13_to_r23_ret)
1308 ENDFUNC(__ld_r13_to_r24_ret)
1309 ENDFUNC(__ld_r13_to_r25_ret)
1311 #endif /* L_millicodethunk_ret */
1313 #if defined (__ARC700__) || defined (__ARC_FPX_QUARK__)
1316 #include "ieee-754/adddf3.S"
1322 #include "ieee-754/muldf3.S"
1323 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
1324 #include "ieee-754/arc600-mul64/muldf3.S"
1325 #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
1326 #include "ieee-754/arc600-dsp/muldf3.S"
1332 #include "ieee-754/addsf3.S"
1338 #include "ieee-754/mulsf3.S"
1339 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
1340 #include "ieee-754/arc600-mul64/mulsf3.S"
1341 #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
1342 #include "ieee-754/arc600-dsp/mulsf3.S"
1343 #elif defined (__ARC_NORM__)
1344 #include "ieee-754/arc600/mulsf3.S"
1350 #include "ieee-754/divdf3.S"
1351 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
1352 #include "ieee-754/arc600-mul64/divdf3.S"
1353 #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
1354 #include "ieee-754/arc600-dsp/divdf3.S"
1360 #include "ieee-754/divsf3-stdmul.S"
1361 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
1362 #include "ieee-754/arc600-mul64/divsf3.S"
1363 #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
1364 #include "ieee-754/arc600-dsp/divsf3.S"
1365 #elif defined (__ARC_NORM__)
1366 #include "ieee-754/arc600/divsf3.S"
1370 #ifdef L_extendsfdf2
1372 #include "ieee-754/extendsfdf2.S"
1378 #include "ieee-754/truncdfsf2.S"
1384 #include "ieee-754/floatsidf.S"
1390 #include "ieee-754/floatsisf.S"
1394 #ifdef L_floatunsidf
1396 #include "ieee-754/floatunsidf.S"
1402 #include "ieee-754/fixdfsi.S"
1408 #include "ieee-754/fixsfsi.S"
1414 #include "ieee-754/fixunsdfsi.S"
1420 #include "ieee-754/eqdf2.S"
1426 #include "ieee-754/eqsf2.S"
1432 #include "ieee-754/gtdf2.S"
1438 #include "ieee-754/gtsf2.S"
1444 #include "ieee-754/gedf2.S"
1450 #include "ieee-754/gesf2.S"
1456 #include "ieee-754/uneqdf2.S"
1462 #include "ieee-754/uneqsf2.S"
1468 #include "ieee-754/orddf2.S"
1474 #include "ieee-754/ordsf2.S"
1477 #endif /* ARC_OPTFPE */
1479 #endif /* !__ARC_RF16__ */