1 ; libgcc1 routines for Synopsys DesignWare ARC cpu.
3 /* Copyright (C) 1995-2019 Free Software Foundation, Inc.
4 Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
5 on behalf of Synopsys Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 Under Section 7 of GPL version 3, you are granted additional
20 permissions described in the GCC Runtime Library Exception, version
21 3.1, as published by the Free Software Foundation.
23 You should have received a copy of the GNU General Public License and
24 a copy of the GCC Runtime Library Exception along with this program;
25 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
26 <http://www.gnu.org/licenses/>. */
28 /* As a special exception, if you link this library with other files,
29 some of which are compiled with GCC, to produce an executable,
30 this library does not by itself cause the resulting executable
31 to be covered by the GNU General Public License.
32 This exception does not however invalidate any other reasons why
33 the executable file might be covered by the GNU General Public License. */
36 /* ANSI token-pasting helpers.  CONCAT1 forwards to CONCAT2 so that its
   arguments are macro-expanded before ## joins them; CONCAT2 pastes its
   two arguments exactly as written. */
38 #define CONCAT1(a, b) CONCAT2(a, b)
39 #define CONCAT2(a, b) a ## b
41 /* SYM(x) prepends __USER_LABEL_PREFIX__ to x, so that global labels get
   the right prefix.  It goes through CONCAT1 so that the prefix macro is
   expanded before the paste takes place. */
43 #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
45 #ifndef WORKING_ASSEMBLER
/* FUNC(X): give the prefixed symbol SYM(X) the type @function. */
51 #define FUNC(X) .type SYM(X),@function
/* HIDDEN_FUNC(X): FUNC(X) followed by a .hidden directive for X.
   NOTE(review): the backquote presumably acts as the assembler statement
   separator here -- confirm against the ARC assembler documentation.
   NOTE(review): .type is applied to SYM(X) whereas .hidden (and .size
   below) name X directly; these agree only if __USER_LABEL_PREFIX__
   expands to nothing -- confirm for this target. */
52 #define HIDDEN_FUNC(X) FUNC(X)` .hidden X
/* ENDFUNC0(X): define the local label .Lfe_X at the current location and
   set the size of X to the distance from X to that label. */
53 #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
/* ENDFUNC(X): wrapper around ENDFUNC0; the extra level lets a macro
   argument be expanded before ENDFUNC0 applies ## to it. */
54 #define ENDFUNC(X) ENDFUNC0(X)
65 /* This is the simple version.
76 #if defined (__ARC_MUL64__)
82 #elif defined (__ARC_MPY__)
88 #elif defined (__ARC_NORM__)
95 lpnz @.Lend ; loop is aligned
101 #elif !defined (__OPTIMIZE_SIZE__) && defined (__ARC_BARREL_SHIFTER__)
102 /* Up to 3.5 times faster than the simpler code below, but larger. */
122 #elif !defined (__OPTIMIZE_SIZE__) /* __ARC601__ */
140 /********************************************************/
142 mov_s r2,0 ; Accumulate result here.
145 add_s r2,r2,r1 ; r += b
147 lsr_s r0,r0 ; a >>= 1
148 asl_s r1,r1 ; b <<= 1
154 /********************************************************/
157 #endif /* L_mulsi3 */
163 .global SYM(__umulsidi3)
165 HIDDEN_FUNC(__umulsidi3)
166 /* We need ARC700 / ARC_MUL64 definitions of __umulsidi3 / __umulsi3_highpart
167 in case some code has been compiled without multiply support enabled,
168 but linked with the multiply-support enabled libraries.
169 For ARC601 (i.e. without a barrel shifter), we also use umulsidi3 as our
170 umulsi3_highpart implementation; the use of the latter label doesn't
171 actually benefit ARC601 platforms, but is useful when ARC601 code is linked
172 against other libraries. */
173 #if defined (__ARC_MPY__) || defined (__ARC_MUL64__) \
174 || !defined (__ARC_BARREL_SHIFTER__)
175 .global SYM(__umulsi3_highpart)
176 SYM(__umulsi3_highpart):
177 HIDDEN_FUNC(__umulsi3_highpart)
180 /* This is the simple version.
190 #include "ieee-754/arc-ieee-754.h"
196 MPYHU DBL0H,r12,DBL0H
197 #elif defined (__ARC_MUL64__)
198 /* Likewise for __ARC_MUL64__ */
203 #else /* !__ARC_MPY__ && !__ARC_MUL64__ */
204 /* Although it might look tempting to extend this to handle muldi3,
205 using mulsi3 twice with 2.25 cycles per 32-bit add is faster
206 than one loop with three or four cycles per 32-bit add. */
207 asl.f r12,0 ; Top part of b.
208 mov_s r2,0 ; Accumulate result here.
213 breq r0,0,@.Ldone ; while (a)
215 asl.f r1,r1 ; b <<= 1
216 bbit0.d r0,1,@.Llooptst
220 add.f r3,r3,r1 ; r += b
221 brne.d r0,0,@.Lloop ; while (a);
227 #endif /* !__ARC_MPY__*/
229 #if defined (__ARC_MPY__) || defined (__ARC_MUL64__) \
230 || !defined (__ARC_BARREL_SHIFTER__)
231 ENDFUNC(__umulsi3_highpart)
233 #endif /* L_umulsidi3 */
238 .global SYM(__muldi3)
240 #ifdef __LITTLE_ENDIAN__
285 #endif /* __LITTLE_ENDIAN__ */
287 #endif /* L_muldi3 */
289 #ifdef L_umulsi3_highpart
290 #include "ieee-754/arc-ieee-754.h"
291 /* For use without a barrel shifter, and for ARC700 / ARC_MUL64, the
292 mulsidi3 algorithms above look better, so for these, there is an
293 extra label up there. */
294 #if !defined (__ARC_MPY__) && !defined (__ARC_MUL64__) \
295 && defined (__ARC_BARREL_SHIFTER__)
296 .global SYM(__umulsi3_highpart)
297 SYM(__umulsi3_highpart):
298 HIDDEN_FUNC(__umulsi3_highpart)
308 /* Make the result register peephole-compatible with mulsidi3. */
310 ENDFUNC(__umulsi3_highpart)
311 #endif /* !__ARC_MPY__ && __ARC_BARREL_SHIFTER__ */
312 #endif /* L_umulsi3_highpart */
314 #ifdef L_divmod_tools
316 ; Utilities used by all routines.
322 udivmodsi4(int modwanted, unsigned long num, unsigned long den)
324 unsigned long bit = 1;
325 unsigned long res = 0;
327 while (den < num && bit && !(den & (1L<<31)))
342 if (modwanted) return num;
347 ; inputs: r0 = numerator, r1 = denominator
348 ; outputs: r0 = quotient, r1 = remainder, r2/r3 trashed
351 .global SYM(__udivmodsi4)
355 #if defined (__ARC_EA__)
356 /* Normalize divisor and dividend, and then use the appropriate number of
357 divaw (the number of result bits, or one more) to produce the result.
358 There are some special conditions that need to be tested:
359 - We can only directly normalize unsigned numbers that fit in 31 bit. For
360 the divisor, we test early on that it is not 'negative'.
361 - divaw can't correctly process a dividend that is larger than the divisor.
362 We handle this by checking that the dividend prior to normalization is
363 not larger than the normalized divisor. As we already know by then
364 that the divisor fits 31 bit, this check also makes sure that the
366 - ordinary normalization of the dividend could make it larger than the
367 normalized divisor, which again would be unsuitable for divaw.
368 Thus, we want to shift left the dividend by one less, except that we
369 want to leave it alone if it is already 31 bit. To this end, we
370 double the input to norm with adds.
371 - If the dividend has fewer bits than the divisor, that would leave us
372 with a negative number of divaw to execute. Although we could use a
373 conditional loop to avoid excess divaw, and then the quotient could
374 be extracted correctly as there'd be more than enough zero bits, the
375 remainder would be shifted left too far, requiring a conditional shift
376 right. The cost of that shift and the possible mispredict on the
377 conditional loop cost as much as putting in an early check for a zero
380 brne.d r3,r0,.Large_dividend
386 asl_l r0,r0,r3 ; not short to keep loop aligned
390 .Ldiv_end:sub_s r3,r2,1
409 .Ldiv_end2:asl r0,r3,r2
427 #elif !defined (__OPTIMIZE_SIZE__) && !defined (__ARC_RF16__)
428 #if defined (__ARC_NORM__) && defined (__ARC_BARREL_SHIFTER__)
430 brhs.d r1,r2,.Lret0_3
441 #else /* ! __ARC_NORM__ */
443 brhs.d r1,r2,.Lret0_3
446 asl_s r1,r1 ; den <<= 1
447 brls.d r1,r2,@.Lloop1
448 sub lp_count,lp_count,1
453 #if !defined (__ARCEM__) && !defined (__ARCHS__)
458 #endif /* !__ARCEM__ && !__ARCHS__ */
459 #endif /* !__ARC_NORM__ */
463 #if defined (__ARC_BARREL_SHIFTER__)
491 #if 0 /* Slightly shorter, but slower. */
493 brhi.d r1,r0,.Loop3_end
498 rsub r0,lp_count,32-1
510 #else /* Arctangent-A5 */
511 breq_s r1,0,@.Ldivmodend
517 asl_s r1,r1 ; den <<= 1
519 asl_s r2,r2 ; bit <<= 1
521 brlo r0,r1,@.Lshiftdown
522 sub_s r0,r0,r1 ; num -= den
523 or_s r3,r3,r2 ; res |= bit
525 lsr_s r2,r2 ; bit >>= 1
526 lsr_s r1,r1 ; den >>= 1
529 mov_s r1,r0 ; r1 = mod
531 mov_s r0,r3 ; r0 = res
532 /******************************************************/
534 ENDFUNC(__udivmodsi4)
542 .global SYM(__udivsi3)
547 #if 0 /* interferes with linux loader */
548 .section .__arc_profile_forward, "a"
550 .long SYM(__udivmodsi4)
554 #endif /* L_udivsi3 */
560 .global SYM(__divsi3)
569 bl.d @SYM(__udivmodsi4) ; hand off to the shared unsigned divide/modulo routine
574 #else /* !ifndef __ARC_EA__ */
575 ;; We can use the abs, norm, divaw and mpy instructions for ARC700
578 /* This table has been generated by divtab-arc700.c. */
579 /* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.
580 For powers of two, we list unnormalized numbers instead. The values
581 for powers of 2 are loaded, but not used. The value for 1 is actually
582 the first instruction after .Lmuldiv. */
849 ; write port allocation stall
871 .Ldivstart:divaw r12,r12,r2
872 .Ldivend:xor_s r1,r1,r0
882 sub1.f 0,r12,r2 ; special case: -2**(n+1) / 2**n
889 ; Need to handle special cases involving negative powers of two:
890 ; r12,r2 are normalized dividend / divisor;
891 ; divide anything by 0x80000000, or divide 0x80000000 by 0x40000000
903 /* This version requires that divaw works with a divisor of 0x80000000U */
916 .Ldivstart:divaw r12,r12,r2
917 .Ldivend:xor_s r1,r1,r0
933 #endif /* ifndef __ARC_EA__ */
937 #endif /* L_divsi3 */
943 .global SYM(__umodsi3)
947 bl.nd @SYM(__udivmodsi4)
951 #if 0 /* interferes with linux loader */
952 .section .__arc_profile_forward, "a"
954 .long SYM(__udivmodsi4)
958 #endif /* L_umodsi3 */
964 .global SYM (__modsi3)
972 bl.d @SYM(__udivmodsi4)
978 #else /* __ARC_EA__ */
992 .Ldivstart:divaw r12,r12,r2
998 .Lonebit:neg.pl r5,r5
1002 #endif /* !__ARC_EA__ */
1005 #endif /* L_modsi3 */
1010 .global SYM (__clzsi2)
1013 HIDDEN_FUNC(__clzsi2)
1019 #elif !defined (__ARC_BARREL_SHIFTER__)
1025 brhs r0,r2,.Loop_end
1029 sub2 r0,lp_count,lp_count
1040 bbit1.d r0,31,.Ldone
1057 #endif /* L_clzsi2 */
1061 ;;; MILLICODE THUNK LIB ;***************
1063 ;;; .macro push_regs from, to, offset
1064 ;;; st_s "\from", [sp, \offset]
1066 ;;; push_regs "(\from+1)", \to, "(\offset+4)"
1069 ;;; push_regs 13, 18, 0
1072 ;;;; .macro sum from, to, three
1076 ;;;; .set regno, \from+1
1078 ;;;; .set shift, shift - 1
1079 ;;;; # st_s %shift @3 lsl #shift
1081 ;;;; sum "(\from+1)", \to, "(\three)"
1088 ;; .macro push_regs from=0, to=3, offset
1089 ;; st_s r\from, [sp, \offset]
1091 ;; push_regs "\from+1 ",\to,"(\offset+4)"
1095 ;; .macro expand_to_push from=13, to
1101 ;; ; push_regs \from, \to, 0
1105 ;; expand_to_push 13,18
1109 #ifdef L_millicodethunk_st
; Register-store millicode thunks.
; Eleven entry points, __st_r13_to_r15 through __st_r13_to_r25, are
; exported as global symbols and tagged with HIDDEN_FUNC.
1112 .global SYM(__st_r13_to_r15)
1113 .global SYM(__st_r13_to_r16)
1114 .global SYM(__st_r13_to_r17)
1115 .global SYM(__st_r13_to_r18)
1116 .global SYM(__st_r13_to_r19)
1117 .global SYM(__st_r13_to_r20)
1118 .global SYM(__st_r13_to_r21)
1119 .global SYM(__st_r13_to_r22)
1120 .global SYM(__st_r13_to_r23)
1121 .global SYM(__st_r13_to_r24)
1122 .global SYM(__st_r13_to_r25)
1123 HIDDEN_FUNC(__st_r13_to_r15)
1124 HIDDEN_FUNC(__st_r13_to_r16)
1125 HIDDEN_FUNC(__st_r13_to_r17)
1126 HIDDEN_FUNC(__st_r13_to_r18)
1127 HIDDEN_FUNC(__st_r13_to_r19)
1128 HIDDEN_FUNC(__st_r13_to_r20)
1129 HIDDEN_FUNC(__st_r13_to_r21)
1130 HIDDEN_FUNC(__st_r13_to_r22)
1131 HIDDEN_FUNC(__st_r13_to_r23)
1132 HIDDEN_FUNC(__st_r13_to_r24)
1133 HIDDEN_FUNC(__st_r13_to_r25)
; Entry labels are laid out from the widest register range (r25) down to
; the narrowest (r15).
; NOTE(review): presumably each entry stores its top register and then
; falls through to the next label -- confirm against the store
; instructions that sit between the labels.
1135 SYM(__st_r13_to_r25):
1137 SYM(__st_r13_to_r24):
1139 SYM(__st_r13_to_r23):
1141 SYM(__st_r13_to_r22):
1143 SYM(__st_r13_to_r21):
1145 SYM(__st_r13_to_r20):
1147 SYM(__st_r13_to_r19):
1149 SYM(__st_r13_to_r18):
1151 SYM(__st_r13_to_r17):
1153 SYM(__st_r13_to_r16):
1155 SYM(__st_r13_to_r15):
1157 st r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
; One ENDFUNC per entry point, so that every exported symbol gets a .size.
1164 ENDFUNC(__st_r13_to_r15)
1165 ENDFUNC(__st_r13_to_r16)
1166 ENDFUNC(__st_r13_to_r17)
1167 ENDFUNC(__st_r13_to_r18)
1168 ENDFUNC(__st_r13_to_r19)
1169 ENDFUNC(__st_r13_to_r20)
1170 ENDFUNC(__st_r13_to_r21)
1171 ENDFUNC(__st_r13_to_r22)
1172 ENDFUNC(__st_r13_to_r23)
1173 ENDFUNC(__st_r13_to_r24)
1174 ENDFUNC(__st_r13_to_r25)
1175 #endif /* L_millicodethunk_st */
1178 #ifdef L_millicodethunk_ld
; Register-load millicode thunks.
; Eleven entry points, __ld_r13_to_r15 through __ld_r13_to_r25, are
; exported as global symbols and tagged with HIDDEN_FUNC.
1181 ; ==================================
1184 .global SYM(__ld_r13_to_r15)
1185 .global SYM(__ld_r13_to_r16)
1186 .global SYM(__ld_r13_to_r17)
1187 .global SYM(__ld_r13_to_r18)
1188 .global SYM(__ld_r13_to_r19)
1189 .global SYM(__ld_r13_to_r20)
1190 .global SYM(__ld_r13_to_r21)
1191 .global SYM(__ld_r13_to_r22)
1192 .global SYM(__ld_r13_to_r23)
1193 .global SYM(__ld_r13_to_r24)
1194 .global SYM(__ld_r13_to_r25)
1195 HIDDEN_FUNC(__ld_r13_to_r15)
1196 HIDDEN_FUNC(__ld_r13_to_r16)
1197 HIDDEN_FUNC(__ld_r13_to_r17)
1198 HIDDEN_FUNC(__ld_r13_to_r18)
1199 HIDDEN_FUNC(__ld_r13_to_r19)
1200 HIDDEN_FUNC(__ld_r13_to_r20)
1201 HIDDEN_FUNC(__ld_r13_to_r21)
1202 HIDDEN_FUNC(__ld_r13_to_r22)
1203 HIDDEN_FUNC(__ld_r13_to_r23)
1204 HIDDEN_FUNC(__ld_r13_to_r24)
1205 HIDDEN_FUNC(__ld_r13_to_r25)
; Entry labels are laid out from the widest register range (r25) down to
; the narrowest (r15).
; NOTE(review): presumably each entry reloads its top register and then
; falls through to the next label -- confirm against the load
; instructions that sit between the labels.
1206 SYM(__ld_r13_to_r25):
1208 SYM(__ld_r13_to_r24):
1210 SYM(__ld_r13_to_r23):
1212 SYM(__ld_r13_to_r22):
1214 SYM(__ld_r13_to_r21):
1216 SYM(__ld_r13_to_r20):
1218 SYM(__ld_r13_to_r19):
1220 SYM(__ld_r13_to_r18):
1222 SYM(__ld_r13_to_r17):
1224 SYM(__ld_r13_to_r16):
1226 SYM(__ld_r13_to_r15):
1228 ld r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
; One ENDFUNC per entry point, so that every exported symbol gets a .size.
1235 ENDFUNC(__ld_r13_to_r15)
1236 ENDFUNC(__ld_r13_to_r16)
1237 ENDFUNC(__ld_r13_to_r17)
1238 ENDFUNC(__ld_r13_to_r18)
1239 ENDFUNC(__ld_r13_to_r19)
1240 ENDFUNC(__ld_r13_to_r20)
1241 ENDFUNC(__ld_r13_to_r21)
1242 ENDFUNC(__ld_r13_to_r22)
1243 ENDFUNC(__ld_r13_to_r23)
1244 ENDFUNC(__ld_r13_to_r24)
1245 ENDFUNC(__ld_r13_to_r25)
1247 #endif /* L_millicodethunk_ld */
1248 #ifdef L_millicodethunk_ret
; Register-load-and-return millicode thunks.
; Twelve entry points, __ld_r13_to_r14_ret through __ld_r13_to_r25_ret,
; are exported as global symbols and tagged with HIDDEN_FUNC.
; NOTE(review): the _ret suffix suggests these also return on behalf of
; the caller after reloading -- confirm against the instructions between
; and after the labels.
1249 .global SYM(__ld_r13_to_r14_ret)
1250 .global SYM(__ld_r13_to_r15_ret)
1251 .global SYM(__ld_r13_to_r16_ret)
1252 .global SYM(__ld_r13_to_r17_ret)
1253 .global SYM(__ld_r13_to_r18_ret)
1254 .global SYM(__ld_r13_to_r19_ret)
1255 .global SYM(__ld_r13_to_r20_ret)
1256 .global SYM(__ld_r13_to_r21_ret)
1257 .global SYM(__ld_r13_to_r22_ret)
1258 .global SYM(__ld_r13_to_r23_ret)
1259 .global SYM(__ld_r13_to_r24_ret)
1260 .global SYM(__ld_r13_to_r25_ret)
1261 HIDDEN_FUNC(__ld_r13_to_r14_ret)
1262 HIDDEN_FUNC(__ld_r13_to_r15_ret)
1263 HIDDEN_FUNC(__ld_r13_to_r16_ret)
1264 HIDDEN_FUNC(__ld_r13_to_r17_ret)
1265 HIDDEN_FUNC(__ld_r13_to_r18_ret)
1266 HIDDEN_FUNC(__ld_r13_to_r19_ret)
1267 HIDDEN_FUNC(__ld_r13_to_r20_ret)
1268 HIDDEN_FUNC(__ld_r13_to_r21_ret)
1269 HIDDEN_FUNC(__ld_r13_to_r22_ret)
1270 HIDDEN_FUNC(__ld_r13_to_r23_ret)
1271 HIDDEN_FUNC(__ld_r13_to_r24_ret)
1272 HIDDEN_FUNC(__ld_r13_to_r25_ret)
; Entry labels are laid out from the widest register range (r25) down to
; the narrowest (r14).
1275 SYM(__ld_r13_to_r25_ret):
1277 SYM(__ld_r13_to_r24_ret):
1279 SYM(__ld_r13_to_r23_ret):
1281 SYM(__ld_r13_to_r22_ret):
1283 SYM(__ld_r13_to_r21_ret):
1285 SYM(__ld_r13_to_r20_ret):
1287 SYM(__ld_r13_to_r19_ret):
1289 SYM(__ld_r13_to_r18_ret):
1291 SYM(__ld_r13_to_r17_ret):
1293 SYM(__ld_r13_to_r16_ret):
1295 SYM(__ld_r13_to_r15_ret):
1297 SYM(__ld_r13_to_r14_ret):
; One ENDFUNC per entry point, so that every exported symbol gets a .size.
1303 ENDFUNC(__ld_r13_to_r14_ret)
1304 ENDFUNC(__ld_r13_to_r15_ret)
1305 ENDFUNC(__ld_r13_to_r16_ret)
1306 ENDFUNC(__ld_r13_to_r17_ret)
1307 ENDFUNC(__ld_r13_to_r18_ret)
1308 ENDFUNC(__ld_r13_to_r19_ret)
1309 ENDFUNC(__ld_r13_to_r20_ret)
1310 ENDFUNC(__ld_r13_to_r21_ret)
1311 ENDFUNC(__ld_r13_to_r22_ret)
1312 ENDFUNC(__ld_r13_to_r23_ret)
1313 ENDFUNC(__ld_r13_to_r24_ret)
1314 ENDFUNC(__ld_r13_to_r25_ret)
1316 #endif /* L_millicodethunk_ret */
1318 #define ARC_OPTFPE (defined (__ARC700__) || defined (__ARC_FPX_QUARK__))
1323 #include "ieee-754/adddf3.S"
1329 #include "ieee-754/muldf3.S"
1330 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
1331 #include "ieee-754/arc600-mul64/muldf3.S"
1332 #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
1333 #include "ieee-754/arc600-dsp/muldf3.S"
1339 #include "ieee-754/addsf3.S"
1345 #include "ieee-754/mulsf3.S"
1346 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
1347 #include "ieee-754/arc600-mul64/mulsf3.S"
1348 #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
1349 #include "ieee-754/arc600-dsp/mulsf3.S"
1350 #elif defined (__ARC_NORM__)
1351 #include "ieee-754/arc600/mulsf3.S"
1357 #include "ieee-754/divdf3.S"
1358 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
1359 #include "ieee-754/arc600-mul64/divdf3.S"
1360 #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
1361 #include "ieee-754/arc600-dsp/divdf3.S"
1367 #include "ieee-754/divsf3-stdmul.S"
1368 #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
1369 #include "ieee-754/arc600-mul64/divsf3.S"
1370 #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
1371 #include "ieee-754/arc600-dsp/divsf3.S"
1372 #elif defined (__ARC_NORM__)
1373 #include "ieee-754/arc600/divsf3.S"
1377 #ifdef L_extendsfdf2
1379 #include "ieee-754/extendsfdf2.S"
1385 #include "ieee-754/truncdfsf2.S"
1391 #include "ieee-754/floatsidf.S"
1397 #include "ieee-754/floatsisf.S"
1401 #ifdef L_floatunsidf
1403 #include "ieee-754/floatunsidf.S"
1409 #include "ieee-754/fixdfsi.S"
1415 #include "ieee-754/fixsfsi.S"
1421 #include "ieee-754/fixunsdfsi.S"
1427 #include "ieee-754/eqdf2.S"
1433 #include "ieee-754/eqsf2.S"
1439 #include "ieee-754/gtdf2.S"
1445 #include "ieee-754/gtsf2.S"
1451 #include "ieee-754/gedf2.S"
1457 #include "ieee-754/gesf2.S"
1463 #include "ieee-754/uneqdf2.S"
1469 #include "ieee-754/uneqsf2.S"
1475 #include "ieee-754/orddf2.S"
1481 #include "ieee-754/ordsf2.S"
1484 #endif /* ARC_OPTFPE */