]>
Commit | Line | Data |
---|---|---|
d38a64b4 JR |
1 | ; libgcc1 routines for Synopsys DesignWare ARC cpu. |
2 | ||
99dee823 | 3 | /* Copyright (C) 1995-2021 Free Software Foundation, Inc. |
d38a64b4 JR |
4 | Contributor: Joern Rennecke <joern.rennecke@embecosm.com> |
5 | on behalf of Synopsys Inc. | |
6 | ||
7 | This file is part of GCC. | |
8 | ||
9 | GCC is free software; you can redistribute it and/or modify it under | |
10 | the terms of the GNU General Public License as published by the Free | |
11 | Software Foundation; either version 3, or (at your option) any later | |
12 | version. | |
13 | ||
14 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
15 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 | for more details. | |
18 | ||
19 | Under Section 7 of GPL version 3, you are granted additional | |
20 | permissions described in the GCC Runtime Library Exception, version | |
21 | 3.1, as published by the Free Software Foundation. | |
22 | ||
23 | You should have received a copy of the GNU General Public License and | |
24 | a copy of the GCC Runtime Library Exception along with this program; | |
25 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
26 | <http://www.gnu.org/licenses/>. */ | |
27 | ||
28 | /* As a special exception, if you link this library with other files, | |
29 | some of which are compiled with GCC, to produce an executable, | |
30 | this library does not by itself cause the resulting executable | |
31 | to be covered by the GNU General Public License. | |
32 | This exception does not however invalidate any other reasons why | |
33 | the executable file might be covered by the GNU General Public License. */ | |
34 | ||
48c842ab | 35 | |
d38a64b4 | 36 | /* ANSI concatenation macros. */ |
48c842ab | 37 | /* CONCAT1 lets its arguments be macro-expanded before CONCAT2 pastes them together. */
d38a64b4 JR |
38 | #define CONCAT1(a, b) CONCAT2(a, b) |
39 | #define CONCAT2(a, b) a ## b | |
48c842ab | 40 |
d38a64b4 | 41 | /* Use the right prefix for global labels. */ |
48c842ab | 42 |
d38a64b4 | 43 | #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) |
48c842ab | 44 | /* Unless WORKING_ASSEMBLER is defined, the _l mnemonic spellings used in this file fall back to the plain mnemonics. */
/* NOTE(review): the _l forms appear to request the long encoding (see the alignment remark in __udivmodsi4) - confirm. */
d38a64b4 JR |
45 | #ifndef WORKING_ASSEMBLER |
46 | #define abs_l abs | |
47 | #define asl_l asl | |
48 | #define mov_l mov | |
49 | #endif | |
48c842ab | 50 | /* FUNC(X) emits a .type directive that marks SYM(X) as a function. */
/* HIDDEN_FUNC(X) does the same and additionally emits .hidden for X. */
/* ENDFUNC(X) places a local .Lfe_X label and emits .size for X measured up to that label. */
/* ENDFUNC goes through ENDFUNC0 so that X is macro-expanded before the token pasting. */
d38a64b4 JR |
51 | #define FUNC(X) .type SYM(X),@function |
52 | #define HIDDEN_FUNC(X) FUNC(X)` .hidden X | |
53 | #define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X | |
54 | #define ENDFUNC(X) ENDFUNC0(X) | |
55 | ||
2744b8b2 CZ |
56 | #ifdef __ARC_RF16__ |
57 | /* Use object attributes to inform other tools this file is | |
58 | safe for RF16 configuration. */ | |
59 | .arc_attribute Tag_ARC_ABI_rf16, 1 | |
60 | #endif | |
48c842ab | 61 | |
d38a64b4 JR |
62 | #ifdef L_mulsi3 |
63 | .section .text | |
64 | .align 4 | |
65 | /* __mulsi3 - 32-bit integer multiply. */
/* In: r0 = a, r1 = b. Out: r0 = low 32 bits of a * b. */
/* Exactly one of the implementations below is assembled, chosen by the preprocessor tests on the multiplier, norm and barrel shifter macros. */ ||
66 | .global SYM(__mulsi3) | |
67 | SYM(__mulsi3): | |
68 | ||
69 | /* This is the simple version. | |
70 | ||
48c842ab | 71 | while (a) |
d38a64b4 JR |
72 | { |
73 | if (a & 1) | |
48c842ab | 74 | r += b; |
d38a64b4 JR |
75 | a >>= 1; |
76 | b <<= 1; | |
77 | } | |
78 | */ | |
79 | ||
80 | #if defined (__ARC_MUL64__) | |
81 | FUNC(__mulsi3) | |
82 | mulu64 r0,r1 /* the product is read back from mlo below */ | |
83 | j_s.d [blink] /* return. The next instruction runs in the delay slot */ | |
84 | mov_s r0,mlo | |
85 | ENDFUNC(__mulsi3) | |
d64af69f | 86 | #elif defined (__ARC_MPY__) |
d38a64b4 JR |
87 | HIDDEN_FUNC(__mulsi3) |
88 | mpyu r0,r0,r1 | |
89 | nop_s /* NOTE(review): the reason for this nop is not stated here - confirm before removing */ | |
90 | j_s [blink] | |
91 | ENDFUNC(__mulsi3) | |
92 | #elif defined (__ARC_NORM__) | |
93 | FUNC(__mulsi3) | |
94 | norm.f r2,r0 /* the flags set here are the ones tested by mov.mi and lpnz below */ | |
95 | rsub lp_count,r2,31 /* loop count = 31 - norm (a) */ | |
96 | mov.mi lp_count,32 /* a negative: process all 32 bits */ | |
97 | mov_s r2,r0 /* r2 = a, shifted right as its bits are consumed */ | |
98 | mov_s r0,0 /* r0 = running sum */ | |
99 | lpnz @.Lend ; loop is aligned | |
100 | lsr.f r2,r2 /* carry = next bit of a */ | |
101 | add.cs r0,r0,r1 /* r += b when that bit was set */ | |
102 | add_s r1,r1,r1 /* b <<= 1 */ | |
103 | .Lend: j_s [blink] | |
104 | ENDFUNC(__mulsi3) | |
d64af69f | 105 | #elif !defined (__OPTIMIZE_SIZE__) && defined (__ARC_BARREL_SHIFTER__) |
d38a64b4 JR |
106 | /* Up to 3.5 times faster than the simpler code below, but larger. */ |
107 | FUNC(__mulsi3) | |
108 | ror.f r2,r0,4 | |
109 | mov_s r0,0 | |
110 | add3.mi r0,r0,r1 | |
111 | asl.f r2,r2,2 | |
112 | add2.cs r0,r0,r1 | |
113 | jeq_s [blink] | |
114 | .Loop: | |
115 | add1.mi r0,r0,r1 | |
116 | asl.f r2,r2,2 | |
117 | add.cs r0,r0,r1 | |
118 | asl_s r1,r1,4 /* b advances by four bit positions per loop pass */ | |
119 | ror.f r2,r2,8 | |
120 | add3.mi r0,r0,r1 | |
121 | asl.f r2,r2,2 | |
122 | bne.d .Loop | |
123 | add2.cs r0,r0,r1 | |
124 | j_s [blink] | |
125 | ENDFUNC(__mulsi3) | |
126 | #elif !defined (__OPTIMIZE_SIZE__) /* __ARC601__ */ | |
127 | FUNC(__mulsi3) | |
128 | lsr.f r2,r0 | |
129 | mov_s r0,0 | |
130 | mov_s r3,0 | |
131 | add.cs r0,r0,r1 | |
132 | .Loop: | |
133 | lsr.f r2,r2 | |
134 | add1.cs r0,r0,r1 | |
135 | lsr.f r2,r2 | |
136 | add2.cs r0,r0,r1 | |
137 | lsr.f r2,r2 | |
138 | add3.cs r0,r0,r1 | |
48c842ab | 139 | bne.d .Loop |
d38a64b4 JR |
140 | add3 r1,r3,r1 /* delay slot: r3 is zero, so this is b <<= 3 */ |
141 | j_s [blink] | |
142 | ENDFUNC(__mulsi3) | |
143 | #else | |
144 | /********************************************************/ | |
145 | FUNC(__mulsi3) | |
146 | mov_s r2,0 ; Accumulate result here. | |
147 | .Lloop: | |
148 | bbit0 r0,0,@.Ly | |
149 | add_s r2,r2,r1 ; r += b | |
48c842ab | 150 | .Ly: |
d38a64b4 | 151 | lsr_s r0,r0 ; a >>= 1 |
48c842ab CZ |
152 | asl_s r1,r1 ; b <<= 1 |
153 | brne_s r0,0,@.Lloop | |
d38a64b4 JR |
154 | .Ldone: |
155 | j_s.d [blink] | |
156 | mov_s r0,r2 | |
157 | ENDFUNC(__mulsi3) | |
158 | /********************************************************/ | |
159 | #endif | |
48c842ab | 160 |
d38a64b4 JR |
161 | #endif /* L_mulsi3 */ |
162 | ||
163 | #ifdef L_umulsidi3 | |
164 | .section .text | |
165 | .align 4 | |
166 | /* __umulsidi3 - unsigned 32 x 32 -> 64 bit multiply of r0 and r1. */
/* The product is returned in DBL0L (low word) and DBL0H (high word). */
/* Those register names are not defined in this file - presumably they come from the arc-ieee-754.h include below. */ ||
167 | .global SYM(__umulsidi3) | |
168 | SYM(__umulsidi3): | |
169 | HIDDEN_FUNC(__umulsidi3) | |
170 | /* We need ARC700 /ARC_MUL64 definitions of __umulsidi3 / __umulsi3_highpart | |
171 | in case some code has been compiled without multiply support enabled, | |
172 | but linked with the multiply-support enabled libraries. | |
173 | For ARC601 (i.e. without a barrel shifter), we also use umulsidi3 as our | |
174 | umulsi3_highpart implementation; the use of the latter label doesn't | |
175 | actually benefit ARC601 platforms, but is useful when ARC601 code is linked | |
176 | against other libraries. */ | |
d64af69f CZ |
177 | #if defined (__ARC_MPY__) || defined (__ARC_MUL64__) \ |
178 | || !defined (__ARC_BARREL_SHIFTER__) | |
d38a64b4 JR |
179 | .global SYM(__umulsi3_highpart) |
180 | SYM(__umulsi3_highpart): | |
181 | HIDDEN_FUNC(__umulsi3_highpart) | |
182 | #endif | |
183 | ||
184 | /* This is the simple version. | |
185 | ||
48c842ab | 186 | while (a) |
d38a64b4 JR |
187 | { |
188 | if (a & 1) | |
48c842ab | 189 | r += b; |
d38a64b4 JR |
190 | a >>= 1; |
191 | b <<= 1; | |
192 | } | |
193 | */ | |
194 | #include "ieee-754/arc-ieee-754.h" | |
195 | ||
d64af69f | 196 | #ifdef __ARC_MPY__ |
d38a64b4 JR |
197 | mov_s r12,DBL0L /* copy this operand, because DBL0L is overwritten by the low product before the high product is formed */ |
198 | mpyu DBL0L,r12,DBL0H | |
199 | j_s.d [blink] | |
d64af69f | 200 | MPYHU DBL0H,r12,DBL0H |
d38a64b4 JR |
201 | #elif defined (__ARC_MUL64__) |
202 | /* Likewise for __ARC_MUL64__ */ | |
203 | mulu64 r0,r1 | |
204 | mov_s DBL0L,mlo | |
205 | j_s.d [blink] | |
206 | mov_s DBL0H,mhi | |
d64af69f | 207 | #else /* !__ARC_MPY__ && !__ARC_MUL64__ */ |
d38a64b4 JR |
208 | /* Although it might look tempting to extend this to handle muldi3, |
209 | using mulsi3 twice with 2.25 cycles per 32 bit add is faster | |
210 | than one loop with 3 or four cycles per 32 bit add. */ | |
211 | asl.f r12,0 ; Top part of b. | |
212 | mov_s r2,0 ; Accumulate result here. | |
213 | bbit1.d r0,0,@.Ladd | |
214 | mov_s r3,0 | |
215 | .Llooptst: | |
216 | rlc r12,r12 /* shift the bit carried out of b into its top part */ | |
217 | breq r0,0,@.Ldone ; while (a) | |
218 | .Lloop: | |
219 | asl.f r1,r1 ; b <<= 1 | |
220 | bbit0.d r0,1,@.Llooptst | |
221 | lsr r0,r0 ; a >>= 1 | |
222 | rlc r12,r12 | |
223 | .Ladd: | |
224 | add.f r3,r3,r1 ; r += b | |
225 | brne.d r0,0,@.Lloop ; while (a); | |
226 | adc r2,r2,r12 /* delay slot: high word of r += b, including the carry from the add.f above */ | |
227 | .Ldone: | |
228 | mov_s DBL0L,r3 | |
229 | j_s.d [blink] | |
230 | mov DBL0H,r2 | |
d64af69f | 231 | #endif /* !__ARC_MPY__*/ |
d38a64b4 | 232 | ENDFUNC(__umulsidi3) |
d64af69f CZ |
233 | #if defined (__ARC_MPY__) || defined (__ARC_MUL64__) \ |
234 | || !defined (__ARC_BARREL_SHIFTER__) | |
d38a64b4 JR |
235 | ENDFUNC(__umulsi3_highpart) |
236 | #endif | |
237 | #endif /* L_umulsidi3 */ | |
238 | ||
2744b8b2 | 239 | #ifndef __ARC_RF16__ |
8180cde0 CZ |
240 | #ifdef L_muldi3 |
241 | .section .text | |
242 | .align 4
/* __muldi3 - 64 x 64 -> 64 bit multiply. */
/* In: a in the r0 / r1 pair, b in the r2 / r3 pair. Out: product in r0 / r1. */
/* With __LITTLE_ENDIAN__ the low words are r0 and r2, otherwise they are r1 and r3. */
/* product = umulsidi3 (a_lo, b_lo) + ((a_lo * b_hi + b_lo * a_hi) << 32), with __mulsi3 used for the cross terms. */
/* r4 - r9 carry values across the calls, so __umulsidi3 and __mulsi3 must leave them untouched. */ | |
243 | .global SYM(__muldi3) | |
244 | SYM(__muldi3):
FUNC(__muldi3) | |
245 | #ifdef __LITTLE_ENDIAN__ | |
246 | push_s blink /* the bl calls below overwrite blink */ | |
247 | mov_s r4,r3 ;4 | |
248 | mov_s r5,r2 ;4 | |
249 | mov_s r9,r0 ;4 | |
250 | mov_s r8,r1 ;4 | |
251 | bl.d @SYM(__umulsidi3) /* r0 / r1 = a_lo * b_lo as a full 64-bit product */ | |
252 | mov_s r1,r2 ;4 | |
253 | mov_s r6,r0 ;4 | |
254 | mov_s r7,r1 ;4 | |
255 | mov_s r0,r9 ;4 | |
256 | bl.d @SYM(__mulsi3) /* r0 = a_lo * b_hi */ | |
257 | mov_s r1,r4 ;4 | |
258 | mov_s r4,r0 ;4 | |
259 | mov_s r1,r8 ;4 | |
260 | bl.d @SYM(__mulsi3) /* r0 = b_lo * a_hi */ | |
261 | mov_s r0,r5 ;4 | |
262 | pop_s blink | |
263 | add_s r0,r0,r4 ;2 | |
264 | add r1,r0,r7 /* high word = both cross terms + high word of a_lo * b_lo */ | |
265 | j_s.d [blink] | |
266 | mov_s r0,r6 ;4 | |
267 | #else | |
268 | push_s blink /* the bl calls below overwrite blink */ | |
269 | mov_s r5,r3 | |
270 | mov_s r9,r2 | |
271 | mov_s r4,r1 | |
272 | mov_s r8,r0 | |
273 | mov_s r0,r1 | |
274 | bl.d @SYM(__umulsidi3) /* r0 / r1 = a_lo * b_lo as a full 64-bit product */ | |
275 | mov_s r1,r3 | |
276 | mov_s r7,r0 | |
277 | mov_s r6,r1 | |
278 | mov_s r0,r4 | |
279 | bl.d @SYM(__mulsi3) /* r0 = a_lo * b_hi */ | |
280 | mov_s r1,r9 | |
281 | mov_s r4,r0 | |
282 | mov_s r1,r8 | |
283 | bl.d @SYM(__mulsi3) /* r0 = b_lo * a_hi */ | |
284 | mov_s r0,r5 | |
285 | pop_s blink | |
286 | add_s r0,r0,r4 | |
287 | add_s r0,r0,r7 /* high word = both cross terms + high word of a_lo * b_lo */ | |
288 | j_s.d [blink] | |
289 | mov_s r1,r6 | |
290 | #endif /* __LITTLE_ENDIAN__ */ | |
291 | ENDFUNC(__muldi3) | |
292 | #endif /* L_muldi3 */ | |
2744b8b2 | 293 | #endif /* !__ARC_RF16__ */ |
8180cde0 | 294 | |
d38a64b4 JR |
295 | #ifdef L_umulsi3_highpart |
296 | #include "ieee-754/arc-ieee-754.h" | |
297 | /* For use without a barrel shifter, and for ARC700 / ARC_MUL64, the | |
298 | mulsidi3 algorithms above look better, so for these, there is an | |
299 | extra label up there. */ | |
d64af69f CZ |
300 | #if !defined (__ARC_MPY__) && !defined (__ARC_MUL64__) \ |
301 | && defined (__ARC_BARREL_SHIFTER__) | |
d38a64b4 JR |
302 | .global SYM(__umulsi3_highpart) |
303 | SYM(__umulsi3_highpart): /* Returns the high 32 bits of the unsigned product r0 * r1 in DBL0H. */ | |
304 | HIDDEN_FUNC(__umulsi3_highpart) | |
305 | mov_s r2,0 /* r2 = accumulator for the high part */ | |
306 | mov_s r3,32 /* r3 = 32 minus the number of loop passes done so far */ | |
307 | .Loop: | |
308 | lsr.f r0,r0 /* carry = next multiplier bit */ | |
309 | add.cs.f r2,r2,r1 /* add the multiplicand when that bit was set */ | |
310 | sub_s r3,r3,1 | |
311 | brne.d r0,0,.Loop /* stop early once no multiplier bits remain */ | |
312 | rrc r2,r2 /* delay slot: shift the sum right by one through carry */ | |
313 | j_s.d [blink] | |
314 | /* Make the result register peephole-compatible with mulsidi3. */ | |
315 | lsr DBL0H,r2,r3 | |
316 | ENDFUNC(__umulsi3_highpart) | |
d64af69f | 317 | #endif /* !__ARC_MPY__ && !__ARC_MUL64__ && __ARC_BARREL_SHIFTER__ */ |
d38a64b4 JR |
318 | #endif /* L_umulsi3_highpart */ |
319 | ||
320 | #ifdef L_divmod_tools | |
321 | ||
322 | ; Utilities used by all routines. | |
323 | ||
324 | .section .text | |
325 | ||
326 | /* | |
327 | unsigned long | |
328 | udivmodsi4(int modwanted, unsigned long num, unsigned long den) | |
329 | { | |
330 | unsigned long bit = 1; | |
331 | unsigned long res = 0; | |
332 | ||
333 | while (den < num && bit && !(den & (1L<<31))) | |
334 | { | |
335 | den <<=1; | |
336 | bit <<=1; | |
337 | } | |
338 | while (bit) | |
339 | { | |
340 | if (num >= den) | |
341 | { | |
342 | num -= den; | |
343 | res |= bit; | |
344 | } | |
345 | bit >>=1; | |
346 | den >>=1; | |
347 | } | |
348 | if (modwanted) return num; | |
349 | return res; | |
350 | } | |
351 | */ | |
352 | /* Unlike the C reference above, the assembly takes no modwanted argument and returns quotient and remainder together. */ ||
353 | ; inputs: r0 = numerator, r1 = denominator | |
354 | ; outputs: r0 = quotient, r1 = remainder, r2/r3 trashed | |
355 | /* NOTE(review): some variants below also write r4 and lp_count, which the register list above does not mention. */
/* Callers in this file keep values in r7, r11 and r12 across the call, so those must stay untouched here. */ ||
356 | .balign 4 | |
357 | .global SYM(__udivmodsi4) | |
358 | FUNC(__udivmodsi4) | |
359 | SYM(__udivmodsi4): | |
360 | ||
d64af69f | 361 | #if defined (__ARC_EA__) |
d38a64b4 JR |
362 | /* Normalize divisor and dividend, and then use the appropriate number of |
363 | divaw (the number of result bits, or one more) to produce the result. | |
364 | There are some special conditions that need to be tested: | |
365 | - We can only directly normalize unsigned numbers that fit in 31 bit. For | |
366 | the divisor, we test early on that it is not 'negative'. | |
367 | - divaw can't correctly process a dividend that is larger than the divisor. | |
368 | We handle this by checking that the dividend prior to normalization is | |
369 | not larger than the normalized divisor. As we then already know | |
370 | that the divisor fits 31 bit, this check also makes sure that the | |
371 | dividend fits. | |
372 | - ordinary normalization of the dividend could make it larger than the | |
373 | normalized divisor, which again would be unsuitable for divaw. | |
374 | Thus, we want to shift left the dividend by one less, except that we | |
375 | want to leave it alone if it is already 31 bit. To this end, we | |
376 | double the input to norm with adds. | |
377 | - If the dividend has fewer bits than the divisor, that would leave us | |
378 | with a negative number of divaw to execute. Although we could use a | |
379 | conditional loop to avoid excess divaw, and then the quotient could | |
380 | be extracted correctly as there'd be more than enough zero bits, the | |
381 | remainder would be shifted left too far, requiring a conditional shift | |
382 | right. The cost of that shift and the possible mispredict on the | |
383 | conditional loop cost as much as putting in an early check for a zero | |
384 | result. */ | |
385 | bmsk r3,r0,29 /* r3 = low 30 bits of the dividend. A mismatch below means bit 30 or 31 is set */ | |
386 | brne.d r3,r0,.Large_dividend | |
387 | norm.f r2,r1 | |
388 | brlo r0,r1,.Lret0 | |
389 | norm r3,r0 | |
390 | asl_s r1,r1,r2 | |
391 | sub_s r3,r3,1 | |
392 | asl_l r0,r0,r3 ; not short to keep loop aligned | |
393 | sub lp_count,r2,r3 | |
394 | lp .Ldiv_end | |
395 | divaw r0,r0,r1 | |
396 | .Ldiv_end:sub_s r3,r2,1 | |
397 | lsr r1,r0,r2 /* remainder = upper part of the divaw result */ | |
398 | j_s.d [blink] | |
399 | bmsk r0,r0,r3 /* delay slot: quotient = its low r2 bits */ | |
400 | ||
401 | .balign 4 | |
402 | .Large_dividend: | |
403 | bmi .Ltrivial /* taken when the divisor has bit 31 set (N comes from the norm.f above) */ | |
404 | asl_s r1,r1,r2 | |
405 | mov_s r3,0 | |
406 | sub1.f r4,r0,r1 | |
407 | mov.lo r4,r0 | |
408 | mov.hs r3,2 | |
409 | cmp r4,r1 | |
410 | sub.hs r4,r4,r1 | |
411 | add.hs r3,r3,1 | |
412 | mov.f lp_count,r2 | |
413 | lpne .Ldiv_end2 | |
414 | divaw r4,r4,r1 | |
415 | .Ldiv_end2:asl r0,r3,r2 | |
416 | lsr r1,r4,r2 | |
417 | sub_s r2,r2,1 | |
418 | bmsk r4,r4,r2 | |
419 | j_s.d [blink] | |
420 | or.ne r0,r0,r4 | |
421 | ||
422 | .Lret0: /* dividend below divisor: quotient 0, remainder = dividend */ | |
423 | mov_s r1,r0 | |
424 | j_s.d [blink] | |
425 | mov_l r0,0 | |
426 | .balign 4 | |
427 | .Ltrivial: /* divisor has bit 31 set, so the quotient is either 0 or 1 */ | |
428 | sub.f r1,r0,r1 | |
429 | mov.c r1,r0 | |
430 | mov_s r0,1 | |
431 | j_s.d [blink] | |
432 | mov.c r0,0 | |
048c6a9a | 433 | #elif !defined (__OPTIMIZE_SIZE__) && !defined (__ARC_RF16__) |
d64af69f | 434 | #if defined (__ARC_NORM__) && defined (__ARC_BARREL_SHIFTER__) |
d38a64b4 JR |
435 | lsr_s r2,r0 |
436 | brhs.d r1,r2,.Lret0_3 /* divisor >= dividend >> 1: use the short path at .Lret0_3 */ | |
437 | norm r2,r2 | |
438 | norm r3,r1 | |
439 | sub_s r3,r3,r2 | |
440 | asl_s r1,r1,r3 | |
441 | sub1.f 0,r0,r1 | |
442 | lsr.cs r1,r1,1 | |
443 | sbc r2,r3,0 | |
444 | sub1 r0,r0,r1 | |
445 | cmp_s r0,r1 | |
446 | mov.f lp_count,r2 | |
447 | #else /* !__ARC_NORM__ || !__ARC_BARREL_SHIFTER__ */ | |
448 | lsr_s r2,r0 | |
449 | brhs.d r1,r2,.Lret0_3 | |
450 | mov lp_count,32 | |
451 | .Lloop1: | |
452 | asl_s r1,r1 ; den <<= 1 | |
453 | brls.d r1,r2,@.Lloop1 | |
454 | sub lp_count,lp_count,1 | |
455 | sub_s r0,r0,r1 | |
456 | lsr_s r1,r1 | |
457 | cmp_s r0,r1 | |
458 | xor.f r2,lp_count,31 | |
d64af69f | 459 | #if !defined (__ARCEM__) && !defined (__ARCHS__) |
d38a64b4 | 460 | mov_s lp_count,r2 |
c0ab1970 CZ |
461 | #else |
462 | mov lp_count,r2 | |
463 | nop_s | |
d64af69f | 464 | #endif /* !__ARCEM__ && !__ARCHS__ */ |
d38a64b4 JR |
465 | #endif /* !__ARC_NORM__ || !__ARC_BARREL_SHIFTER__ */ |
466 | sub.cc r0,r0,r1 | |
467 | mov_s r3,3 | |
468 | sbc r3,r3,0 | |
d64af69f | 469 | #if defined (__ARC_BARREL_SHIFTER__) |
d38a64b4 JR |
470 | asl_s r3,r3,r2 |
471 | rsub r1,r1,1 | |
472 | lpne @.Lloop2_end | |
473 | add1.f r0,r1,r0 | |
474 | sub.cc r0,r0,r1 | |
475 | .Lloop2_end: | |
476 | lsr r1,r0,r2 | |
477 | #else | |
478 | rsub r1,r1,1 | |
479 | lpne @.Lloop2_end | |
480 | asl_s r3,r3 | |
481 | add1.f r0,r1,r0 | |
482 | sub.cc r0,r0,r1 | |
483 | .Lloop2_end: | |
484 | lsr_s r1,r0 | |
485 | lsr.f lp_count,r2 | |
486 | mov.cc r1,r0 | |
487 | lpnz 1f | |
488 | lsr_s r1,r1 | |
489 | lsr_s r1,r1 | |
490 | 1: | |
491 | #endif | |
492 | bmsk r0,r0,r2 | |
493 | bclr r0,r0,r2 | |
494 | j_s.d [blink] | |
495 | or_s r0,r0,r3 | |
496 | .Lret0_3: /* Short path: the active code below yields a quotient from 0 to 3 by up to three trial subtractions of the divisor. */ | |
497 | #if 0 /* Slightly shorter, but slower. */ | |
498 | lp .Loop3_end | |
499 | brhi.d r1,r0,.Loop3_end | |
500 | sub_s r0,r0,r1 | |
501 | .Loop3_end | |
502 | add_s r1,r1,r0 | |
503 | j_s.d [blink] | |
504 | rsub r0,lp_count,32-1 | |
505 | #else | |
506 | mov_s r4,r1 /* r4 = copy of the divisor. r4 is written here although the register list for this routine names only r2 and r3 */ | |
507 | sub.f r1,r0,r1 | |
508 | sbc r0,r0,r0 | |
509 | sub.cc.f r1,r1,r4 | |
510 | sbc r0,r0,0 | |
511 | sub.cc.f r1,r1,r4 | |
512 | sbc r0,r0,-3 | |
513 | j_s.d [blink] | |
514 | add.cs r1,r1,r4 /* delay slot: undo the subtraction that borrowed, leaving the remainder in r1 */ | |
515 | #endif | |
516 | #else /* Arctangent-A5, and any other non-EA build with __OPTIMIZE_SIZE__ or __ARC_RF16__ */ | |
517 | breq_s r1,0,@.Ldivmodend /* NOTE(review): a zero divisor skips the setup below, so r0 returns whatever r3 held on entry */ | |
518 | mov_s r2,1 ; bit = 1 | |
519 | mov_s r3,0 ; res = 0 | |
520 | .Lloop1: | |
48c842ab | 521 | brhs r1,r0,@.Lloop2 |
d38a64b4 JR |
522 | bbit1 r1,31,@.Lloop2 |
523 | asl_s r1,r1 ; den <<= 1 | |
524 | b.d @.Lloop1 | |
525 | asl_s r2,r2 ; bit <<= 1 | |
526 | .Lloop2: | |
48c842ab | 527 | brlo r0,r1,@.Lshiftdown |
d38a64b4 JR |
528 | sub_s r0,r0,r1 ; num -= den |
529 | or_s r3,r3,r2 ; res |= bit | |
530 | .Lshiftdown: | |
531 | lsr_s r2,r2 ; bit >>= 1 | |
48c842ab | 532 | lsr_s r1,r1 ; den >>= 1 |
d38a64b4 JR |
533 | brne_s r2,0,@.Lloop2 |
534 | .Ldivmodend: | |
535 | mov_s r1,r0 ; r1 = mod | |
536 | j.d [blink] | |
537 | mov_s r0,r3 ; r0 = res | |
538 | /******************************************************/ | |
539 | #endif | |
540 | ENDFUNC(__udivmodsi4) | |
541 | ||
542 | #endif | |
543 | ||
544 | #ifdef L_udivsi3 | |
545 | .section .text | |
546 | .align 4 | |
547 | /* __udivsi3 - unsigned 32-bit division of r0 by r1. */
/* Branches straight to __udivmodsi4, which returns to the original caller with the quotient in r0. */ ||
548 | .global SYM(__udivsi3) | |
549 | FUNC(__udivsi3) | |
550 | SYM(__udivsi3): | |
551 | b @SYM(__udivmodsi4) | |
552 | ENDFUNC(__udivsi3) | |
d38a64b4 JR |
553 |
554 | #endif /* L_udivsi3 */ | |
555 | ||
556 | #ifdef L_divsi3 | |
557 | .section .text | |
558 | .align 4 | |
559 | /* __divsi3 - signed 32-bit division. In: r0 = dividend, r1 = divisor. Out: r0 = quotient. */ ||
560 | .global SYM(__divsi3) | |
561 | FUNC(__divsi3) | |
562 | ||
d64af69f | 563 | #ifndef __ARC_EA__ |
d38a64b4 JR |
564 | SYM(__divsi3): |
565 | /* A5 / ARC60? */ | |
048c6a9a CZ |
566 | mov r12,blink /* keep the return address in r12, since bl overwrites blink */ |
567 | xor r11,r0,r1 /* bit 31 of r11 = sign of the quotient */ | |
d38a64b4 JR |
568 | abs_s r0,r0 |
569 | bl.d @SYM(__udivmodsi4) /* unsigned divide of the absolute values. r11 and r12 must survive this call */ | |
048c6a9a CZ |
570 | abs_s r1,r1 |
571 | tst r11,r11 | |
572 | j.d [r12] | |
573 | neg.mi r0,r0 /* delay slot: negate the quotient when the operand signs differed */ | |
d64af69f | 574 | #else /* __ARC_EA__ */ |
d38a64b4 JR |
575 | ;; We can use the abs, norm, divaw and mpy instructions for ARC700 |
576 | #define MULDIV | |
577 | #ifdef MULDIV | |
578 | /* This table has been generated by divtab-arc700.c. */ | |
579 | /* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31. | |
580 | For powers of two, we list unnormalized numbers instead. The values | |
581 | for powers of 2 are loaded, but not used. The value for 1 is actually | |
582 | the first instruction after __muldiv. */ | |
583 | .balign 4 | |
584 | .Ldivtab: | |
585 | ||
586 | .long 0x1000000 | |
587 | .long 0x80808081 | |
588 | .long 0x81020409 | |
589 | .long 0x81848DA9 | |
590 | .long 0x82082083 | |
591 | .long 0x828CBFBF | |
592 | .long 0x83126E98 | |
593 | .long 0x83993053 | |
594 | .long 0x84210843 | |
595 | .long 0x84A9F9C9 | |
596 | .long 0x85340854 | |
597 | .long 0x85BF3762 | |
598 | .long 0x864B8A7E | |
599 | .long 0x86D90545 | |
600 | .long 0x8767AB60 | |
601 | .long 0x87F78088 | |
602 | .long 0x88888889 | |
603 | .long 0x891AC73B | |
604 | .long 0x89AE408A | |
605 | .long 0x8A42F871 | |
606 | .long 0x8AD8F2FC | |
607 | .long 0x8B70344B | |
608 | .long 0x8C08C08D | |
609 | .long 0x8CA29C05 | |
610 | .long 0x8D3DCB09 | |
611 | .long 0x8DDA5203 | |
612 | .long 0x8E78356E | |
613 | .long 0x8F1779DA | |
614 | .long 0x8FB823EF | |
615 | .long 0x905A3864 | |
616 | .long 0x90FDBC0A | |
617 | .long 0x91A2B3C5 | |
618 | .long 0x92492493 | |
619 | .long 0x92F11385 | |
620 | .long 0x939A85C5 | |
621 | .long 0x94458095 | |
622 | .long 0x94F20950 | |
623 | .long 0x95A02569 | |
624 | .long 0x964FDA6D | |
625 | .long 0x97012E03 | |
626 | .long 0x97B425EE | |
627 | .long 0x9868C80A | |
628 | .long 0x991F1A52 | |
629 | .long 0x99D722DB | |
630 | .long 0x9A90E7DA | |
631 | .long 0x9B4C6F9F | |
632 | .long 0x9C09C09D | |
633 | .long 0x9CC8E161 | |
634 | .long 0x9D89D89E | |
635 | .long 0x9E4CAD24 | |
636 | .long 0x9F1165E8 | |
637 | .long 0x9FD809FE | |
638 | .long 0xA0A0A0A1 | |
639 | .long 0xA16B312F | |
640 | .long 0xA237C32C | |
641 | .long 0xA3065E40 | |
642 | .long 0xA3D70A3E | |
643 | .long 0xA4A9CF1E | |
644 | .long 0xA57EB503 | |
645 | .long 0xA655C43A | |
646 | .long 0xA72F053A | |
647 | .long 0xA80A80A9 | |
648 | .long 0xA8E83F58 | |
649 | .long 0xA9C84A48 | |
650 | .long 0xAAAAAAAB | |
651 | .long 0xAB8F69E3 | |
652 | .long 0xAC769185 | |
653 | .long 0xAD602B59 | |
654 | .long 0xAE4C415D | |
655 | .long 0xAF3ADDC7 | |
656 | .long 0xB02C0B03 | |
657 | .long 0xB11FD3B9 | |
658 | .long 0xB21642C9 | |
659 | .long 0xB30F6353 | |
660 | .long 0xB40B40B5 | |
661 | .long 0xB509E68B | |
662 | .long 0xB60B60B7 | |
663 | .long 0xB70FBB5B | |
664 | .long 0xB81702E1 | |
665 | .long 0xB92143FB | |
666 | .long 0xBA2E8BA3 | |
667 | .long 0xBB3EE722 | |
668 | .long 0xBC52640C | |
669 | .long 0xBD691048 | |
670 | .long 0xBE82FA0C | |
671 | .long 0xBFA02FE9 | |
672 | .long 0xC0C0C0C1 | |
673 | .long 0xC1E4BBD6 | |
674 | .long 0xC30C30C4 | |
675 | .long 0xC4372F86 | |
676 | .long 0xC565C87C | |
677 | .long 0xC6980C6A | |
678 | .long 0xC7CE0C7D | |
679 | .long 0xC907DA4F | |
680 | .long 0xCA4587E7 | |
681 | .long 0xCB8727C1 | |
682 | .long 0xCCCCCCCD | |
683 | .long 0xCE168A78 | |
684 | .long 0xCF6474A9 | |
685 | .long 0xD0B69FCC | |
686 | .long 0xD20D20D3 | |
687 | .long 0xD3680D37 | |
688 | .long 0xD4C77B04 | |
689 | .long 0xD62B80D7 | |
690 | .long 0xD79435E6 | |
691 | .long 0xD901B204 | |
692 | .long 0xDA740DA8 | |
693 | .long 0xDBEB61EF | |
694 | .long 0xDD67C8A7 | |
695 | .long 0xDEE95C4D | |
696 | .long 0xE070381D | |
697 | .long 0xE1FC780F | |
698 | .long 0xE38E38E4 | |
699 | .long 0xE525982B | |
700 | .long 0xE6C2B449 | |
701 | .long 0xE865AC7C | |
702 | .long 0xEA0EA0EB | |
703 | .long 0xEBBDB2A6 | |
704 | .long 0xED7303B6 | |
705 | .long 0xEF2EB720 | |
706 | .long 0xF0F0F0F1 | |
707 | .long 0xF2B9D649 | |
708 | .long 0xF4898D60 | |
709 | .long 0xF6603D99 | |
710 | .long 0xF83E0F84 | |
711 | .long 0xFA232CF3 | |
712 | .long 0xFC0FC0FD | |
713 | .long 0xFE03F810 | |
714 | .long 0x2000000 | |
715 | .long 0x81020409 | |
716 | .long 0x82082083 | |
717 | .long 0x83126E98 | |
718 | .long 0x84210843 | |
719 | .long 0x85340854 | |
720 | .long 0x864B8A7E | |
721 | .long 0x8767AB60 | |
722 | .long 0x88888889 | |
723 | .long 0x89AE408A | |
724 | .long 0x8AD8F2FC | |
725 | .long 0x8C08C08D | |
726 | .long 0x8D3DCB09 | |
727 | .long 0x8E78356E | |
728 | .long 0x8FB823EF | |
729 | .long 0x90FDBC0A | |
730 | .long 0x92492493 | |
731 | .long 0x939A85C5 | |
732 | .long 0x94F20950 | |
733 | .long 0x964FDA6D | |
734 | .long 0x97B425EE | |
735 | .long 0x991F1A52 | |
736 | .long 0x9A90E7DA | |
737 | .long 0x9C09C09D | |
738 | .long 0x9D89D89E | |
739 | .long 0x9F1165E8 | |
740 | .long 0xA0A0A0A1 | |
741 | .long 0xA237C32C | |
742 | .long 0xA3D70A3E | |
743 | .long 0xA57EB503 | |
744 | .long 0xA72F053A | |
745 | .long 0xA8E83F58 | |
746 | .long 0xAAAAAAAB | |
747 | .long 0xAC769185 | |
748 | .long 0xAE4C415D | |
749 | .long 0xB02C0B03 | |
750 | .long 0xB21642C9 | |
751 | .long 0xB40B40B5 | |
752 | .long 0xB60B60B7 | |
753 | .long 0xB81702E1 | |
754 | .long 0xBA2E8BA3 | |
755 | .long 0xBC52640C | |
756 | .long 0xBE82FA0C | |
757 | .long 0xC0C0C0C1 | |
758 | .long 0xC30C30C4 | |
759 | .long 0xC565C87C | |
760 | .long 0xC7CE0C7D | |
761 | .long 0xCA4587E7 | |
762 | .long 0xCCCCCCCD | |
763 | .long 0xCF6474A9 | |
764 | .long 0xD20D20D3 | |
765 | .long 0xD4C77B04 | |
766 | .long 0xD79435E6 | |
767 | .long 0xDA740DA8 | |
768 | .long 0xDD67C8A7 | |
769 | .long 0xE070381D | |
770 | .long 0xE38E38E4 | |
771 | .long 0xE6C2B449 | |
772 | .long 0xEA0EA0EB | |
773 | .long 0xED7303B6 | |
774 | .long 0xF0F0F0F1 | |
775 | .long 0xF4898D60 | |
776 | .long 0xF83E0F84 | |
777 | .long 0xFC0FC0FD | |
778 | .long 0x4000000 | |
779 | .long 0x82082083 | |
780 | .long 0x84210843 | |
781 | .long 0x864B8A7E | |
782 | .long 0x88888889 | |
783 | .long 0x8AD8F2FC | |
784 | .long 0x8D3DCB09 | |
785 | .long 0x8FB823EF | |
786 | .long 0x92492493 | |
787 | .long 0x94F20950 | |
788 | .long 0x97B425EE | |
789 | .long 0x9A90E7DA | |
790 | .long 0x9D89D89E | |
791 | .long 0xA0A0A0A1 | |
792 | .long 0xA3D70A3E | |
793 | .long 0xA72F053A | |
794 | .long 0xAAAAAAAB | |
795 | .long 0xAE4C415D | |
796 | .long 0xB21642C9 | |
797 | .long 0xB60B60B7 | |
798 | .long 0xBA2E8BA3 | |
799 | .long 0xBE82FA0C | |
800 | .long 0xC30C30C4 | |
801 | .long 0xC7CE0C7D | |
802 | .long 0xCCCCCCCD | |
803 | .long 0xD20D20D3 | |
804 | .long 0xD79435E6 | |
805 | .long 0xDD67C8A7 | |
806 | .long 0xE38E38E4 | |
807 | .long 0xEA0EA0EB | |
808 | .long 0xF0F0F0F1 | |
809 | .long 0xF83E0F84 | |
810 | .long 0x8000000 | |
811 | .long 0x84210843 | |
812 | .long 0x88888889 | |
813 | .long 0x8D3DCB09 | |
814 | .long 0x92492493 | |
815 | .long 0x97B425EE | |
816 | .long 0x9D89D89E | |
817 | .long 0xA3D70A3E | |
818 | .long 0xAAAAAAAB | |
819 | .long 0xB21642C9 | |
820 | .long 0xBA2E8BA3 | |
821 | .long 0xC30C30C4 | |
822 | .long 0xCCCCCCCD | |
823 | .long 0xD79435E6 | |
824 | .long 0xE38E38E4 | |
825 | .long 0xF0F0F0F1 | |
826 | .long 0x10000000 | |
827 | .long 0x88888889 | |
828 | .long 0x92492493 | |
829 | .long 0x9D89D89E | |
830 | .long 0xAAAAAAAB | |
831 | .long 0xBA2E8BA3 | |
832 | .long 0xCCCCCCCD | |
833 | .long 0xE38E38E4 | |
834 | .long 0x20000000 | |
835 | .long 0x92492493 | |
836 | .long 0xAAAAAAAB | |
837 | .long 0xCCCCCCCD | |
838 | .long 0x40000000 | |
839 | .long 0xAAAAAAAB | |
840 | .long 0x80000000 | |
841 | __muldiv: /* Reached from __divsi3 with r0 = dividend, r1 = divisor, r2 = abs (divisor). Returns the quotient in r0. */ | |
842 | neg r4,r2 | |
843 | ld.as r5,[pcl,r4] /* table lookup indexed backwards from pcl by the absolute divisor */ | |
844 | abs_s r12,r0 | |
48c842ab CZ |
845 | bic.f 0,r2,r4 /* Z is set when r2 has at most one bit set, so the multiply below is skipped for powers of two */ |
846 | mpyhu.ne r12,r12,r5 /* r12 = high word of abs (dividend) times the table entry */ | |
d38a64b4 JR |
847 | norm r3,r2 |
848 | xor.f 0,r0,r1 /* N = sign of the quotient, used by the neg.mi below */ | |
48c842ab CZ |
849 | ; write port allocation stall |
850 | rsub r3,r3,30 | |
851 | lsr r0,r12,r3 | |
852 | j_s.d [blink] | |
853 | neg.mi r0,r0 | |
d38a64b4 JR |
854 | |
855 | .balign 4 | |
856 | SYM(__divsi3): /* EA entry point: r0 = dividend, r1 = divisor, quotient returned in r0 */ | |
857 | norm r3,r1 | |
858 | abs_s r2,r1 | |
859 | brhs r3,23,__muldiv /* a norm of 23 or more means a small divisor magnitude: use the table-driven path */ | |
860 | norm r4,r0 | |
861 | abs_l r12,r0 | |
862 | brhs r4,r3,.Lonebit | |
863 | asl_s r2,r2,r3 | |
864 | asl r12,r12,r4 | |
865 | sub lp_count,r3,r4 | |
866 | sub.f r12,r12,r2 | |
867 | brge.d r12,r2,.Lsbit | |
868 | sub r4,r3,r4 | |
869 | add.lo r12,r12,r2 | |
870 | lp .Ldivend | |
871 | .Ldivstart:divaw r12,r12,r2 | |
872 | .Ldivend:xor_s r1,r1,r0 | |
873 | sub r0,r4,1 | |
874 | bmsk r0,r12,r0 | |
875 | bset.hs r0,r0,r4 | |
876 | tst_s r1,r1 /* r1 now holds divisor xor dividend, so N is the sign of the quotient */ | |
877 | j_s.d [blink] | |
878 | neg.mi r0,r0 | |
879 | .Lonebit: | |
880 | xor_s r1,r1,r0 | |
881 | asr_s r1,r1,31 | |
882 | sub1.f 0,r12,r2 ; special case: -2**(n+1) / 2**n | |
883 | or r0,r1,1 | |
884 | add.eq r0,r0,r0 | |
885 | cmp_s r12,r2 | |
886 | j_s.d [blink] | |
887 | mov.lo r0,0 | |
888 | .Lsbit: | |
889 | ; Need to handle special cases involving negative powers of two: | |
890 | ; r12,r2 are normalized dividend / divisor; | |
891 | ; divide anything by 0x80000000, or divide 0x80000000 by 0x40000000 | |
892 | add_s r12,r12,r2 | |
893 | xor_s r1,r1,r0 | |
894 | rsub r4,r4,-1 | |
895 | ror r0,r12,r4 | |
896 | tst_s r2,r2 | |
897 | bmsk r0,r0,r3 | |
898 | add.pl r0,r0,r0 | |
899 | tst_s r1,r1 | |
900 | j_s.d [blink] | |
901 | neg.mi r0,r0 | |
902 | #else /* !MULDIV */ | |
903 | /* This version requires that divaw works with a divisor of 0x80000000U */ | |
904 | abs_s r2,r1 /* NOTE(review): MULDIV is defined unconditionally just before its test, so this branch is never assembled */ | |
905 | norm r4,r0 /* NOTE(review): this branch has no SYM(__divsi3) entry label of its own - it would need one if ever enabled */ | |
906 | neg_s r3,r2 | |
907 | norm r3,r3 | |
908 | abs_s r12,r0 | |
909 | brhs r4,r3,.Lonebit | |
910 | asl_s r2,r2,r3 | |
911 | asl r12,r12,r4 | |
912 | sub lp_count,r3,r4 | |
913 | cmp_s r12,r2 | |
914 | sub.hs r12,r12,r2 | |
915 | lp .Ldivend | |
916 | .Ldivstart:divaw r12,r12,r2 | |
917 | .Ldivend:xor_s r1,r1,r0 | |
918 | sub_s r0,r3,1 | |
919 | bmsk r0,r12,r0 | |
920 | bset.hs r0,r0,r3 | |
921 | tst_s r1,r1 | |
922 | j_s.d [blink] | |
923 | negmi r0,r0 /* NOTE(review): spelled without the dot, unlike neg.mi elsewhere in this file - confirm it assembles */ | |
924 | .Lonebit: | |
925 | xor_s r1,r1,r0 | |
926 | asr_s r1,r1,31 | |
927 | cmp_s r12,r2 | |
928 | mov_s r0,0 | |
929 | j_s.d [blink] | |
930 | orhs r0,r1,1 /* NOTE(review): spelled without the dot, unlike the conditional forms elsewhere in this file - confirm it assembles */ | |
931 | #endif /* MULDIV */ | |
932 | ||
933 | #endif /* !__ARC_EA__ */ | |
934 | ENDFUNC(__divsi3) | |
935 | ||
48c842ab | 936 |
d38a64b4 JR |
937 | #endif /* L_divsi3 */ |
938 | ||
939 | #ifdef L_umodsi3 | |
940 | .section .text | |
941 | .align 4 | |
942 | /* __umodsi3 - unsigned 32-bit remainder of r0 by r1, returned in r0. */ ||
943 | .global SYM(__umodsi3) | |
944 | FUNC(__umodsi3) | |
945 | SYM(__umodsi3): | |
946 | mov r7,blink /* keep the return address in r7 across the call, which relies on __udivmodsi4 not writing r7 */ | |
947 | bl.nd @SYM(__udivmodsi4) | |
948 | j.d [r7] | |
949 | mov r0,r1 /* delay slot: __udivmodsi4 leaves the remainder in r1 */ | |
950 | ENDFUNC(__umodsi3) | |
d38a64b4 JR |
951 |
952 | #endif /* L_umodsi3 */ | |
953 | ||
954 | #ifdef L_modsi3 | |
955 | .section .text | |
956 | .align 4 | |
957 | /* __modsi3 - signed 32-bit remainder of r0 by r1, returned in r0. */
/* In the non-EA variant the unsigned remainder is negated exactly when the dividend is negative. */ ||
958 | .global SYM (__modsi3) | |
959 | FUNC(__modsi3) | |
960 | SYM(__modsi3): | |
d64af69f | 961 | #ifndef __ARC_EA__ |
d38a64b4 JR |
962 | /* A5 / ARC60? */ |
963 | mov_s r12,blink /* keep the return address in r12, since bl overwrites blink */ | |
048c6a9a | 964 | mov_s r11,r0 /* r11 = original dividend, whose sign is tested after the call */ |
d38a64b4 JR |
965 | abs_s r0,r0 |
966 | bl.d @SYM(__udivmodsi4) /* r11 and r12 must survive this call */ | |
048c6a9a CZ |
967 | abs_s r1,r1 |
968 | tst r11,r11 | |
d38a64b4 JR |
969 | neg_s r0,r1 |
970 | j_s.d [r12] | |
048c6a9a | 971 | mov.pl r0,r1 /* delay slot: keep the remainder positive when the dividend was not negative */ |
d64af69f | 972 | #else /* __ARC_EA__ */ |
d38a64b4 JR |
973 | abs_s r2,r1 |
974 | norm.f r4,r0 | |
975 | neg r5,r2 | |
976 | norm r3,r5 | |
977 | abs_l r12,r0 | |
978 | brhs r4,r3,.Lonebit | |
979 | asl_s r2,r2,r3 | |
980 | asl r12,r12,r4 | |
981 | sub lp_count,r3,r4 | |
982 | cmp_s r12,r2 | |
983 | sub.hs r12,r12,r2 | |
984 | tst_s r0,r0 /* flags for the neg.mi after the loop - presumably the divaw loop leaves them alone, confirm */ | |
985 | lp .Ldivend | |
986 | .Ldivstart:divaw r12,r12,r2 | |
987 | .Ldivend: | |
988 | lsr r0,r12,r3 | |
989 | j_s.d [blink] | |
990 | neg.mi r0,r0 | |
991 | .balign 4 | |
992 | .Lonebit:neg.pl r5,r5 | |
993 | cmp_s r12,r2 | |
994 | j_s.d [blink] | |
995 | sub.hs r0,r0,r5 | |
d64af69f | 996 | #endif /* !__ARC_EA__ */ |
d38a64b4 JR |
997 | ENDFUNC(__modsi3) |
998 | ||
999 | #endif /* L_modsi3 */ | |
1000 | ||
1001 | #ifdef L_clzsi2 | |
1002 | .section .text | |
1003 | .align 4 /* __clzsi2 - count of leading zero bits of r0, returned in r0. Three variants follow. */ | |
1004 | .global SYM (__clzsi2) | |
48c842ab | 1005 | SYM(__clzsi2): |
d38a64b4 JR |
1006 | #ifdef __ARC_NORM__ |
1007 | HIDDEN_FUNC(__clzsi2) | |
1008 | norm.f r0,r0 | |
1009 | mov.n r0,0 /* negative input: bit 31 is set, so the count is zero */ | |
1010 | j_s.d [blink] | |
1011 | add.pl r0,r0,1 /* delay slot: otherwise the count is the norm result plus one */ | |
1012 | ENDFUNC(__clzsi2) | |
d64af69f | 1013 | #elif !defined (__ARC_BARREL_SHIFTER__) |
d38a64b4 JR |
1014 | FUNC(__clzsi2) |
1015 | mov lp_count,10 | |
1016 | mov_l r1,0 | |
1017 | bset r2,r1,29 /* r2 = 1 << 29, the threshold tested inside the loop */ | |
1018 | lp .Loop_end | |
1019 | brhs r0,r2,.Loop_end | |
1020 | add3 r0,r1,r0 /* r1 is zero, so this shifts r0 left by three */ | |
1021 | .Loop_end: | |
1022 | asl.f 0,r0 | |
1023 | sub2 r0,lp_count,lp_count | |
1024 | sub.cs.f r0,r0,1 | |
1025 | add r0,r0,31 | |
1026 | j_s.d [blink] | |
1027 | add.pl r0,r0,1 | |
1028 | ENDFUNC(__clzsi2) | |
1029 | #else | |
1030 | FUNC(__clzsi2) | |
1031 | asl.f 0,r0,2 | |
1032 | mov r1,-1 | |
1033 | .Lcheck: | |
1034 | bbit1.d r0,31,.Ldone | |
1035 | asl.pl r0,r0,3 | |
1036 | bcs.d .Ldone_1 | |
1037 | add_s r1,r1,3 | |
1038 | bpnz.d .Lcheck | |
1039 | asl.f 0,r0,2 | |
1040 | mov_s r0,32 | |
1041 | j_s.d [blink] | |
1042 | mov.ne r0,r1 | |
1043 | .Ldone: | |
1044 | j_s.d [blink] | |
1045 | add_s r0,r1,1 | |
1046 | .Ldone_1: | |
1047 | j_s.d [blink] | |
1048 | sub_s r0,r1,1 | |
1049 | ENDFUNC(__clzsi2) | |
1050 | #endif | |
1051 | #endif /* L_clzsi2 */ | |
1052 | .section .text | |
1053 | ||
1054 | ||
1055 | ;;; MILLICODE THUNK LIB ;*************** | |
48c842ab | 1056 | |
d38a64b4 JR |
1057 | ;;; .macro push_regs from, to, offset |
1058 | ;;; st_s "\from", [sp, \offset] | |
1059 | ;;; .if \to-\from | |
1060 | ;;; push_regs "(\from+1)", \to, "(\offset+4)" | |
1061 | ;;; .endif | |
1062 | ;;; .endm | |
1063 | ;;; push_regs 13, 18, 0 | |
1064 | ;;; | |
1065 | ||
1066 | ;;;; .macro sum from, to, three | |
1067 | ;;;; .long \from | |
1068 | ;;;; .long \three | |
1069 | ;;;; .local regno | |
1070 | ;;;; .set regno, \from+1 | |
1071 | ;;;; .set shift, 32 | |
1072 | ;;;; .set shift, shift - 1 | |
48c842ab | 1073 | ;;;; # st_s %shift @3 lsl #shift |
d38a64b4 JR |
1074 | ;;;; .if \to-\from |
1075 | ;;;; sum "(\from+1)", \to, "(\three)" | |
48c842ab | 1076 | ;;;; .endif |
d38a64b4 | 1077 | ;;;; .endm |
48c842ab | 1078 | ;;;; |
d38a64b4 | 1079 | ;;;; SUM 0,5, 9 |
48c842ab CZ |
1080 | ;;;; |
1081 | ; .altmacro | |
d38a64b4 JR |
1082 | ;; .macro push_regs from=0, to=3, offset |
1083 | ;; st_s r\from, [sp, \offset] | |
1084 | ;; .if \to-\from | |
1085 | ;; push_regs "\from+1 ",\to,"(\offset+4)" | |
1086 | ;; .endif | |
1087 | ;; .endm | |
48c842ab | 1088 | ;; |
d38a64b4 JR |
1089 | ;; .macro expand_to_push from=13, to |
1090 | ;; ; .section .text | |
1091 | ;; ; .align 4 | |
1092 | ;; ; .global st_ | |
1093 | ;; ; .type foo, | |
1094 | ;; st_13_to_25: | |
1095 | ;; ; push_regs \from, \to, 0 | |
48c842ab | 1096 | ;; push_regs 0,3 ; |
d38a64b4 | 1097 | ;; .endm |
48c842ab | 1098 | ;; |
d38a64b4 | 1099 | ;; expand_to_push 13,18 |
48c842ab | 1100 | ;; |
d38a64b4 JR |
1101 | ;#endif |
1102 | ||
2744b8b2 | 1103 | #ifndef __ARC_RF16__ |
d38a64b4 JR |
1104 | #ifdef L_millicodethunk_st | |
; Millicode store thunks: __st_r13_to_r15 through __st_r13_to_r25.
; Entering at __st_r13_to_rN stores rN down to r13 into consecutive words
; addressed from sp: register rK is written to [sp, 4*(K-13)].
; Each entry label falls through into the next lower one, so a single
; instruction sequence serves all eleven entry points.
; sp itself is not modified here; the thunk returns through blink.
; There is no __st_r13_to_r14 entry point in this block.
1105 | .section .text | |
1106 | .align 4 | |
1107 | .global SYM(__st_r13_to_r15) | |
1108 | .global SYM(__st_r13_to_r16) | |
1109 | .global SYM(__st_r13_to_r17) | |
1110 | .global SYM(__st_r13_to_r18) | |
1111 | .global SYM(__st_r13_to_r19) | |
1112 | .global SYM(__st_r13_to_r20) | |
1113 | .global SYM(__st_r13_to_r21) | |
1114 | .global SYM(__st_r13_to_r22) | |
1115 | .global SYM(__st_r13_to_r23) | |
1116 | .global SYM(__st_r13_to_r24) | |
1117 | .global SYM(__st_r13_to_r25) | |
; HIDDEN_FUNC is a macro defined earlier in this file (outside this excerpt);
; presumably it sets the symbol type and hidden visibility -- confirm there.
1118 | HIDDEN_FUNC(__st_r13_to_r15) | |
1119 | HIDDEN_FUNC(__st_r13_to_r16) | |
1120 | HIDDEN_FUNC(__st_r13_to_r17) | |
1121 | HIDDEN_FUNC(__st_r13_to_r18) | |
1122 | HIDDEN_FUNC(__st_r13_to_r19) | |
1123 | HIDDEN_FUNC(__st_r13_to_r20) | |
1124 | HIDDEN_FUNC(__st_r13_to_r21) | |
1125 | HIDDEN_FUNC(__st_r13_to_r22) | |
1126 | HIDDEN_FUNC(__st_r13_to_r23) | |
1127 | HIDDEN_FUNC(__st_r13_to_r24) | |
1128 | HIDDEN_FUNC(__st_r13_to_r25) | |
1129 | .align 4 | |
; Entry-point chain, highest register first; every label falls through.
1130 | SYM(__st_r13_to_r25): | |
1131 | st r25, [sp,48] | |
48c842ab | 1132 | SYM(__st_r13_to_r24): |
d38a64b4 | 1133 | st r24, [sp,44] |
48c842ab | 1134 | SYM(__st_r13_to_r23): |
d38a64b4 | 1135 | st r23, [sp,40] |
48c842ab | 1136 | SYM(__st_r13_to_r22): |
d38a64b4 | 1137 | st r22, [sp,36] |
48c842ab | 1138 | SYM(__st_r13_to_r21): |
d38a64b4 | 1139 | st r21, [sp,32] |
48c842ab CZ |
1140 | SYM(__st_r13_to_r20): | |
1141 | st r20, [sp,28] | |
1142 | SYM(__st_r13_to_r19): | |
d38a64b4 | 1143 | st r19, [sp,24] |
48c842ab | 1144 | SYM(__st_r13_to_r18): |
d38a64b4 | 1145 | st r18, [sp,20] |
48c842ab | 1146 | SYM(__st_r13_to_r17): |
d38a64b4 | 1147 | st r17, [sp,16] |
48c842ab | 1148 | SYM(__st_r13_to_r16): |
d38a64b4 | 1149 | st r16, [sp,12] |
48c842ab | 1150 | SYM(__st_r13_to_r15): |
d38a64b4 JR |
1151 | #ifdef __ARC700__ | |
1152 | st r15, [sp,8] ; minimum function size to avoid stall: 6 bytes. | |
1153 | #else | |
1154 | st_s r15, [sp,8] | |
1155 | #endif | |
1156 | st_s r14, [sp,4] | |
; Return through blink; the final store of r13 executes in the delay slot.
1157 | j_s.d [%blink] | |
48c842ab | 1158 | st_s r13, [sp,0] |
d38a64b4 JR |
1159 | ENDFUNC(__st_r13_to_r15) | |
1160 | ENDFUNC(__st_r13_to_r16) | |
1161 | ENDFUNC(__st_r13_to_r17) | |
1162 | ENDFUNC(__st_r13_to_r18) | |
1163 | ENDFUNC(__st_r13_to_r19) | |
1164 | ENDFUNC(__st_r13_to_r20) | |
1165 | ENDFUNC(__st_r13_to_r21) | |
1166 | ENDFUNC(__st_r13_to_r22) | |
1167 | ENDFUNC(__st_r13_to_r23) | |
1168 | ENDFUNC(__st_r13_to_r24) | |
1169 | ENDFUNC(__st_r13_to_r25) | |
1170 | #endif /* L_millicodethunk_st */ | |
1171 | ||
1172 | ||
1173 | #ifdef L_millicodethunk_ld | |
1174 | .section .text | |
1175 | .align 4 | |
48c842ab | 1176 | ; ================================== |
d38a64b4 JR |
1177 | ; the loads |
1178 | ||
; Millicode load thunks: __ld_r13_to_r15 through __ld_r13_to_r25.
; Entering at __ld_r13_to_rN reloads rN down to r13 from consecutive words
; addressed from sp: register rK is read from [sp, 4*(K-13)], the same layout
; the __st_r13_to_rN thunks write.
; Each entry label falls through into the next lower one.
; sp is not modified and blink is not reloaded; the thunk returns through the
; blink value it was entered with.
1179 | .global SYM(__ld_r13_to_r15) | |
1180 | .global SYM(__ld_r13_to_r16) | |
1181 | .global SYM(__ld_r13_to_r17) | |
1182 | .global SYM(__ld_r13_to_r18) | |
1183 | .global SYM(__ld_r13_to_r19) | |
1184 | .global SYM(__ld_r13_to_r20) | |
1185 | .global SYM(__ld_r13_to_r21) | |
1186 | .global SYM(__ld_r13_to_r22) | |
1187 | .global SYM(__ld_r13_to_r23) | |
1188 | .global SYM(__ld_r13_to_r24) | |
1189 | .global SYM(__ld_r13_to_r25) | |
; HIDDEN_FUNC is a macro defined earlier in this file (outside this excerpt);
; presumably it sets the symbol type and hidden visibility -- confirm there.
1190 | HIDDEN_FUNC(__ld_r13_to_r15) | |
1191 | HIDDEN_FUNC(__ld_r13_to_r16) | |
1192 | HIDDEN_FUNC(__ld_r13_to_r17) | |
1193 | HIDDEN_FUNC(__ld_r13_to_r18) | |
1194 | HIDDEN_FUNC(__ld_r13_to_r19) | |
1195 | HIDDEN_FUNC(__ld_r13_to_r20) | |
1196 | HIDDEN_FUNC(__ld_r13_to_r21) | |
1197 | HIDDEN_FUNC(__ld_r13_to_r22) | |
1198 | HIDDEN_FUNC(__ld_r13_to_r23) | |
1199 | HIDDEN_FUNC(__ld_r13_to_r24) | |
1200 | HIDDEN_FUNC(__ld_r13_to_r25) | |
; Entry-point chain, highest register first; every label falls through.
1201 | SYM(__ld_r13_to_r25): | |
1202 | ld r25, [sp,48] | |
1203 | SYM(__ld_r13_to_r24): | |
1204 | ld r24, [sp,44] | |
1205 | SYM(__ld_r13_to_r23): | |
1206 | ld r23, [sp,40] | |
1207 | SYM(__ld_r13_to_r22): | |
1208 | ld r22, [sp,36] | |
1209 | SYM(__ld_r13_to_r21): | |
1210 | ld r21, [sp,32] | |
1211 | SYM(__ld_r13_to_r20): | |
48c842ab | 1212 | ld r20, [sp,28] |
d38a64b4 JR |
1213 | SYM(__ld_r13_to_r19): | |
1214 | ld r19, [sp,24] | |
1215 | SYM(__ld_r13_to_r18): | |
1216 | ld r18, [sp,20] | |
1217 | SYM(__ld_r13_to_r17): | |
1218 | ld r17, [sp,16] | |
1219 | SYM(__ld_r13_to_r16): | |
1220 | ld r16, [sp,12] | |
1221 | SYM(__ld_r13_to_r15): | |
1222 | #ifdef __ARC700__ | |
1223 | ld r15, [sp,8] ; minimum function size to avoid stall: 6 bytes. | |
1224 | #else | |
1225 | ld_s r15, [sp,8] | |
1226 | #endif | |
1227 | ld_s r14, [sp,4] | |
; Return through blink; the final load of r13 executes in the delay slot.
1228 | j_s.d [%blink] | |
1229 | ld_s r13, [sp,0] | |
1230 | ENDFUNC(__ld_r13_to_r15) | |
1231 | ENDFUNC(__ld_r13_to_r16) | |
1232 | ENDFUNC(__ld_r13_to_r17) | |
1233 | ENDFUNC(__ld_r13_to_r18) | |
1234 | ENDFUNC(__ld_r13_to_r19) | |
1235 | ENDFUNC(__ld_r13_to_r20) | |
1236 | ENDFUNC(__ld_r13_to_r21) | |
1237 | ENDFUNC(__ld_r13_to_r22) | |
1238 | ENDFUNC(__ld_r13_to_r23) | |
1239 | ENDFUNC(__ld_r13_to_r24) | |
1240 | ENDFUNC(__ld_r13_to_r25) | |
1241 | ||
1242 | #endif /* L_millicodethunk_ld */ | |
1243 | #ifdef L_millicodethunk_ret | |
; Millicode restore-and-return thunks: __ld_r13_to_r14_ret through
; __ld_r13_to_r25_ret.
; Entering at __ld_r13_to_rN_ret reloads rN down to r13 from [sp, 4*(K-13)],
; reloads blink from [sp, r12], adjusts sp and jumps to the reloaded blink.
; Each entry label falls through into the next lower one.
; Net stack adjustment performed below: sp = sp + r12 + 4.
; NOTE(review): r12 is read but never written in this block; presumably the
; caller passes the offset of the saved blink slot in r12 -- confirm against
; the code that emits calls to these thunks.
1244 | .global SYM(__ld_r13_to_r14_ret) | |
1245 | .global SYM(__ld_r13_to_r15_ret) | |
1246 | .global SYM(__ld_r13_to_r16_ret) | |
1247 | .global SYM(__ld_r13_to_r17_ret) | |
1248 | .global SYM(__ld_r13_to_r18_ret) | |
1249 | .global SYM(__ld_r13_to_r19_ret) | |
1250 | .global SYM(__ld_r13_to_r20_ret) | |
1251 | .global SYM(__ld_r13_to_r21_ret) | |
1252 | .global SYM(__ld_r13_to_r22_ret) | |
1253 | .global SYM(__ld_r13_to_r23_ret) | |
1254 | .global SYM(__ld_r13_to_r24_ret) | |
1255 | .global SYM(__ld_r13_to_r25_ret) | |
; HIDDEN_FUNC is a macro defined earlier in this file (outside this excerpt);
; presumably it sets the symbol type and hidden visibility -- confirm there.
1256 | HIDDEN_FUNC(__ld_r13_to_r14_ret) | |
1257 | HIDDEN_FUNC(__ld_r13_to_r15_ret) | |
1258 | HIDDEN_FUNC(__ld_r13_to_r16_ret) | |
1259 | HIDDEN_FUNC(__ld_r13_to_r17_ret) | |
1260 | HIDDEN_FUNC(__ld_r13_to_r18_ret) | |
1261 | HIDDEN_FUNC(__ld_r13_to_r19_ret) | |
1262 | HIDDEN_FUNC(__ld_r13_to_r20_ret) | |
1263 | HIDDEN_FUNC(__ld_r13_to_r21_ret) | |
1264 | HIDDEN_FUNC(__ld_r13_to_r22_ret) | |
1265 | HIDDEN_FUNC(__ld_r13_to_r23_ret) | |
1266 | HIDDEN_FUNC(__ld_r13_to_r24_ret) | |
1267 | HIDDEN_FUNC(__ld_r13_to_r25_ret) | |
1268 | .section .text | |
1269 | .align 4 | |
; Entry-point chain, highest register first; every label falls through.
1270 | SYM(__ld_r13_to_r25_ret): | |
1271 | ld r25, [sp,48] | |
1272 | SYM(__ld_r13_to_r24_ret): | |
1273 | ld r24, [sp,44] | |
1274 | SYM(__ld_r13_to_r23_ret): | |
1275 | ld r23, [sp,40] | |
1276 | SYM(__ld_r13_to_r22_ret): | |
1277 | ld r22, [sp,36] | |
1278 | SYM(__ld_r13_to_r21_ret): | |
1279 | ld r21, [sp,32] | |
1280 | SYM(__ld_r13_to_r20_ret): | |
48c842ab | 1281 | ld r20, [sp,28] |
d38a64b4 JR |
1282 | SYM(__ld_r13_to_r19_ret): | |
1283 | ld r19, [sp,24] | |
1284 | SYM(__ld_r13_to_r18_ret): | |
1285 | ld r18, [sp,20] | |
1286 | SYM(__ld_r13_to_r17_ret): | |
1287 | ld r17, [sp,16] | |
1288 | SYM(__ld_r13_to_r16_ret): | |
1289 | ld r16, [sp,12] | |
1290 | SYM(__ld_r13_to_r15_ret): | |
1291 | ld r15, [sp,8] | |
1292 | SYM(__ld_r13_to_r14_ret): | |
; Common tail.  blink is reloaded from sp + r12 first, while sp is unchanged.
; ld.ab then reads r13 from [sp] and afterwards adds r12 to sp (address
; write-back after the access, per the ARC .ab addressing mode).
; The jump uses the reloaded blink; the add in its delay slot bumps sp by 4.
1293 | ld blink,[sp,r12] | |
1294 | ld_s r14, [sp,4] | |
1295 | ld.ab r13, [sp,r12] | |
1296 | j_s.d [%blink] | |
1297 | add_s sp,sp,4 | |
1298 | ENDFUNC(__ld_r13_to_r14_ret) | |
1299 | ENDFUNC(__ld_r13_to_r15_ret) | |
1300 | ENDFUNC(__ld_r13_to_r16_ret) | |
1301 | ENDFUNC(__ld_r13_to_r17_ret) | |
1302 | ENDFUNC(__ld_r13_to_r18_ret) | |
1303 | ENDFUNC(__ld_r13_to_r19_ret) | |
1304 | ENDFUNC(__ld_r13_to_r20_ret) | |
1305 | ENDFUNC(__ld_r13_to_r21_ret) | |
1306 | ENDFUNC(__ld_r13_to_r22_ret) | |
1307 | ENDFUNC(__ld_r13_to_r23_ret) | |
1308 | ENDFUNC(__ld_r13_to_r24_ret) | |
1309 | ENDFUNC(__ld_r13_to_r25_ret) | |
1310 | ||
1311 | #endif /* L_millicodethunk_ret */ | |
1312 | ||
2744b8b2 | 1313 | #if defined (__ARC700__) || defined (__ARC_FPX_QUARK__) |
d38a64b4 JR |
1314 | #ifdef L_adddf3 |
1315 | #ifdef __ARC_NORM__ | |
1316 | #include "ieee-754/adddf3.S" | |
1317 | #endif | |
1318 | #endif | |
1319 | ||
1320 | #ifdef L_muldf3 | |
48c842ab | 1321 | #ifdef __ARC_MPY__ |
d38a64b4 JR |
1322 | #include "ieee-754/muldf3.S" |
1323 | #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__) | |
1324 | #include "ieee-754/arc600-mul64/muldf3.S" | |
1325 | #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__) | |
1326 | #include "ieee-754/arc600-dsp/muldf3.S" | |
1327 | #endif | |
1328 | #endif | |
1329 | ||
1330 | #ifdef L_addsf3 | |
1331 | #ifdef __ARC_NORM__ | |
1332 | #include "ieee-754/addsf3.S" | |
1333 | #endif | |
1334 | #endif | |
1335 | ||
1336 | #ifdef L_mulsf3 | |
48c842ab | 1337 | #ifdef __ARC_MPY__ |
d38a64b4 JR |
1338 | #include "ieee-754/mulsf3.S" |
1339 | #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__) | |
1340 | #include "ieee-754/arc600-mul64/mulsf3.S" | |
1341 | #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__) | |
1342 | #include "ieee-754/arc600-dsp/mulsf3.S" | |
1343 | #elif defined (__ARC_NORM__) | |
1344 | #include "ieee-754/arc600/mulsf3.S" | |
1345 | #endif | |
1346 | #endif | |
1347 | ||
1348 | #ifdef L_divdf3 | |
48c842ab | 1349 | #ifdef __ARC_MPY__ |
d38a64b4 JR |
1350 | #include "ieee-754/divdf3.S" |
1351 | #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__) | |
1352 | #include "ieee-754/arc600-mul64/divdf3.S" | |
1353 | #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__) | |
1354 | #include "ieee-754/arc600-dsp/divdf3.S" | |
1355 | #endif | |
1356 | #endif | |
1357 | ||
1358 | #ifdef L_divsf3 | |
48c842ab | 1359 | #ifdef __ARC_MPY__ |
d38a64b4 JR |
1360 | #include "ieee-754/divsf3-stdmul.S" |
1361 | #elif defined (__ARC_NORM__) && defined(__ARC_MUL64__) | |
1362 | #include "ieee-754/arc600-mul64/divsf3.S" | |
1363 | #elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__) | |
1364 | #include "ieee-754/arc600-dsp/divsf3.S" | |
1365 | #elif defined (__ARC_NORM__) | |
1366 | #include "ieee-754/arc600/divsf3.S" | |
1367 | #endif | |
1368 | #endif | |
1369 | ||
1370 | #ifdef L_extendsfdf2 | |
1371 | #ifdef __ARC_NORM__ | |
1372 | #include "ieee-754/extendsfdf2.S" | |
1373 | #endif | |
1374 | #endif | |
1375 | ||
1376 | #ifdef L_truncdfsf2 | |
1377 | #ifdef __ARC_NORM__ | |
1378 | #include "ieee-754/truncdfsf2.S" | |
1379 | #endif | |
1380 | #endif | |
1381 | ||
1382 | #ifdef L_floatsidf | |
1383 | #ifdef __ARC_NORM__ | |
1384 | #include "ieee-754/floatsidf.S" | |
1385 | #endif | |
1386 | #endif | |
1387 | ||
1388 | #ifdef L_floatsisf | |
1389 | #ifdef __ARC_NORM__ | |
1390 | #include "ieee-754/floatsisf.S" | |
1391 | #endif | |
1392 | #endif | |
1393 | ||
1394 | #ifdef L_floatunsidf | |
1395 | #ifdef __ARC_NORM__ | |
1396 | #include "ieee-754/floatunsidf.S" | |
1397 | #endif | |
1398 | #endif | |
1399 | ||
1400 | #ifdef L_fixdfsi | |
1401 | #ifdef __ARC_NORM__ | |
1402 | #include "ieee-754/fixdfsi.S" | |
1403 | #endif | |
1404 | #endif | |
1405 | ||
1406 | #ifdef L_fixsfsi | |
1407 | #ifdef __ARC_NORM__ | |
1408 | #include "ieee-754/fixsfsi.S" | |
1409 | #endif | |
1410 | #endif | |
1411 | ||
1412 | #ifdef L_fixunsdfsi | |
1413 | #ifdef __ARC_NORM__ | |
1414 | #include "ieee-754/fixunsdfsi.S" | |
1415 | #endif | |
1416 | #endif | |
1417 | ||
1418 | #ifdef L_eqdf2 | |
1419 | #ifdef __ARC_NORM__ | |
1420 | #include "ieee-754/eqdf2.S" | |
1421 | #endif | |
1422 | #endif | |
1423 | ||
1424 | #ifdef L_eqsf2 | |
1425 | #ifdef __ARC_NORM__ | |
1426 | #include "ieee-754/eqsf2.S" | |
1427 | #endif | |
1428 | #endif | |
1429 | ||
1430 | #ifdef L_gtdf2 | |
1431 | #ifdef __ARC_NORM__ | |
1432 | #include "ieee-754/gtdf2.S" | |
1433 | #endif | |
1434 | #endif | |
1435 | ||
1436 | #ifdef L_gtsf2 | |
1437 | #ifdef __ARC_NORM__ | |
1438 | #include "ieee-754/gtsf2.S" | |
1439 | #endif | |
1440 | #endif | |
1441 | ||
1442 | #ifdef L_gedf2 | |
1443 | #ifdef __ARC_NORM__ | |
1444 | #include "ieee-754/gedf2.S" | |
1445 | #endif | |
1446 | #endif | |
1447 | ||
1448 | #ifdef L_gesf2 | |
1449 | #ifdef __ARC_NORM__ | |
1450 | #include "ieee-754/gesf2.S" | |
1451 | #endif | |
1452 | #endif | |
1453 | ||
1454 | #ifdef L_uneqdf2 | |
1455 | #ifdef __ARC_NORM__ | |
1456 | #include "ieee-754/uneqdf2.S" | |
1457 | #endif | |
1458 | #endif | |
1459 | ||
1460 | #ifdef L_uneqsf2 | |
1461 | #ifdef __ARC_NORM__ | |
1462 | #include "ieee-754/uneqsf2.S" | |
1463 | #endif | |
1464 | #endif | |
1465 | ||
1466 | #ifdef L_orddf2 | |
1467 | #ifdef __ARC_NORM__ | |
1468 | #include "ieee-754/orddf2.S" | |
1469 | #endif | |
1470 | #endif | |
1471 | ||
1472 | #ifdef L_ordsf2 | |
1473 | #ifdef __ARC_NORM__ | |
1474 | #include "ieee-754/ordsf2.S" | |
1475 | #endif | |
1476 | #endif | |
48c842ab | 1477 | #endif /* __ARC700__ || __ARC_FPX_QUARK__ */ |
2744b8b2 CZ |
1478 | |
1479 | #endif /* !__ARC_RF16__ */ |