]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgcc/config/sh/lib1funcs.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / sh / lib1funcs.S
1 /* Copyright (C) 1994-2020 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 3, or (at your option) any
6 later version.
7
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
12
13 Under Section 7 of GPL version 3, you are granted additional
14 permissions described in the GCC Runtime Library Exception, version
15 3.1, as published by the Free Software Foundation.
16
17 You should have received a copy of the GNU General Public License and
18 a copy of the GCC Runtime Library Exception along with this program;
19 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 <http://www.gnu.org/licenses/>. */
21
22
23 !! libgcc routines for the Renesas / SuperH SH CPUs.
24 !! Contributed by Steve Chamberlain.
25 !! sac@cygnus.com
26
27 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
28 !! recoded in assembly by Toshiyasu Morita
29 !! tm@netcom.com
30
/* On ELF/Linux targets, emit an empty .note.GNU-stack section, then switch back to the previous section. */
/* NOTE(review): by GNU toolchain convention this marks the object as not needing an executable stack - confirm. */
31 #if defined(__ELF__) && defined(__linux__)
32 .section .note.GNU-stack,"",%progbits
33 .previous
34 #endif
35
36 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
37 ELF local label prefixes by J"orn Rennecke
38 amylaar@cygnus.com */
39
40 #include "lib1funcs.h"
41
42 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
43 so it is more convenient to define NO_FPSCR_VALUES here than to
44 define it on the command line. */
45 #if defined __vxworks && defined __PIC__
46 #define NO_FPSCR_VALUES
47 #endif
48
49 #ifdef L_ashiftrt
50 .global GLOBAL(ashiftrt_r4_0)
51 .global GLOBAL(ashiftrt_r4_1)
52 .global GLOBAL(ashiftrt_r4_2)
53 .global GLOBAL(ashiftrt_r4_3)
54 .global GLOBAL(ashiftrt_r4_4)
55 .global GLOBAL(ashiftrt_r4_5)
56 .global GLOBAL(ashiftrt_r4_6)
57 .global GLOBAL(ashiftrt_r4_7)
58 .global GLOBAL(ashiftrt_r4_8)
59 .global GLOBAL(ashiftrt_r4_9)
60 .global GLOBAL(ashiftrt_r4_10)
61 .global GLOBAL(ashiftrt_r4_11)
62 .global GLOBAL(ashiftrt_r4_12)
63 .global GLOBAL(ashiftrt_r4_13)
64 .global GLOBAL(ashiftrt_r4_14)
65 .global GLOBAL(ashiftrt_r4_15)
66 .global GLOBAL(ashiftrt_r4_16)
67 .global GLOBAL(ashiftrt_r4_17)
68 .global GLOBAL(ashiftrt_r4_18)
69 .global GLOBAL(ashiftrt_r4_19)
70 .global GLOBAL(ashiftrt_r4_20)
71 .global GLOBAL(ashiftrt_r4_21)
72 .global GLOBAL(ashiftrt_r4_22)
73 .global GLOBAL(ashiftrt_r4_23)
74 .global GLOBAL(ashiftrt_r4_24)
75 .global GLOBAL(ashiftrt_r4_25)
76 .global GLOBAL(ashiftrt_r4_26)
77 .global GLOBAL(ashiftrt_r4_27)
78 .global GLOBAL(ashiftrt_r4_28)
79 .global GLOBAL(ashiftrt_r4_29)
80 .global GLOBAL(ashiftrt_r4_30)
81 .global GLOBAL(ashiftrt_r4_31)
82 .global GLOBAL(ashiftrt_r4_32)
83
84 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
115 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
116 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
117
/* ashiftrt_r4_N: arithmetic right shift of r4 by the constant N; the result is left in r4. */
/* The entry points form fall-through chains: entering at label N executes one single-bit shar */
/* per label until the multi-bit tail of its group (24, 16 or 1) is reached. */
/* The instruction that follows each rts is its delay slot and executes before the return. */
/* T may be modified (rotcl, subc and shar all write it). */
118 .align 1
119 GLOBAL(ashiftrt_r4_32):
120 GLOBAL(ashiftrt_r4_31):
/* rotcl moves the sign bit of r4 into T; subc then computes r4 - r4 - T, i.e. 0 or -1 (all ones). */
121 rotcl r4
122 rts
123 subc r4,r4
124
/* Counts 25..30: one shar per entry, then fall into the shift-by-24 tail. */
125 GLOBAL(ashiftrt_r4_30):
126 shar r4
127 GLOBAL(ashiftrt_r4_29):
128 shar r4
129 GLOBAL(ashiftrt_r4_28):
130 shar r4
131 GLOBAL(ashiftrt_r4_27):
132 shar r4
133 GLOBAL(ashiftrt_r4_26):
134 shar r4
135 GLOBAL(ashiftrt_r4_25):
136 shar r4
/* Shift by 24: logical shift right by 16 + 8, then sign-extend the remaining byte (delay slot). */
137 GLOBAL(ashiftrt_r4_24):
138 shlr16 r4
139 shlr8 r4
140 rts
141 exts.b r4,r4
142
/* Counts 17..23: one shar per entry, then fall into the shift-by-16 tail. */
143 GLOBAL(ashiftrt_r4_23):
144 shar r4
145 GLOBAL(ashiftrt_r4_22):
146 shar r4
147 GLOBAL(ashiftrt_r4_21):
148 shar r4
149 GLOBAL(ashiftrt_r4_20):
150 shar r4
151 GLOBAL(ashiftrt_r4_19):
152 shar r4
153 GLOBAL(ashiftrt_r4_18):
154 shar r4
155 GLOBAL(ashiftrt_r4_17):
156 shar r4
/* Shift by 16: logical shift right by 16, then sign-extend the remaining 16-bit word (delay slot). */
157 GLOBAL(ashiftrt_r4_16):
158 shlr16 r4
159 rts
160 exts.w r4,r4
161
/* Counts 2..15: one shar per entry; the last single-bit shift is done in the rts delay slot at entry 1. */
162 GLOBAL(ashiftrt_r4_15):
163 shar r4
164 GLOBAL(ashiftrt_r4_14):
165 shar r4
166 GLOBAL(ashiftrt_r4_13):
167 shar r4
168 GLOBAL(ashiftrt_r4_12):
169 shar r4
170 GLOBAL(ashiftrt_r4_11):
171 shar r4
172 GLOBAL(ashiftrt_r4_10):
173 shar r4
174 GLOBAL(ashiftrt_r4_9):
175 shar r4
176 GLOBAL(ashiftrt_r4_8):
177 shar r4
178 GLOBAL(ashiftrt_r4_7):
179 shar r4
180 GLOBAL(ashiftrt_r4_6):
181 shar r4
182 GLOBAL(ashiftrt_r4_5):
183 shar r4
184 GLOBAL(ashiftrt_r4_4):
185 shar r4
186 GLOBAL(ashiftrt_r4_3):
187 shar r4
188 GLOBAL(ashiftrt_r4_2):
189 shar r4
190 GLOBAL(ashiftrt_r4_1):
191 rts
192 shar r4
193
/* Shift by 0: return with r4 unchanged. */
194 GLOBAL(ashiftrt_r4_0):
195 rts
196 nop
197
198 ENDFUNC(GLOBAL(ashiftrt_r4_0))
199 ENDFUNC(GLOBAL(ashiftrt_r4_1))
200 ENDFUNC(GLOBAL(ashiftrt_r4_2))
201 ENDFUNC(GLOBAL(ashiftrt_r4_3))
202 ENDFUNC(GLOBAL(ashiftrt_r4_4))
203 ENDFUNC(GLOBAL(ashiftrt_r4_5))
204 ENDFUNC(GLOBAL(ashiftrt_r4_6))
205 ENDFUNC(GLOBAL(ashiftrt_r4_7))
206 ENDFUNC(GLOBAL(ashiftrt_r4_8))
207 ENDFUNC(GLOBAL(ashiftrt_r4_9))
208 ENDFUNC(GLOBAL(ashiftrt_r4_10))
209 ENDFUNC(GLOBAL(ashiftrt_r4_11))
210 ENDFUNC(GLOBAL(ashiftrt_r4_12))
211 ENDFUNC(GLOBAL(ashiftrt_r4_13))
212 ENDFUNC(GLOBAL(ashiftrt_r4_14))
213 ENDFUNC(GLOBAL(ashiftrt_r4_15))
214 ENDFUNC(GLOBAL(ashiftrt_r4_16))
215 ENDFUNC(GLOBAL(ashiftrt_r4_17))
216 ENDFUNC(GLOBAL(ashiftrt_r4_18))
217 ENDFUNC(GLOBAL(ashiftrt_r4_19))
218 ENDFUNC(GLOBAL(ashiftrt_r4_20))
219 ENDFUNC(GLOBAL(ashiftrt_r4_21))
220 ENDFUNC(GLOBAL(ashiftrt_r4_22))
221 ENDFUNC(GLOBAL(ashiftrt_r4_23))
222 ENDFUNC(GLOBAL(ashiftrt_r4_24))
223 ENDFUNC(GLOBAL(ashiftrt_r4_25))
224 ENDFUNC(GLOBAL(ashiftrt_r4_26))
225 ENDFUNC(GLOBAL(ashiftrt_r4_27))
226 ENDFUNC(GLOBAL(ashiftrt_r4_28))
227 ENDFUNC(GLOBAL(ashiftrt_r4_29))
228 ENDFUNC(GLOBAL(ashiftrt_r4_30))
229 ENDFUNC(GLOBAL(ashiftrt_r4_31))
230 ENDFUNC(GLOBAL(ashiftrt_r4_32))
231 #endif
232
233 #ifdef L_ashiftrt_n
234
235 !
236 ! GLOBAL(ashrsi3)
237 !
238 ! Entry:
239 !
240 ! r4: Value to shift
241 ! r5: Shift count
242 !
243 ! Exit:
244 !
245 ! r0: Result
246 !
247 ! Destroys:
248 !
249 ! T bit, r5
250 !
251
/* ashrsi3: arithmetic right shift of r4 by a variable count in r5; the result is returned in r0. */
/* Dispatch works through a byte table: r5 = count & 31, then r5 = ashrsi3_table[r5], which is the */
/* distance in bytes from the start of the table to the handler for that count. */
252 .global GLOBAL(ashrsi3)
253 HIDDEN_FUNC(GLOBAL(ashrsi3))
254 .align 2
255 GLOBAL(ashrsi3):
256 mov #31,r0
257 and r0,r5
258 mova LOCAL(ashrsi3_table),r0
259 mov.b @(r0,r5),r5
260 #ifdef __sh1__
/* SH1 path: add the offset to the table address held in r0 and jump through the register. */
261 add r5,r0
262 jmp @r0
263 #else
/* Other CPUs: braf branches to (address of braf + 4) + r5. That base equals the table address only */
/* if the .align 2 below inserts no padding, which holds here because this entry sequence is six */
/* 2-byte instructions starting on a 4-byte boundary. */
264 braf r5
265 #endif
/* Delay slot of the jump: copy the value to shift into r0 before the handler runs. */
266 mov r4,r0
267
268 .align 2
/* One byte per shift count 0..31: offset of the handler relative to the start of this table. */
269 LOCAL(ashrsi3_table):
270 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
271 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
272 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
273 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
300 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
301 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
302
/* Shift by 31: rotcl moves the sign bit into T; subc (delay slot) yields r0 - r0 - T, i.e. 0 or -1. */
303 LOCAL(ashrsi3_31):
304 rotcl r0
305 rts
306 subc r0,r0
307
/* Counts 25..30: one shar per handler, falling through into the shift-by-24 tail. */
308 LOCAL(ashrsi3_30):
309 shar r0
310 LOCAL(ashrsi3_29):
311 shar r0
312 LOCAL(ashrsi3_28):
313 shar r0
314 LOCAL(ashrsi3_27):
315 shar r0
316 LOCAL(ashrsi3_26):
317 shar r0
318 LOCAL(ashrsi3_25):
319 shar r0
/* Shift by 24: logical shift right by 16 + 8, then sign-extend the remaining byte (delay slot). */
320 LOCAL(ashrsi3_24):
321 shlr16 r0
322 shlr8 r0
323 rts
324 exts.b r0,r0
325
/* Counts 17..23: one shar per handler, falling through into the shift-by-16 tail. */
326 LOCAL(ashrsi3_23):
327 shar r0
328 LOCAL(ashrsi3_22):
329 shar r0
330 LOCAL(ashrsi3_21):
331 shar r0
332 LOCAL(ashrsi3_20):
333 shar r0
334 LOCAL(ashrsi3_19):
335 shar r0
336 LOCAL(ashrsi3_18):
337 shar r0
338 LOCAL(ashrsi3_17):
339 shar r0
/* Shift by 16: logical shift right by 16, then sign-extend the remaining 16-bit word (delay slot). */
340 LOCAL(ashrsi3_16):
341 shlr16 r0
342 rts
343 exts.w r0,r0
344
/* Counts 2..15: one shar per handler; the final single-bit shift sits in the rts delay slot at handler 1. */
345 LOCAL(ashrsi3_15):
346 shar r0
347 LOCAL(ashrsi3_14):
348 shar r0
349 LOCAL(ashrsi3_13):
350 shar r0
351 LOCAL(ashrsi3_12):
352 shar r0
353 LOCAL(ashrsi3_11):
354 shar r0
355 LOCAL(ashrsi3_10):
356 shar r0
357 LOCAL(ashrsi3_9):
358 shar r0
359 LOCAL(ashrsi3_8):
360 shar r0
361 LOCAL(ashrsi3_7):
362 shar r0
363 LOCAL(ashrsi3_6):
364 shar r0
365 LOCAL(ashrsi3_5):
366 shar r0
367 LOCAL(ashrsi3_4):
368 shar r0
369 LOCAL(ashrsi3_3):
370 shar r0
371 LOCAL(ashrsi3_2):
372 shar r0
373 LOCAL(ashrsi3_1):
374 rts
375 shar r0
376
/* Shift by 0: return the value unchanged. */
377 LOCAL(ashrsi3_0):
378 rts
379 nop
380
381 ENDFUNC(GLOBAL(ashrsi3))
382 #endif
383
384 #ifdef L_ashiftlt
385
386 !
387 ! GLOBAL(ashlsi3)
388 ! (For compatibility with older binaries, not used by compiler)
389 !
390 ! Entry:
391 ! r4: Value to shift
392 ! r5: Shift count
393 !
394 ! Exit:
395 ! r0: Result
396 !
397 ! Destroys:
398 ! T bit
399 !
400 !
401 ! GLOBAL(ashlsi3_r0)
402 !
403 ! Entry:
404 ! r4: Value to shift
405 ! r0: Shift count
406 !
407 ! Exit:
408 ! r0: Result
409 !
410 ! Destroys:
411 ! T bit
412
/* ashlsi3 / ashlsi3_r0: logical left shift of r4 by a variable count; the result is returned in r0. */
/* ashlsi3 takes the count in r5, copies it to r0 and falls through into ashlsi3_r0. */
413 .global GLOBAL(ashlsi3)
414 .global GLOBAL(ashlsi3_r0)
415 HIDDEN_FUNC(GLOBAL(ashlsi3))
416 HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
417 GLOBAL(ashlsi3):
418 mov r5,r0
419 .align 2
420 GLOBAL(ashlsi3_r0):
421
/* Dispatch: r0 = (count & 31) * 4 is the byte offset of the matching entry in ashlsi3_table; */
/* every table entry is exactly two 2-byte instructions. */
422 #ifdef __sh1__
/* SH1 path: the table address is formed with mova in r0, so r4 is spilled to the stack and used */
/* as a temporary for the offset, then restored before the jump. */
423 and #31,r0
424 shll2 r0
425 mov.l r4,@-r15
426 mov r0,r4
427 mova LOCAL(ashlsi3_table),r0
428 add r4,r0
429 mov.l @r15+,r4
430 jmp @r0
431 mov r4,r0
432 .align 2
433 #else
/* Other CPUs: braf branches to (address of braf + 4) + r0, i.e. into the table that directly */
/* follows the delay slot. The delay slot loads the value to shift into r0. */
434 and #31,r0
435 shll2 r0
436 braf r0
437 mov r4,r0
438 #endif
439
/* Table entry N shifts r0 left by N. Short shifts complete inside the entry; the others do part of */
/* the shift in the delay slot of a bra and finish in a shared tail further down. */
440 LOCAL(ashlsi3_table):
441 rts // << 0
442 nop
443 LOCAL(ashlsi_1):
444 rts // << 1
445 shll r0
446 LOCAL(ashlsi_2): // << 2
447 rts
448 shll2 r0
449 bra LOCAL(ashlsi_1) // << 3
450 shll2 r0
451 bra LOCAL(ashlsi_2) // << 4
452 shll2 r0
453 bra LOCAL(ashlsi_5) // << 5
454 shll r0
455 bra LOCAL(ashlsi_6) // << 6
456 shll2 r0
457 bra LOCAL(ashlsi_7) // << 7
458 shll r0
459 LOCAL(ashlsi_8): // << 8
460 rts
461 shll8 r0
462 bra LOCAL(ashlsi_8) // << 9
463 shll r0
464 bra LOCAL(ashlsi_8) // << 10
465 shll2 r0
466 bra LOCAL(ashlsi_11) // << 11
467 shll r0
468 bra LOCAL(ashlsi_12) // << 12
469 shll2 r0
470 bra LOCAL(ashlsi_13) // << 13
471 shll r0
472 bra LOCAL(ashlsi_14) // << 14
473 shll8 r0
474 bra LOCAL(ashlsi_15) // << 15
475 shll8 r0
476 LOCAL(ashlsi_16): // << 16
477 rts
478 shll16 r0
479 bra LOCAL(ashlsi_16) // << 17
480 shll r0
481 bra LOCAL(ashlsi_16) // << 18
482 shll2 r0
483 bra LOCAL(ashlsi_19) // << 19
484 shll r0
485 bra LOCAL(ashlsi_20) // << 20
486 shll2 r0
487 bra LOCAL(ashlsi_21) // << 21
488 shll r0
489 bra LOCAL(ashlsi_22) // << 22
490 shll16 r0
491 bra LOCAL(ashlsi_23) // << 23
492 shll16 r0
493 bra LOCAL(ashlsi_16) // << 24
494 shll8 r0
495 bra LOCAL(ashlsi_25) // << 25
496 shll r0
497 bra LOCAL(ashlsi_26) // << 26
498 shll2 r0
499 bra LOCAL(ashlsi_27) // << 27
500 shll r0
501 bra LOCAL(ashlsi_28) // << 28
502 shll2 r0
503 bra LOCAL(ashlsi_29) // << 29
504 shll16 r0
505 bra LOCAL(ashlsi_30) // << 30
506 shll16 r0
/* << 31: keep only bit 0, then rotate it into bit 31 in the rts delay slot. */
507 and #1,r0 // << 31
508 rts
509 rotr r0
510
/* Shared tails. Where an rts is directly followed by a label, the first instruction under that */
/* label doubles as the delay slot of the rts, so it contributes to the preceding tail as well. */
511 LOCAL(ashlsi_7):
512 shll2 r0
513 LOCAL(ashlsi_5):
514 LOCAL(ashlsi_6):
515 shll2 r0
516 rts
517 LOCAL(ashlsi_13):
518 shll2 r0
519 LOCAL(ashlsi_12):
520 LOCAL(ashlsi_11):
521 shll8 r0
522 rts
523 LOCAL(ashlsi_21):
524 shll2 r0
525 LOCAL(ashlsi_20):
526 LOCAL(ashlsi_19):
527 shll16 r0
528 rts
529 LOCAL(ashlsi_28):
530 LOCAL(ashlsi_27):
531 shll2 r0
532 LOCAL(ashlsi_26):
533 LOCAL(ashlsi_25):
534 shll16 r0
535 rts
536 shll8 r0
537
/* 14 and 22: the table entry already shifted left by 8 or 16; shifting right by 2 and then left by 8 */
/* gives a net left shift of 14 or 22 (bits lost by the right shift would be shifted out anyway). */
538 LOCAL(ashlsi_22):
539 LOCAL(ashlsi_14):
540 shlr2 r0
541 rts
542 shll8 r0
543
/* 15 and 23: same idea with a right shift by 1. */
544 LOCAL(ashlsi_23):
545 LOCAL(ashlsi_15):
546 shlr r0
547 rts
548 shll8 r0
549
/* 29 and 30: the entry shifted left by 16; right by 3 (29) or 2 (30), then left by 16 again. */
550 LOCAL(ashlsi_29):
551 shlr r0
552 LOCAL(ashlsi_30):
553 shlr2 r0
554 rts
555 shll16 r0
556
557 ENDFUNC(GLOBAL(ashlsi3))
558 ENDFUNC(GLOBAL(ashlsi3_r0))
559 #endif
560
561 #ifdef L_lshiftrt
562
563 !
564 ! GLOBAL(lshrsi3)
565 ! (For compatibility with older binaries, not used by compiler)
566 !
567 ! Entry:
568 ! r4: Value to shift
569 ! r5: Shift count
570 !
571 ! Exit:
572 ! r0: Result
573 !
574 ! Destroys:
575 ! T bit
576 !
577 !
578 ! GLOBAL(lshrsi3_r0)
579 !
580 ! Entry:
581 ! r4: Value to shift
582 ! r0: Shift count
583 !
584 ! Exit:
585 ! r0: Result
586 !
587 ! Destroys:
588 ! T bit
589
/* lshrsi3 / lshrsi3_r0: logical right shift of r4 by a variable count; the result is returned in r0. */
/* lshrsi3 takes the count in r5, copies it to r0 and falls through into lshrsi3_r0. */
590 .global GLOBAL(lshrsi3)
591 .global GLOBAL(lshrsi3_r0)
592 HIDDEN_FUNC(GLOBAL(lshrsi3))
593 HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
594 GLOBAL(lshrsi3):
595 mov r5,r0
596 .align 2
597 GLOBAL(lshrsi3_r0):
598
/* Dispatch: r0 = (count & 31) * 4 is the byte offset of the matching entry in lshrsi3_table; */
/* every table entry is exactly two 2-byte instructions. */
599 #ifdef __sh1__
/* SH1 path: the table address is formed with mova in r0, so r4 is spilled to the stack and used */
/* as a temporary for the offset, then restored before the jump. */
600 and #31,r0
601 shll2 r0
602 mov.l r4,@-r15
603 mov r0,r4
604 mova LOCAL(lshrsi3_table),r0
605 add r4,r0
606 mov.l @r15+,r4
607 jmp @r0
608 mov r4,r0
609 .align 2
610 #else
/* Other CPUs: braf branches to (address of braf + 4) + r0, i.e. into the table that directly */
/* follows the delay slot. The delay slot loads the value to shift into r0. */
611 and #31,r0
612 shll2 r0
613 braf r0
614 mov r4,r0
615 #endif
/* Table entry N shifts r0 right by N. Short shifts complete inside the entry; the others do part of */
/* the shift in the delay slot of a bra and finish in a shared tail further down. */
616 LOCAL(lshrsi3_table):
617 rts // >> 0
618 nop
619 LOCAL(lshrsi_1): // >> 1
620 rts
621 shlr r0
622 LOCAL(lshrsi_2): // >> 2
623 rts
624 shlr2 r0
625 bra LOCAL(lshrsi_1) // >> 3
626 shlr2 r0
627 bra LOCAL(lshrsi_2) // >> 4
628 shlr2 r0
629 bra LOCAL(lshrsi_5) // >> 5
630 shlr r0
631 bra LOCAL(lshrsi_6) // >> 6
632 shlr2 r0
633 bra LOCAL(lshrsi_7) // >> 7
634 shlr r0
635 LOCAL(lshrsi_8): // >> 8
636 rts
637 shlr8 r0
638 bra LOCAL(lshrsi_8) // >> 9
639 shlr r0
640 bra LOCAL(lshrsi_8) // >> 10
641 shlr2 r0
642 bra LOCAL(lshrsi_11) // >> 11
643 shlr r0
644 bra LOCAL(lshrsi_12) // >> 12
645 shlr2 r0
646 bra LOCAL(lshrsi_13) // >> 13
647 shlr r0
648 bra LOCAL(lshrsi_14) // >> 14
649 shlr8 r0
650 bra LOCAL(lshrsi_15) // >> 15
651 shlr8 r0
652 LOCAL(lshrsi_16): // >> 16
653 rts
654 shlr16 r0
655 bra LOCAL(lshrsi_16) // >> 17
656 shlr r0
657 bra LOCAL(lshrsi_16) // >> 18
658 shlr2 r0
659 bra LOCAL(lshrsi_19) // >> 19
660 shlr r0
661 bra LOCAL(lshrsi_20) // >> 20
662 shlr2 r0
663 bra LOCAL(lshrsi_21) // >> 21
664 shlr r0
665 bra LOCAL(lshrsi_22) // >> 22
666 shlr16 r0
667 bra LOCAL(lshrsi_23) // >> 23
668 shlr16 r0
669 bra LOCAL(lshrsi_16) // >> 24
670 shlr8 r0
671 bra LOCAL(lshrsi_25) // >> 25
672 shlr r0
673 bra LOCAL(lshrsi_26) // >> 26
674 shlr2 r0
675 bra LOCAL(lshrsi_27) // >> 27
676 shlr r0
677 bra LOCAL(lshrsi_28) // >> 28
678 shlr2 r0
679 bra LOCAL(lshrsi_29) // >> 29
680 shlr16 r0
681 bra LOCAL(lshrsi_30) // >> 30
682 shlr16 r0
/* >> 31: shll moves bit 31 into T; movt (delay slot) then sets r0 to T, i.e. 0 or 1. */
683 shll r0 // >> 31
684 rts
685 movt r0
686
/* Shared tails. Where an rts is directly followed by a label, the first instruction under that */
/* label doubles as the delay slot of the rts, so it contributes to the preceding tail as well. */
687 LOCAL(lshrsi_7):
688 shlr2 r0
689 LOCAL(lshrsi_5):
690 LOCAL(lshrsi_6):
691 shlr2 r0
692 rts
693 LOCAL(lshrsi_13):
694 shlr2 r0
695 LOCAL(lshrsi_12):
696 LOCAL(lshrsi_11):
697 shlr8 r0
698 rts
699 LOCAL(lshrsi_21):
700 shlr2 r0
701 LOCAL(lshrsi_20):
702 LOCAL(lshrsi_19):
703 shlr16 r0
704 rts
705 LOCAL(lshrsi_28):
706 LOCAL(lshrsi_27):
707 shlr2 r0
708 LOCAL(lshrsi_26):
709 LOCAL(lshrsi_25):
710 shlr16 r0
711 rts
712 shlr8 r0
713
/* 14 and 22: the table entry already shifted right by 8 or 16; shifting left by 2 and then right by 8 */
/* gives a net right shift of 14 or 22 (bits lost by the left shift would be shifted out anyway). */
714 LOCAL(lshrsi_22):
715 LOCAL(lshrsi_14):
716 shll2 r0
717 rts
718 shlr8 r0
719
/* 15 and 23: same idea with a left shift by 1. */
720 LOCAL(lshrsi_23):
721 LOCAL(lshrsi_15):
722 shll r0
723 rts
724 shlr8 r0
725
/* 29 and 30: the entry shifted right by 16; left by 3 (29) or 2 (30), then right by 16 again. */
726 LOCAL(lshrsi_29):
727 shll r0
728 LOCAL(lshrsi_30):
729 shll2 r0
730 rts
731 shlr16 r0
732
733 ENDFUNC(GLOBAL(lshrsi3))
734 ENDFUNC(GLOBAL(lshrsi3_r0))
735 #endif
736
737 #ifdef L_movmem
738 .text
739 .balign 4
740 .global GLOBAL(movmem)
741 HIDDEN_FUNC(GLOBAL(movmem))
742 HIDDEN_ALIAS(movstr,movmem)
743 /* This would be a lot simpler if r6 contained the byte count
744 minus 64, and we wouldn't be called here for a byte count of 64. */
/* movmem: word-wise block copy from the address in r5 to the address in r4, 64 bytes per loop pass. */
/* r6 is a caller-supplied counter that is multiplied by 4 on entry. */
/* NOTE(review): the exact encoding of r6 is chosen by the call site and is not visible here - confirm. */
/* The words at offsets 48..0 of each 64-byte group are copied by calling into the movmemSI chain; */
/* the words at offsets 60, 56 and 52 are copied inline here. */
745 GLOBAL(movmem):
/* Save pr: the bsr below overwrites it, and the same return address is reused on every loop pass. */
746 sts.l pr,@-r15
747 shll2 r6
/* Enter movmemSI52 one instruction (2 bytes) late; its skipped first load is done in the delay slot. */
748 bsr GLOBAL(movmemSI52+2)
749 mov.l @(48,r5),r0
750 .balign 4
751 LOCAL(movmem_loop): /* Reached with rts */
752 mov.l @(60,r5),r0
753 add #-64,r6
754 mov.l r0,@(60,r4)
/* T = 1 when the counter reached exactly zero: finish this group in movmem_done and return. */
755 tst r6,r6
756 mov.l @(56,r5),r0
757 bt LOCAL(movmem_done)
758 mov.l r0,@(56,r4)
/* T = 1 when the counter is still positive: another full group follows. */
759 cmp/pl r6
760 mov.l @(52,r5),r0
761 add #64,r5
762 mov.l r0,@(52,r4)
763 add #64,r4
/* Tail-branch into the chain; its final rts returns to movmem_loop because pr is unchanged. */
764 bt GLOBAL(movmemSI52)
765 ! done all the large groups, do the remainder
766 ! jump to movmem+
/* r6 is negative here. Each movmemSI entry is 4 bytes, so adding r6 to the address 4 bytes past */
/* movmemSI4 selects the chain entry that copies the remaining -r6 bytes. */
767 mova GLOBAL(movmemSI4)+4,r0
768 add r6,r0
769 jmp @r0
/* The lds.l below is both the delay slot of the jmp above and the first instruction of movmem_done; */
/* either way it restores pr so the final rts returns to the original caller. */
770 LOCAL(movmem_done): ! share slot insn, works out aligned.
771 lds.l @r15+,pr
772 mov.l r0,@(56,r4)
773 mov.l @(52,r5),r0
774 rts
775 mov.l r0,@(52,r4)
776 .balign 4
777 ! ??? We need aliases movstr* for movmem* for the older libraries. These
777 ! aliases will be removed at some point in the future.
/* movmemSI64 .. movmemSI4: fixed-size copies of N bytes (N = 64, 60, ..., 4) from r5 to r4 via r0. */
/* Each entry copies the 32-bit word at offset N - 4 and falls through into the next smaller entry, */
/* so entering at movmemSIN copies the words at offsets N - 4 down to 0. Only movmemSI4 returns. */
/* Every entry is exactly two 2-byte instructions; movmem relies on this when it computes the */
/* addresses movmemSI52+2 and movmemSI4+4. */
779 .global GLOBAL(movmemSI64)
780 HIDDEN_FUNC(GLOBAL(movmemSI64))
781 HIDDEN_ALIAS(movstrSI64,movmemSI64)
782 GLOBAL(movmemSI64):
783 mov.l @(60,r5),r0
784 mov.l r0,@(60,r4)
785 .global GLOBAL(movmemSI60)
786 HIDDEN_FUNC(GLOBAL(movmemSI60))
787 HIDDEN_ALIAS(movstrSI60,movmemSI60)
788 GLOBAL(movmemSI60):
789 mov.l @(56,r5),r0
790 mov.l r0,@(56,r4)
791 .global GLOBAL(movmemSI56)
792 HIDDEN_FUNC(GLOBAL(movmemSI56))
793 HIDDEN_ALIAS(movstrSI56,movmemSI56)
794 GLOBAL(movmemSI56):
795 mov.l @(52,r5),r0
796 mov.l r0,@(52,r4)
797 .global GLOBAL(movmemSI52)
798 HIDDEN_FUNC(GLOBAL(movmemSI52))
799 HIDDEN_ALIAS(movstrSI52,movmemSI52)
800 GLOBAL(movmemSI52):
801 mov.l @(48,r5),r0
802 mov.l r0,@(48,r4)
803 .global GLOBAL(movmemSI48)
804 HIDDEN_FUNC(GLOBAL(movmemSI48))
805 HIDDEN_ALIAS(movstrSI48,movmemSI48)
806 GLOBAL(movmemSI48):
807 mov.l @(44,r5),r0
808 mov.l r0,@(44,r4)
809 .global GLOBAL(movmemSI44)
810 HIDDEN_FUNC(GLOBAL(movmemSI44))
811 HIDDEN_ALIAS(movstrSI44,movmemSI44)
812 GLOBAL(movmemSI44):
813 mov.l @(40,r5),r0
814 mov.l r0,@(40,r4)
815 .global GLOBAL(movmemSI40)
816 HIDDEN_FUNC(GLOBAL(movmemSI40))
817 HIDDEN_ALIAS(movstrSI40,movmemSI40)
818 GLOBAL(movmemSI40):
819 mov.l @(36,r5),r0
820 mov.l r0,@(36,r4)
821 .global GLOBAL(movmemSI36)
822 HIDDEN_FUNC(GLOBAL(movmemSI36))
823 HIDDEN_ALIAS(movstrSI36,movmemSI36)
824 GLOBAL(movmemSI36):
825 mov.l @(32,r5),r0
826 mov.l r0,@(32,r4)
827 .global GLOBAL(movmemSI32)
828 HIDDEN_FUNC(GLOBAL(movmemSI32))
829 HIDDEN_ALIAS(movstrSI32,movmemSI32)
830 GLOBAL(movmemSI32):
831 mov.l @(28,r5),r0
832 mov.l r0,@(28,r4)
833 .global GLOBAL(movmemSI28)
834 HIDDEN_FUNC(GLOBAL(movmemSI28))
835 HIDDEN_ALIAS(movstrSI28,movmemSI28)
836 GLOBAL(movmemSI28):
837 mov.l @(24,r5),r0
838 mov.l r0,@(24,r4)
839 .global GLOBAL(movmemSI24)
840 HIDDEN_FUNC(GLOBAL(movmemSI24))
841 HIDDEN_ALIAS(movstrSI24,movmemSI24)
842 GLOBAL(movmemSI24):
843 mov.l @(20,r5),r0
844 mov.l r0,@(20,r4)
845 .global GLOBAL(movmemSI20)
846 HIDDEN_FUNC(GLOBAL(movmemSI20))
847 HIDDEN_ALIAS(movstrSI20,movmemSI20)
848 GLOBAL(movmemSI20):
849 mov.l @(16,r5),r0
850 mov.l r0,@(16,r4)
851 .global GLOBAL(movmemSI16)
852 HIDDEN_FUNC(GLOBAL(movmemSI16))
853 HIDDEN_ALIAS(movstrSI16,movmemSI16)
854 GLOBAL(movmemSI16):
855 mov.l @(12,r5),r0
856 mov.l r0,@(12,r4)
857 .global GLOBAL(movmemSI12)
858 HIDDEN_FUNC(GLOBAL(movmemSI12))
859 HIDDEN_ALIAS(movstrSI12,movmemSI12)
860 GLOBAL(movmemSI12):
861 mov.l @(8,r5),r0
862 mov.l r0,@(8,r4)
863 .global GLOBAL(movmemSI8)
864 HIDDEN_FUNC(GLOBAL(movmemSI8))
865 HIDDEN_ALIAS(movstrSI8,movmemSI8)
866 GLOBAL(movmemSI8):
867 mov.l @(4,r5),r0
868 mov.l r0,@(4,r4)
869 .global GLOBAL(movmemSI4)
870 HIDDEN_FUNC(GLOBAL(movmemSI4))
871 HIDDEN_ALIAS(movstrSI4,movmemSI4)
872 GLOBAL(movmemSI4):
/* Last word: the store is done in the rts delay slot. */
873 mov.l @(0,r5),r0
874 rts
875 mov.l r0,@(0,r4)
876
877 ENDFUNC(GLOBAL(movmemSI64))
878 ENDFUNC(GLOBAL(movmemSI60))
879 ENDFUNC(GLOBAL(movmemSI56))
880 ENDFUNC(GLOBAL(movmemSI52))
881 ENDFUNC(GLOBAL(movmemSI48))
882 ENDFUNC(GLOBAL(movmemSI44))
883 ENDFUNC(GLOBAL(movmemSI40))
884 ENDFUNC(GLOBAL(movmemSI36))
885 ENDFUNC(GLOBAL(movmemSI32))
886 ENDFUNC(GLOBAL(movmemSI28))
887 ENDFUNC(GLOBAL(movmemSI24))
888 ENDFUNC(GLOBAL(movmemSI20))
889 ENDFUNC(GLOBAL(movmemSI16))
890 ENDFUNC(GLOBAL(movmemSI12))
891 ENDFUNC(GLOBAL(movmemSI8))
892 ENDFUNC(GLOBAL(movmemSI4))
893 ENDFUNC(GLOBAL(movmem))
894 #endif
895
896 #ifdef L_movmem_i4
897 .text
898 .global GLOBAL(movmem_i4_even)
899 .global GLOBAL(movmem_i4_odd)
900 .global GLOBAL(movmemSI12_i4)
901
902 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
903 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
904 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
905
906 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
907 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
908 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
909
/* movmem_i4_even / movmem_i4_odd: word-wise copy from r5 (read with post-increment) to r4, using */
/* r0..r3 as a four-word pipeline and r6 as a down-counter tested with dt. */
/* Tracing the loop for r6 >= 1 on entry: the even entry copies 2 * r6 + 2 words and the odd entry */
/* copies 2 * r6 + 3 words. NOTE(review): callers are assumed never to pass r6 = 0 - confirm. */
/* Clobbers r0-r3, r4, r5, r6 and T. */
910 .p2align 5
/* Exit taken from L_movmem_loop when the counter hits zero: store the last two loaded words. */
911 L_movmem_2mod4_end:
912 mov.l r0,@(16,r4)
913 rts
914 mov.l r1,@(20,r4)
915
916 .p2align 2
917
/* Even entry: preload two words and join the loop at L_movmem_start_even. */
918 GLOBAL(movmem_i4_even):
919 mov.l @r5+,r0
920 bra L_movmem_start_even
921 mov.l @r5+,r1
922
/* Odd entry: load three words and bias r4 by -4 so that the stores at offsets 4, 8 and 12 */
/* (the last one is the first instruction of the loop) land on destination words 0, 1 and 2. */
923 GLOBAL(movmem_i4_odd):
924 mov.l @r5+,r1
925 add #-4,r4
926 mov.l @r5+,r2
927 mov.l @r5+,r3
928 mov.l r1,@(4,r4)
929 mov.l r2,@(8,r4)
930
931 L_movmem_loop:
932 mov.l r3,@(12,r4)
933 dt r6
934 mov.l @r5+,r0
/* The load of r1 is the delay slot and executes whether or not the branch is taken. */
935 bt/s L_movmem_2mod4_end
936 mov.l @r5+,r1
937 add #16,r4
938 L_movmem_start_even:
939 mov.l @r5+,r2
940 mov.l @r5+,r3
941 mov.l r0,@r4
942 dt r6
943 mov.l r1,@(4,r4)
/* The store of r2 is the delay slot and executes on both paths; r3 is stored either at the top */
/* of the loop or in the delay slot of the final rts. */
944 bf/s L_movmem_loop
945 mov.l r2,@(8,r4)
946 rts
947 mov.l r3,@(12,r4)
948
949 ENDFUNC(GLOBAL(movmem_i4_even))
950 ENDFUNC(GLOBAL(movmem_i4_odd))
951
/* movmemSI12_i4: copy 12 bytes (three 32-bit words) from r5 to r4. */
/* All three words are loaded into r0-r2 before any store; the last store is in the rts delay slot. */
/* r4 and r5 are left unchanged; r0, r1 and r2 are clobbered. */
952 .p2align 4
953 GLOBAL(movmemSI12_i4):
954 mov.l @r5,r0
955 mov.l @(4,r5),r1
956 mov.l @(8,r5),r2
957 mov.l r0,@r4
958 mov.l r1,@(4,r4)
959 rts
960 mov.l r2,@(8,r4)
961
962 ENDFUNC(GLOBAL(movmemSI12_i4))
963 #endif
964
965 #ifdef L_mulsi3
966
967
968 .global GLOBAL(mulsi3)
969 HIDDEN_FUNC(GLOBAL(mulsi3))
970
971 ! r4 = aabb
972 ! r5 = ccdd
973 ! r0 = aabb*ccdd via partial products
974 !
975 ! if aa == 0 and cc == 0
976 ! r0 = bb*dd
977 !
978 ! else
979 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
980 !
981
/* mulsi3: 32 x 32 -> 32 bit multiply of r4 by r5 built from 16 x 16 unsigned multiplies (mulu.w). */
/* The result is returned in r0. Clobbers r2, r3, macl and T; the slow path also clobbers r1. */
/* Fast path: when both upper halves are zero, the single product of the lower halves is the answer. */
982 GLOBAL(mulsi3):
983 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
984 mov r5,r3 ! r3 = ccdd
985 swap.w r4,r2 ! r2 = bbaa
986 xtrct r2,r3 ! r3 = aacc
987 tst r3,r3 ! msws zero ?
988 bf hiset
989 rts ! yes - then we have the answer
990 sts macl,r0
991
/* Slow path: add the two cross products, shifted left by 16, to the low product. mulu.w only reads */
/* the low 16 bits of its operands, so r2 supplies aa and r3 supplies cc. */
992 hiset: sts macl,r0 ! r0 = bb*dd
993 mulu.w r2,r5 ! brewing macl = aa*dd
994 sts macl,r1
995 mulu.w r3,r4 ! brewing macl = cc*bb
996 sts macl,r2
997 add r1,r2
998 shll16 r2
999 rts
1000 add r2,r0
1001
1002 ENDFUNC(GLOBAL(mulsi3))
1003 #endif
1004
1005 /*------------------------------------------------------------------------------
1006 32 bit signed integer division that uses FPU double precision division. */
1007
1008 #ifdef L_sdivsi3_i4
1009 .title "SH DIVIDE"
1010
1011 #if defined (__SH4__) || defined (__SH2A__)
1012 /* This variant is used when FPSCR.PR = 1 (double precision) is the default
1013 setting.
1014 Args in r4 and r5, result in fpul, clobber dr0, dr2. */
1015
/* sdivsi3_i4 (double precision is the default mode): signed r4 / r5 using the FPU. */
/* Both operands are moved through fpul and converted to double (dr0 = dividend, dr2 = divisor), */
/* divided, and the quotient is converted back to an integer in fpul by ftrc in the rts delay slot. */
1016 .global GLOBAL(sdivsi3_i4)
1017 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1018 GLOBAL(sdivsi3_i4):
1019 lds r4,fpul
1020 float fpul,dr0
1021 lds r5,fpul
1022 float fpul,dr2
1023 fdiv dr2,dr0
1024 rts
1025 ftrc dr0,fpul
1026
1027 ENDFUNC(GLOBAL(sdivsi3_i4))
1028
1029 #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1030 /* This variant is used when FPSCR.PR = 0 (single precision) is the default
1031 setting.
1032 Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
1033 For this to work, we must temporarily switch the FPU to double precision,
1034 but we had better not touch FPSCR.FR. See PR 6526. */
1035
/* sdivsi3_i4 (single precision is the default mode): signed r4 / r5 using the FPU in double precision. */
/* The divide sequence itself is the same as in the double-precision-default variant; this version */
/* additionally switches FPSCR.PR to 1 around it and restores the previous mode before returning. */
1036 .global GLOBAL(sdivsi3_i4)
1037 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
1038 GLOBAL(sdivsi3_i4):
1039
1040 #ifndef __SH4A__
/* Without fpchg: r2 keeps the original FPSCR value, r3 (saved on the stack and restored in the */
/* rts delay slot) holds the modified value with the PR bit OR-ed in. */
1041 mov.l r3,@-r15
1042 sts fpscr,r2
1043 mov #8,r3
1044 swap.w r3,r3 // r3 = 1 << 19 (FPSCR.PR bit)
1045 or r2,r3
1046 lds r3,fpscr // Set FPSCR.PR = 1.
1047 lds r4,fpul
1048 float fpul,dr0
1049 lds r5,fpul
1050 float fpul,dr2
1051 fdiv dr2,dr0
1052 ftrc dr0,fpul
/* Put back the FPSCR value that was read on entry. */
1053 lds r2,fpscr
1054 rts
1055 mov.l @r15+,r3
1056 #else
1057 /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit. */
1058 fpchg
1059 lds r4,fpul
1060 float fpul,dr0
1061 lds r5,fpul
1062 float fpul,dr2
1063 fdiv dr2,dr0
1064 ftrc dr0,fpul
/* The second fpchg, in the rts delay slot, flips PR back to its value on entry. */
1065 rts
1066 fpchg
1067
1068 #endif /* __SH4A__ */
1069
1070 ENDFUNC(GLOBAL(sdivsi3_i4))
1071 #endif /* ! __SH4__ || __SH2A__ */
1072 #endif /* L_sdivsi3_i4 */
1073
1074 //------------------------------------------------------------------------------
1075 #ifdef L_sdivsi3
1076 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1077 sh2e/sh3e code. */
1078 !!
1079 !! Steve Chamberlain
1080 !! sac@cygnus.com
1081 !!
1082 !!
1083
1084 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1085
/* sdivsi3: signed 32-bit division r4 / r5 with the div0s/div1 step-division instructions. */
/* The quotient is returned in r0. A zero divisor returns 0 (see div0 at the end). */
/* Register roles: r0 = divisor, r1 = dividend, then quotient bits; r2 = constant 0; */
/* r3 = sign extension of the dividend, then running partial remainder. */
1086 .global GLOBAL(sdivsi3)
1087 .align 2
1088
1089 FUNC(GLOBAL(sdivsi3))
1090 GLOBAL(sdivsi3):
1091 mov r4,r1
1092 mov r5,r0
1093
1094 tst r0,r0
1095 bt div0
1096 mov #0,r2
/* div0s against zero leaves T = sign of the dividend. subc r3,r3 then gives r3 = -T (0 or -1) and */
/* keeps T, and subc r2,r1 subtracts T from the dividend (a negative dividend is reduced by one). */
1097 div0s r2,r1
1098 subc r3,r3
1099 subc r2,r1
/* Initialise the division state (Q, M, T) from the signs of the 64-bit dividend r3:r1 and divisor r0. */
1100 div0s r0,r3
/* 32 unrolled steps: rotcl shifts the next dividend bit out of r1 into T and the previous quotient */
/* bit into r1 bit 0; div1 performs one division step on r3 and leaves the new quotient bit in T. */
1101 rotcl r1
1102 div1 r0,r3
1103 rotcl r1
1104 div1 r0,r3
1105 rotcl r1
1106 div1 r0,r3
1107 rotcl r1
1108 div1 r0,r3
1109 rotcl r1
1110 div1 r0,r3
1111 rotcl r1
1112 div1 r0,r3
1113 rotcl r1
1114 div1 r0,r3
1115 rotcl r1
1116 div1 r0,r3
1117 rotcl r1
1118 div1 r0,r3
1119 rotcl r1
1120 div1 r0,r3
1121 rotcl r1
1122 div1 r0,r3
1123 rotcl r1
1124 div1 r0,r3
1125 rotcl r1
1126 div1 r0,r3
1127 rotcl r1
1128 div1 r0,r3
1129 rotcl r1
1130 div1 r0,r3
1131 rotcl r1
1132 div1 r0,r3
1133 rotcl r1
1134 div1 r0,r3
1135 rotcl r1
1136 div1 r0,r3
1137 rotcl r1
1138 div1 r0,r3
1139 rotcl r1
1140 div1 r0,r3
1141 rotcl r1
1142 div1 r0,r3
1143 rotcl r1
1144 div1 r0,r3
1145 rotcl r1
1146 div1 r0,r3
1147 rotcl r1
1148 div1 r0,r3
1149 rotcl r1
1150 div1 r0,r3
1151 rotcl r1
1152 div1 r0,r3
1153 rotcl r1
1154 div1 r0,r3
1155 rotcl r1
1156 div1 r0,r3
1157 rotcl r1
1158 div1 r0,r3
1159 rotcl r1
1160 div1 r0,r3
1161 rotcl r1
1162 div1 r0,r3
1163 rotcl r1
1164 div1 r0,r3
/* Shift in the last quotient bit, then add the T bit that rotcl shifted out (r2 is 0) as the final */
/* correction of the quotient. The result is moved to r0 in the rts delay slot. */
1165 rotcl r1
1166 addc r2,r1
1167 rts
1168 mov r1,r0
1169
1170
/* Division by zero: return 0. */
1171 div0: rts
1172 mov #0,r0
1173
1174 ENDFUNC(GLOBAL(sdivsi3))
1175 #endif /* L_sdivsi3 */
1176
1177 /*------------------------------------------------------------------------------
1178 32 bit unsigned integer division that uses FPU double precision division. */
1179
1180 #ifdef L_udivsi3_i4
1181 .title "SH DIVIDE"
1182
1183 #if defined (__SH4__) || defined (__SH2A__)
1184 /* This variant is used when FPSCR.PR = 1 (double precision) is the default
1185 setting.
1186 Args in r4 and r5, result in fpul,
1187 clobber r0, r1, r4, r5, dr0, dr2, dr4, and t bit */
1188
/* udivsi3_i4 (double precision is the default mode): unsigned r4 / r5 using the FPU. */
/* float only converts signed integers, so each operand has its top bit flipped (a bias of -2^31), */
/* is converted, and then has the constant at L1 (2^31 as a double) added back in dr4. */
/* The truncated quotient is returned in fpul. */
1189 .global GLOBAL(udivsi3_i4)
1190 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1191 GLOBAL(udivsi3_i4):
/* A divisor of 0 or 1 takes the trivial path, which returns the dividend unchanged. */
/* The delay slot turns r1 from 1 into 0x80000000, the mask used to flip the top bit. */
1192 mov #1,r1
1193 cmp/hi r1,r5
1194 bf/s trivial
1195 rotr r1
1196 xor r1,r4
1197 lds r4,fpul
1198 mova L1,r0
/* Load the 2^31 constant into dr4, either as one 64-bit move or as two 32-bit halves. */
/* NOTE(review): DR40 and DR41 are not defined in this part of the file; presumably they name the */
/* two single-precision halves of dr4 in the required order - confirm in lib1funcs.h. */
1199 #ifdef FMOVD_WORKS
1200 fmov.d @r0+,dr4
1201 #else
1202 fmov.s @r0+,DR40
1203 fmov.s @r0,DR41
1204 #endif
1205 float fpul,dr0
1206 xor r1,r5
1207 lds r5,fpul
1208 float fpul,dr2
1209 fadd dr4,dr0
1210 fadd dr4,dr2
1211 fdiv dr2,dr0
1212 rts
1213 ftrc dr0,fpul
1214
1215 trivial:
1216 rts
1217 lds r4,fpul
1218
1219 .align 2
1220 #ifdef FMOVD_WORKS
1221 .align 3 // Make the double below 8 byte aligned.
1222 #endif
1223 L1:
1224 .double 2147483648
1225
1226 ENDFUNC(GLOBAL(udivsi3_i4))
1227
1228 #elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1229 /* This variant is used when FPSCR.PR = 0 (single precision) is the default
1230 setting.
1231 Args in r4 and r5, result in fpul,
1232 clobber r0, r1, r4, r5, dr0, dr2, dr4.
1233 For this to work, we must temporarily switch the FPU to double precision,
1234 but we had better not touch FPSCR.FR. See PR 6526. */
1235
/* udivsi3_i4 (single precision is the default mode): unsigned r4 / r5 using the FPU in double precision. */
/* Same method as the double-precision-default variant: flip the top bit of each operand, convert with */
/* float, add 2^31 (constant at L1) back, divide and truncate into fpul. In addition FPSCR is switched */
/* to double precision for the duration of the computation and restored before returning. */
1236 .global GLOBAL(udivsi3_i4)
1237 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1238 GLOBAL(udivsi3_i4):
1239
1240 #ifndef __SH4A__
/* A divisor of 0 or 1 takes the trivial path, which returns the dividend unchanged. */
1241 mov #1,r1
1242 cmp/hi r1,r5
1243 bf/s trivial
1244 rotr r1 // r1 = 1 << 31
/* Push the current FPSCR, read it back into r0 and OR in the mode bits stored at L2. */
1245 sts.l fpscr,@-r15
1246 xor r1,r4
1247 mov.l @(0,r15),r0
1248 xor r1,r5
1249 mov.l L2,r1
1250 lds r4,fpul
1251 or r0,r1
1252 mova L1,r0
1253 lds r1,fpscr
1254 #ifdef FMOVD_WORKS
1255 fmov.d @r0+,dr4
1256 #else
1257 fmov.s @r0+,DR40
1258 fmov.s @r0,DR41
1259 #endif
1260 float fpul,dr0
1261 lds r5,fpul
1262 float fpul,dr2
1263 fadd dr4,dr0
1264 fadd dr4,dr2
1265 fdiv dr2,dr0
1266 ftrc dr0,fpul
/* Pop the saved FPSCR in the rts delay slot. */
1267 rts
1268 lds.l @r15+,fpscr
1269
/* trivial is 4 bytes of code and L2 is a 4-byte constant, so aligning trivial to 8 bytes places L1 */
/* on an 8-byte boundary as well. */
1270 #ifdef FMOVD_WORKS
1271 .align 3 // Make the double below 8 byte aligned.
1272 #endif
1273 trivial:
1274 rts
1275 lds r4,fpul
1276
1277 .align 2
1278 L2:
1279 #ifdef FMOVD_WORKS
1280 .long 0x180000 // FPSCR.PR = 1, FPSCR.SZ = 1
1281 #else
1282 .long 0x80000 // FPSCR.PR = 1
1283 #endif
1284 L1:
1285 .double 2147483648
1286
1287 #else
1288 /* On SH4A we can use the fpchg instruction to flip the FPSCR.PR bit.
1289 Although on SH4A fmovd usually works, it would require either additional
1290 two fschg instructions or an FPSCR push + pop. It's not worth the effort
1291 for loading only one double constant. */
1292 mov #1,r1
1293 cmp/hi r1,r5
1294 bf/s trivial
1295 rotr r1 // r1 = 1 << 31
1296 fpchg
1297 mova L1,r0
1298 xor r1,r4
1299 fmov.s @r0+,DR40
1300 lds r4,fpul
1301 fmov.s @r0,DR41
1302 xor r1,r5
1303 float fpul,dr0
1304 lds r5,fpul
1305 float fpul,dr2
1306 fadd dr4,dr0
1307 fadd dr4,dr2
1308 fdiv dr2,dr0
1309 ftrc dr0,fpul
/* The second fpchg, in the rts delay slot, flips PR back to its value on entry. */
1310 rts
1311 fpchg
1312
1313 trivial:
1314 rts
1315 lds r4,fpul
1316
1317 .align 2
1318 L1:
1319 .double 2147483648
1320
1321 #endif /* __SH4A__ */
1322
1323
1324 ENDFUNC(GLOBAL(udivsi3_i4))
1325 #endif /* ! __SH4__ */
1326 #endif /* L_udivsi3_i4 */
1327
1328 #ifdef L_udivsi3
1329 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1330 sh2e/sh3e code. */
1331
1332 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
/* udivsi3: unsigned 32-bit division r4 / r5 with div0u/div1 step division; the quotient is returned in r0. */
/* pr is saved on the stack because the helpers below are called with bsr. */
/* Two paths are used: a divisor that fits in 16 bits is handled as two 16-step divisions, */
/* a larger divisor as a single 16-step division. */
1333 .global GLOBAL(udivsi3)
1334 HIDDEN_FUNC(GLOBAL(udivsi3))
1335
/* Helpers for the small-divisor path: div8 runs 8 div1 steps and div7 runs 7, the last one in the */
/* rts delay slot. The callers put one extra div1 in the bsr delay slot in front of div7. */
1336 LOCAL(div8):
1337 div1 r5,r4
1338 LOCAL(div7):
1339 div1 r5,r4; div1 r5,r4; div1 r5,r4
1340 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1341
/* Helper for the large-divisor path: together with the rotcl in the delay slot of the calling bsr, */
/* one call performs four (rotcl r0, div1) pairs. */
1342 LOCAL(divx4):
1343 div1 r5,r4; rotcl r0
1344 div1 r5,r4; rotcl r0
1345 div1 r5,r4; rotcl r0
1346 rts; div1 r5,r4
1347
1348 GLOBAL(udivsi3):
1349 sts.l pr,@-r15
/* T = 1 when the divisor equals its own zero-extended low 16 bits, i.e. it fits in 16 bits. */
1350 extu.w r5,r0
1351 cmp/eq r5,r0
/* The plain bf used for SH1 has no delay slot, so on SH1 the div0u below only runs on the */
/* fall-through path and is repeated at large_divisor. With bf/s it is the delay slot and runs */
/* on both paths. */
1352 #ifdef __sh1__
1353 bf LOCAL(large_divisor)
1354 #else
1355 bf/s LOCAL(large_divisor)
1356 #endif
1357 div0u
/* Small divisor: keep a half-swapped copy of the dividend in r0, divide the upper 16 bits of the */
/* dividend first, with the divisor moved into the upper half of r5 by the bsr delay slot. */
1358 swap.w r4,r0
1359 shlr16 r4
1360 bsr LOCAL(div8)
1361 shll16 r5
1362 bsr LOCAL(div7)
1363 div1 r5,r4
/* Merge the first 16 quotient bits into r0 and set up r4 for the second 16 steps. */
/* NOTE(review): the exact bit layout after these xtrct/swap.w steps is intricate - verify against the */
/* SH programming manual before changing anything here. */
1364 xtrct r4,r0
1365 xtrct r0,r4
1366 bsr LOCAL(div8)
1367 swap.w r4,r4
1368 bsr LOCAL(div7)
1369 div1 r5,r4
1370 lds.l @r15+,pr
1371 xtrct r4,r0
1372 swap.w r0,r0
1373 rotcl r0
/* The delay slot undoes the shll16 applied to the divisor above. */
1374 rts
1375 shlr16 r5
1376
1377 LOCAL(large_divisor):
1378 #ifdef __sh1__
1379 div0u
1380 #endif
/* r0 = low 16 bits of the dividend placed in its upper half, r4 = upper 16 bits of the dividend. */
/* Sixteen (rotcl r0, div1) steps follow: rotcl feeds the next dividend bit through T and collects */
/* the previous quotient bit in r0. */
1381 mov #0,r0
1382 xtrct r4,r0
1383 xtrct r0,r4
1384 bsr LOCAL(divx4)
1385 rotcl r0
1386 bsr LOCAL(divx4)
1387 rotcl r0
1388 bsr LOCAL(divx4)
1389 rotcl r0
1390 bsr LOCAL(divx4)
1391 rotcl r0
1392 lds.l @r15+,pr
/* The final rotcl, in the delay slot, shifts the last quotient bit into r0. */
1393 rts
1394 rotcl r0
1395
1396 ENDFUNC(GLOBAL(udivsi3))
1397 #endif /* L_udivsi3 */
1398
1399 #ifdef L_set_fpscr /* set_fpscr: load FPSCR from r4 and rewrite both words of fpscr_values to match. */
1400 #if !defined (__SH2A_NOFPU__)
1401 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
1402 .global GLOBAL(set_fpscr)
1403 HIDDEN_FUNC(GLOBAL(set_fpscr))
1404 GLOBAL(set_fpscr): /* in: r4 = new FPSCR value; writes r0-r3 as scratch; r12 is saved and restored in PIC builds */
1405 lds r4,fpscr /* install the new value */
1406 #ifdef __PIC__ /* PIC: obtain the address of fpscr_values through the GOT, result in r1 */
1407 mov.l r12,@-r15 /* r12 is borrowed as the GOT pointer */
1408 #ifdef __vxworks
1409 mov.l LOCAL(set_fpscr_L0_base),r12 /* r12 = ___GOTT_BASE__ */
1410 mov.l LOCAL(set_fpscr_L0_index),r0 /* r0 = ___GOTT_INDEX__ */
1411 mov.l @r12,r12 /* r12 = word stored at ___GOTT_BASE__ */
1412 mov.l @(r0,r12),r12 /* r12 = word at that base plus the index; used as the GOT address below */
1413 #else
1414 mova LOCAL(set_fpscr_L0),r0 /* r0 = address of the literal itself */
1415 mov.l LOCAL(set_fpscr_L0),r12 /* r12 = value of the _GLOBAL_OFFSET_TABLE_ literal */
1416 add r0,r12 /* literal address + literal value, used as the GOT address (presumably the literal is PC-relative -- confirm) */
1417 #endif
1418 mov.l LOCAL(set_fpscr_L1),r0 /* r0 = fpscr_values@GOT */
1419 mov.l @(r0,r12),r1 /* r1 = address of fpscr_values, read from its GOT slot */
1420 mov.l @r15+,r12 /* give r12 back */
1421 #else
1422 mov.l LOCAL(set_fpscr_L1),r1 /* non-PIC: r1 = address of fpscr_values from the literal */
1423 #endif
1424 swap.w r4,r0 /* halves exchanged: FPSCR bits 16..23 now sit in the low byte of r0 */
1425 or #24,r0 /* set FPSCR bits 19 and 20 (PR and SZ in the SH-4 manual's FPSCR layout -- confirm) */
1426 #ifndef FMOVD_WORKS
1427 xor #16,r0 /* clear bit 20 again, leaving only bit 19 set */
1428 #endif
1429 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1430 swap.w r0,r3 /* back to normal half order */
1431 mov.l r3,@(4,r1) /* second word = value with bit 19 set (and bit 20 too when FMOVD_WORKS) */
1432 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1433 swap.w r0,r2 /* back to normal half order */
1434 mov.l r2,@r1 /* first word = value with bit 19 set (and bit 20 too when FMOVD_WORKS) */
1435 #endif
1436 #ifndef FMOVD_WORKS
1437 xor #8,r0 /* clear bit 19: bits 19 and 20 are now both 0 */
1438 #else
1439 xor #24,r0 /* clear bits 19 and 20 together */
1440 #endif
1441 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1442 swap.w r0,r2
1443 rts
1444 mov.l r2,@r1 /* delay slot: first word = value with bits 19 and 20 clear */
1445 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1446 swap.w r0,r3
1447 rts
1448 mov.l r3,@(4,r1) /* delay slot: second word = value with bits 19 and 20 clear */
1449 #endif
1450 .align 2
1451 #ifdef __PIC__ /* literal pool read by the mov.l / mova instructions above */
1452 #ifdef __vxworks
1453 LOCAL(set_fpscr_L0_base):
1454 .long ___GOTT_BASE__
1455 LOCAL(set_fpscr_L0_index):
1456 .long ___GOTT_INDEX__
1457 #else
1458 LOCAL(set_fpscr_L0):
1459 .long _GLOBAL_OFFSET_TABLE_
1460 #endif
1461 LOCAL(set_fpscr_L1):
1462 .long GLOBAL(fpscr_values@GOT)
1463 #else
1464 LOCAL(set_fpscr_L1):
1465 .long GLOBAL(fpscr_values)
1466 #endif
1467
1468 ENDFUNC(GLOBAL(set_fpscr))
1469 #ifndef NO_FPSCR_VALUES /* storage for the two words written above, unless the build opts out */
1470 #ifdef __ELF__
1471 .comm GLOBAL(fpscr_values),8,4
1472 #else
1473 .comm GLOBAL(fpscr_values),8
1474 #endif /* ELF */
1475 #endif /* NO_FPSCR_VALUES */
1476 #endif /* SH2E / SH3E / SH4 */
1477 #endif /* __SH2A_NOFPU__ */
1478 #endif /* L_set_fpscr */
1479 #ifdef L_ic_invalidate /* ic_invalidate: instruction-cache maintenance for the address passed in r4. */
1480
1481 #if defined(__SH4A__)
1482 .global GLOBAL(ic_invalidate)
1483 HIDDEN_FUNC(GLOBAL(ic_invalidate))
1484 GLOBAL(ic_invalidate): /* SH4A: dedicated cache instructions, no helper array needed */
1485 ocbwb @r4 /* write back the operand-cache block containing @r4 */
1486 synco /* synchronise before touching the instruction cache */
1487 icbi @r4 /* invalidate the instruction-cache block for @r4 */
1488 rts
1489 nop
1490 ENDFUNC(GLOBAL(ic_invalidate))
1491 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
1492 /* For system code, we use ic_invalidate_line_i, but user code
1493 needs a different mechanism. A kernel call is generally not
1494 available, and it would also be slow. Different SH4 variants use
1495 different sizes and associativities of the Icache. We use a small
1496 bit of dispatch code that can be put hidden in every shared object,
1497 which calls the actual processor-specific invalidation code in a
1498 separate module.
1499 Or if you have operating system support, the OS could mmap the
1500 processor-specific code from a single page, since it is highly
1501 repetitive. */
1502 .global GLOBAL(ic_invalidate)
1503 HIDDEN_FUNC(GLOBAL(ic_invalidate))
1504 GLOBAL(ic_invalidate): /* in: r4 = address; jumps into ic_invalidate_array with r1 = array base and r4 = address - base */
1505 #ifdef __pic__ /* PIC: fetch the address of ic_invalidate_array from the GOT into r1 */
1506 #ifdef __vxworks
1507 mov.l 1f,r1 /* r1 = ___GOTT_BASE__ */
1508 mov.l 2f,r0 /* r0 = ___GOTT_INDEX__ */
1509 mov.l @r1,r1 /* r1 = word stored at ___GOTT_BASE__ */
1510 mov.l 0f,r2 /* r2 = ic_invalidate_array@GOT */
1511 mov.l @(r0,r1),r0 /* r0 = word at that base plus the index; used as the GOT address */
1512 #else
1513 mov.l 1f,r1 /* r1 = value of the _GLOBAL_OFFSET_TABLE_ literal */
1514 mova 1f,r0 /* r0 = address of that literal */
1515 mov.l 0f,r2 /* r2 = ic_invalidate_array@GOT */
1516 add r1,r0 /* r0 = literal address + literal value, used as the GOT address */
1517 #endif
1518 mov.l @(r0,r2),r1 /* r1 = address of ic_invalidate_array, read from its GOT slot */
1519 #else
1520 mov.l 0f,r1 /* non-PIC: r1 = address of ic_invalidate_array from the literal */
1521 #endif
1522 ocbwb @r4 /* write back the operand-cache block containing @r4 */
1523 mov.l @(8,r1),r0 /* r0 = mask word stored at array + 8 */
1524 sub r1,r4 /* r4 = address - array base */
1525 and r4,r0 /* keep only the offset bits selected by the mask */
1526 add r1,r0 /* r0 = entry point inside the array */
1527 jmp @r0 /* tail jump: the array code ends in rts and so returns to our caller */
1528 mov.l @(4,r1),r0 /* delay slot: r0 = word stored at array + 4, handed to the array code */
1529 .align 2
1530 #ifndef __pic__
1531 0: .long GLOBAL(ic_invalidate_array)
1532 #else /* __pic__ */
1533 .global GLOBAL(ic_invalidate_array)
1534 0: .long GLOBAL(ic_invalidate_array)@GOT
1535 #ifdef __vxworks
1536 1: .long ___GOTT_BASE__
1537 2: .long ___GOTT_INDEX__
1538 #else
1539 1: .long _GLOBAL_OFFSET_TABLE_
1540 #endif
1541 #endif /* __pic__ */
1542 ENDFUNC(GLOBAL(ic_invalidate)) /* placed after the #endif so that non-PIC builds get the end-of-function marker too */
1543 #endif /* SH4 */
1544 #endif /* L_ic_invalidate */
1545
1546 #ifdef L_ic_invalidate_array /* ic_invalidate_array: code entered by the SH4 ic_invalidate dispatcher with r1 = array base, r4 = address - base. */
1547 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)))
1548 .global GLOBAL(ic_invalidate_array)
1549 /* This is needed when an SH4 dso with trampolines is used on SH4A. The SH4 dispatcher jumps to base + ((address - base) & mask) with the mask read from offset 8, so that word must be zero for execution to start at offset 0. */
1550 .balign 4 /* the dispatcher reads longwords at offsets 4 and 8 of this array */
1551 FUNC(GLOBAL(ic_invalidate_array))
1552 GLOBAL(ic_invalidate_array):
1553 bra 1f /* offset 0: hop over the filler and the mask word */
1554 add r1,r4 /* offset 2, delay slot: undo the dispatcher's "sub r1,r4" so r4 is the address again */
1555 nop; nop /* offsets 4..7: filler; the dispatcher loads this word into r0 but nothing here reads r0 */
1556 .long 0 /* offset 8: dispatcher mask; zero means every address enters at offset 0 */
1557 1: synco
1558 icbi @r4 /* invalidate the instruction-cache block for the address */
1559 rts; nop /* icbi stays ahead of the rts, with a nop in the delay slot as before */
1560 ENDFUNC(GLOBAL(ic_invalidate_array))
1561 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
1562 .global GLOBAL(ic_invalidate_array)
1563 .p2align 5
1564 FUNC(GLOBAL(ic_invalidate_array))
1565 /* This must be aligned to the beginning of a cache line. */
1566 GLOBAL(ic_invalidate_array): /* each outer .rept iteration below emits 32 bytes; the dispatcher reads offsets 4 and 8 of the first one */
1567 #ifndef WAYS
1568 #define WAYS 4 /* defaults used when the build does not define WAYS itself */
1569 #define WAY_SIZE 0x4000
1570 #endif
1571 #if WAYS == 1
1572 .rept WAY_SIZE * WAYS / 32
1573 rts /* single way: return as soon as the line has been entered */
1574 nop
1575 .rept 7
1576 .long WAY_SIZE - 32 /* fills the line; the copy at offset 8 is the dispatcher mask */
1577 .endr
1578 .endr
1579 #elif WAYS <= 6
1580 .rept WAY_SIZE * WAYS / 32
1581 braf r0 /* r0 = WAY_SIZE + 8 on entry: target is offset 12 of the line WAY_SIZE bytes further on */
1582 add #-8,r0 /* delay slot: r0 = WAY_SIZE for the following hops */
1583 .long WAY_SIZE + 8 /* offset 4: value the dispatcher loads into r0 */
1584 .long WAY_SIZE - 32 /* offset 8: dispatcher mask */
1585 .rept WAYS-2
1586 braf r0 /* hop on by WAY_SIZE + 4: the next slot of the same line one way further */
1587 nop
1588 .endr
1589 .rept 7 - WAYS
1590 rts /* first copy is the slot reached in the last way; the rest pad the line to 32 bytes */
1591 nop
1592 .endr
1593 .endr
1594 #else /* WAYS > 6 */
1595 /* This variant needs two different pages for mmap-ing. NOTE(review): as assembled here, the braf target (its own address + 4 + WAY_SIZE) is the .long WAY_SIZE word of the next braf line; presumably the mmap layout differs -- confirm before enabling WAYS > 6. */
1596 .rept WAYS-1
1597 .rept WAY_SIZE / 32
1598 braf r0 /* r0 = WAY_SIZE, loaded by the dispatcher from offset 4 */
1599 nop
1600 .long WAY_SIZE /* offset 4 */
1601 .rept 6
1602 .long WAY_SIZE - 32 /* the first copy, at offset 8, is the dispatcher mask */
1603 .endr
1604 .endr
1605 .endr
1606 .rept WAY_SIZE / 32
1607 rts /* final way: return; the nops fill the 32-byte line */
1608 .rept 15
1609 nop
1610 .endr
1611 .endr
1612 #endif /* WAYS */
1613 ENDFUNC(GLOBAL(ic_invalidate_array))
1614 #endif /* SH4 */
1615 #endif /* L_ic_invalidate_array */
1616
1617
1618 #ifdef L_div_table /* udivsi3_i4i / sdivsi3_i4i: integer division by reciprocal table (divisors up to 128) or div1 steps (larger divisors). */
1619
1620 #if defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
1621 /* This code uses shld, thus is not suitable for SH1 / SH2. */
1622
1623 /* Signed / unsigned division without use of FPU, optimized for SH4.
1624 Uses a lookup table for divisors in the range -128 .. +128, and
1625 div1 with case distinction for larger divisors in three more ranges.
1626 The code is lumped together with the table to allow the use of mova. */
1627 #ifdef __LITTLE_ENDIAN__ /* byte offsets, inside a 32-bit word in memory, used with mov.b on the stack below */
1628 #define L_LSB 0 /* least significant byte */
1629 #define L_LSWMSB 1 /* upper byte of the low 16-bit half (bits 8..15) */
1630 #define L_MSWLSB 2 /* lower byte of the high 16-bit half (bits 16..23) */
1631 #else
1632 #define L_LSB 3
1633 #define L_LSWMSB 2
1634 #define L_MSWLSB 1
1635 #endif
1636
1637 .balign 4
1638 .global GLOBAL(udivsi3_i4i)
1639 FUNC(GLOBAL(udivsi3_i4i))
1640 GLOBAL(udivsi3_i4i): /* unsigned r4 / r5, quotient in r0; r4 and r5 are pushed and popped again on every path, r1 is scratch */
1641 mov.w LOCAL(c128_w), r1 /* r1 = 128 */
1642 div0u /* initialise the div1 state for an unsigned division */
1643 mov r4,r0
1644 shlr8 r0 /* r0 = dividend >> 8 */
1645 cmp/hi r1,r5 /* T = divisor > 128 (unsigned) */
1646 extu.w r5,r1 /* r1 = low 16 bits of the divisor */
1647 bf LOCAL(udiv_le128) /* divisor <= 128: table-driven multiply */
1648 cmp/eq r5,r1 /* T = divisor fits in 16 bits */
1649 bf LOCAL(udiv_ge64k) /* divisor >= 0x10000 */
1650 shlr r0 /* 129..0xffff: r0 = dividend >> 9, bit 8 of the dividend drops into T */
1651 mov r5,r1 /* keep the unshifted divisor for the stack save below */
1652 shll16 r5 /* divisor into the upper half for div1 */
1653 mov.l r4,@-r15 /* save r4 */
1654 div1 r5,r0
1655 mov.l r1,@-r15 /* save the original r5 value */
1656 div1 r5,r0
1657 div1 r5,r0
1658 bra LOCAL(udiv_25) /* continue in the step sequence shared with sdivsi3_i4i */
1659 div1 r5,r0 /* delay slot: fourth step */
1660
1661 LOCAL(div_le128): /* entry from sdivsi3_i4i (non-negative quotient); r4 and r5 were pushed there */
1662 mova LOCAL(div_table_ix),r0
1663 bra LOCAL(div_le128_2)
1664 mov.b @(r0,r5),r1 /* delay slot: r1 = div_table_ix[divisor] */
1665 LOCAL(udiv_le128):
1666 mov.l r4,@-r15 /* save r4 */
1667 mova LOCAL(div_table_ix),r0
1668 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], a signed byte offset */
1669 mov.l r5,@-r15 /* save r5 */
1670 LOCAL(div_le128_2):
1671 mova LOCAL(div_table_inv),r0
1672 mov.l @(r0,r1),r1 /* r1 = reciprocal word at div_table_inv + offset */
1673 mov r5,r0
1674 tst #0xfe,r0 /* T = 1 when the divisor is 0 or 1 */
1675 mova LOCAL(div_table_clz),r0
1676 dmulu.l r1,r4 /* mach:macl = reciprocal * dividend, unsigned 64-bit product */
1677 mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */
1678 bt/s LOCAL(div_by_1)
1679 mov r4,r0 /* delay slot: r0 = dividend, which is the quotient when dividing by 1 */
1680 mov.l @r15+,r5 /* restore r5 */
1681 sts mach,r0 /* r0 = upper 32 bits of the product */
1682 /* clrt -- not needed: T is 0 here because the bt/s above fell through */
1683 addc r4,r0 /* add the dividend (the implicit leading 1 of the reciprocal); carry out goes to T */
1684 mov.l @r15+,r4 /* restore r4 */
1685 rotcr r0 /* sum >> 1 with the carry rotated into bit 31 */
1686 rts
1687 shld r1,r0 /* delay slot: shift by the table count (negative counts shift right) */
1688
1689 LOCAL(div_by_1_neg): /* signed path with negative quotient: r0 = -r4 */
1690 neg r4,r0
1691 LOCAL(div_by_1): /* r0 already holds the quotient; only the saved registers remain to be popped */
1692 mov.l @r15+,r5
1693 rts
1694 mov.l @r15+,r4 /* delay slot */
1695
1696 LOCAL(div_ge64k): /* signed entry, divisor >= 0x10000; T comes from the caller's "cmp/hi r0,r5" */
1697 bt/s LOCAL(div_r8) /* divisor > (dividend >> 8): eight-step path */
1698 div0u /* delay slot */
1699 shll8 r5
1700 bra LOCAL(div_ge64k_2)
1701 div1 r5,r0 /* delay slot: first step */
1702 LOCAL(udiv_ge64k): /* unsigned entry, divisor >= 0x10000; r0 = dividend >> 8 */
1703 cmp/hi r0,r5 /* T = divisor > (dividend >> 8), i.e. the quotient is below 256 */
1704 mov r5,r1 /* copy of the divisor for the stack save */
1705 bt LOCAL(udiv_r8)
1706 shll8 r5
1707 mov.l r4,@-r15 /* save r4 */
1708 div1 r5,r0
1709 mov.l r1,@-r15 /* save the original r5 value */
1710 LOCAL(div_ge64k_2):
1711 div1 r5,r0
1712 mov.l LOCAL(zero_l),r1 /* r1 = 0 */
1713 .rept 4
1714 div1 r5,r0
1715 .endr
1716 mov.l r1,@-r15 /* push a zeroed word that collects quotient bytes */
1717 div1 r5,r0
1718 mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended by mov.w, i.e. the mask 0xffffff00 */
1719 div1 r5,r0
1720 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 becomes bits 8..15 of the collected word */
1721 xor r4,r0
1722 and r1,r0
1723 bra LOCAL(div_ge64k_end)
1724 xor r4,r0 /* delay slot: xor/and/xor keeps r0's upper 24 bits and inserts the low byte of r4 */
1725
1726 LOCAL(div_r8): /* signed entry to the eight-step path */
1727 shll16 r4
1728 bra LOCAL(div_r8_2)
1729 shll8 r4 /* delay slot: r4 = dividend << 24 */
1730 LOCAL(udiv_r8): /* unsigned entry */
1731 mov.l r4,@-r15 /* save r4 */
1732 shll16 r4
1733 clrt
1734 shll8 r4 /* r4 = dividend << 24: only its low byte remains, at the top */
1735 mov.l r5,@-r15 /* save r5 */
1736 LOCAL(div_r8_2):
1737 rotcl r4
1738 mov r0,r1 /* r1 = dividend >> 8, the register the div1 steps work on */
1739 div1 r5,r1
1740 mov r4,r0
1741 rotcl r0
1742 mov r5,r4 /* r4 = divisor copy, needed because r5 is popped before the last step */
1743 div1 r5,r1
1744 .rept 5
1745 rotcl r0; div1 r5,r1
1746 .endr
1747 rotcl r0
1748 mov.l @r15+,r5 /* restore r5 */
1749 div1 r4,r1 /* eighth step, using the divisor copy */
1750 mov.l @r15+,r4 /* restore r4 */
1751 rts
1752 rotcl r0 /* delay slot: last rotate; r0 is the value returned */
1753
1754 ENDFUNC(GLOBAL(udivsi3_i4i))
1755
1756 .global GLOBAL(sdivsi3_i4i)
1757 FUNC(GLOBAL(sdivsi3_i4i))
1758 /* This is link-compatible with a GLOBAL(sdivsi3) call,
1759 but we effectively clobber only r1. */
1760 GLOBAL(sdivsi3_i4i): /* signed r4 / r5, quotient in r0: divides the magnitudes and negates the quotient when the signs differ */
1761 mov.l r4,@-r15 /* save r4 */
1762 cmp/pz r5 /* T = divisor >= 0 */
1763 mov.w LOCAL(c128_w), r1 /* r1 = 128 */
1764 bt/s LOCAL(pos_divisor)
1765 cmp/pz r4 /* delay slot: T = dividend >= 0 */
1766 mov.l r5,@-r15 /* save r5 (negative on this path) */
1767 neg r5,r5 /* r5 = magnitude of the divisor */
1768 bt/s LOCAL(neg_result) /* dividend >= 0 with divisor < 0 */
1769 cmp/hi r1,r5 /* delay slot: T = divisor magnitude > 128 */
1770 neg r4,r4 /* both operands negative: r4 = magnitude of the dividend */
1771 LOCAL(pos_result): /* quotient is non-negative; T = divisor magnitude > 128 */
1772 extu.w r5,r0 /* r0 = low 16 bits of the divisor */
1773 bf LOCAL(div_le128) /* table path, code located in udivsi3_i4i */
1774 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
1775 mov r4,r0
1776 shlr8 r0 /* r0 = dividend >> 8 */
1777 bf/s LOCAL(div_ge64k)
1778 cmp/hi r0,r5 /* delay slot: T = divisor > (dividend >> 8), tested at div_ge64k */
1779 div0u /* initialise the div1 state */
1780 shll16 r5 /* divisor into the upper half for div1 */
1781 div1 r5,r0
1782 div1 r5,r0
1783 div1 r5,r0
1784 LOCAL(udiv_25): /* udivsi3_i4i joins here for 16-bit divisors */
1785 mov.l LOCAL(zero_l),r1 /* r1 = 0 */
1786 div1 r5,r0
1787 div1 r5,r0
1788 mov.l r1,@-r15 /* push a zeroed word that collects quotient bytes */
1789 .rept 3
1790 div1 r5,r0
1791 .endr
1792 mov.b r0,@(L_MSWLSB,r15) /* low byte of r0 becomes bits 16..23 of the collected word */
1793 xtrct r4,r0
1794 swap.w r0,r0 /* r0 = previous upper half of r0 : low 16 bits of r4 */
1795 .rept 8
1796 div1 r5,r0
1797 .endr
1798 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 becomes bits 8..15 of the collected word */
1799 LOCAL(div_ge64k_end): /* the paths for divisors >= 0x10000 join here */
1800 .rept 8
1801 div1 r5,r0
1802 .endr
1803 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
1804 extu.b r0,r0 /* keep only the low byte of r0 */
1805 mov.l @r15+,r5 /* restore r5 */
1806 or r4,r0 /* merge it with the collected bytes */
1807 mov.l @r15+,r4 /* restore r4 */
1808 rts
1809 rotcl r0 /* delay slot: shift in the bit left in T */
1810
1811 LOCAL(div_le128_neg): /* table path for a negative quotient; r0 = low 16 bits of the divisor magnitude */
1812 tst #0xfe,r0 /* T = 1 when the divisor magnitude is 0 or 1 */
1813 mova LOCAL(div_table_ix),r0
1814 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], a signed byte offset */
1815 mova LOCAL(div_table_inv),r0
1816 bt/s LOCAL(div_by_1_neg)
1817 mov.l @(r0,r1),r1 /* delay slot: r1 = reciprocal word at div_table_inv + offset */
1818 mova LOCAL(div_table_clz),r0
1819 dmulu.l r1,r4 /* mach:macl = reciprocal * dividend magnitude */
1820 mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the shift count */
1821 mov.l @r15+,r5 /* restore r5 */
1822 sts mach,r0 /* r0 = upper 32 bits of the product */
1823 /* clrt -- not needed: T is 0 here because the bt/s above fell through */
1824 addc r4,r0 /* add the dividend magnitude; carry out goes to T */
1825 mov.l @r15+,r4 /* restore r4 */
1826 rotcr r0 /* sum >> 1 with the carry rotated into bit 31 */
1827 shld r1,r0 /* shift by the table count: magnitude of the quotient */
1828 rts
1829 neg r0,r0 /* delay slot: negate the quotient */
1830
1831 LOCAL(pos_divisor):
1832 mov.l r5,@-r15 /* save r5 */
1833 bt/s LOCAL(pos_result) /* dividend >= 0 as well */
1834 cmp/hi r1,r5 /* delay slot: T = divisor > 128 */
1835 neg r4,r4 /* dividend < 0: r4 = its magnitude */
1836 LOCAL(neg_result): /* same case split as pos_result, but the quotient is negated before returning */
1837 extu.w r5,r0 /* r0 = low 16 bits of the divisor */
1838 bf LOCAL(div_le128_neg)
1839 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
1840 mov r4,r0
1841 shlr8 r0 /* r0 = dividend >> 8 */
1842 bf/s LOCAL(div_ge64k_neg)
1843 cmp/hi r0,r5 /* delay slot: T = divisor > (dividend >> 8) */
1844 div0u /* initialise the div1 state */
1845 mov.l LOCAL(zero_l),r1 /* r1 = 0 */
1846 shll16 r5 /* divisor into the upper half for div1 */
1847 div1 r5,r0
1848 mov.l r1,@-r15 /* push a zeroed word that collects quotient bytes */
1849 .rept 7
1850 div1 r5,r0
1851 .endr
1852 mov.b r0,@(L_MSWLSB,r15) /* low byte of r0 becomes bits 16..23 of the collected word */
1853 xtrct r4,r0
1854 swap.w r0,r0 /* r0 = previous upper half of r0 : low 16 bits of r4 */
1855 .rept 8
1856 div1 r5,r0
1857 .endr
1858 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 becomes bits 8..15 of the collected word */
1859 LOCAL(div_ge64k_neg_end): /* div_ge64k_neg joins here */
1860 .rept 8
1861 div1 r5,r0
1862 .endr
1863 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
1864 extu.b r0,r1 /* r1 = low byte of r0 */
1865 mov.l @r15+,r5 /* restore r5 */
1866 or r4,r1 /* merge it with the collected bytes */
1867 LOCAL(div_r8_neg_end): /* div_r8_neg joins here with its quotient bits in r1 */
1868 mov.l @r15+,r4 /* restore r4 */
1869 rotcl r1 /* shift in the bit left in T */
1870 rts
1871 neg r1,r0 /* delay slot: r0 = negated quotient */
1872
1873 LOCAL(div_ge64k_neg): /* divisor >= 0x10000, negative quotient; T comes from the "cmp/hi r0,r5" above */
1874 bt/s LOCAL(div_r8_neg) /* divisor > (dividend >> 8): eight-step path */
1875 div0u /* delay slot */
1876 shll8 r5
1877 mov.l LOCAL(zero_l),r1 /* r1 = 0 */
1878 .rept 6
1879 div1 r5,r0
1880 .endr
1881 mov.l r1,@-r15 /* push a zeroed word that collects quotient bytes */
1882 div1 r5,r0
1883 mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended by mov.w, i.e. the mask 0xffffff00 */
1884 div1 r5,r0
1885 mov.b r0,@(L_LSWMSB,r15) /* low byte of r0 becomes bits 8..15 of the collected word */
1886 xor r4,r0
1887 and r1,r0
1888 bra LOCAL(div_ge64k_neg_end)
1889 xor r4,r0 /* delay slot: xor/and/xor keeps r0's upper 24 bits and inserts the low byte of r4 */
1890
1891 LOCAL(c128_w): /* 16-bit literal loaded with mov.w at both entry points */
1892 .word 128
1893
1894 LOCAL(div_r8_neg): /* eight-step path, negative quotient; quotient bits are rotated into r1 */
1895 clrt
1896 shll16 r4
1897 mov r4,r1
1898 shll8 r1 /* r1 = dividend << 24 */
1899 mov r5,r4 /* r4 = divisor copy, needed because r5 is popped before the last step */
1900 .rept 7
1901 rotcl r1; div1 r5,r0
1902 .endr
1903 mov.l @r15+,r5 /* restore r5 */
1904 rotcl r1
1905 bra LOCAL(div_r8_neg_end)
1906 div1 r4,r0 /* delay slot: eighth step, using the divisor copy */
1907
1908 LOCAL(m256_w): /* 16-bit literal used as a byte mask by the paths for divisors >= 0x10000 */
1909 .word 0xff00
1910 /* This table has been generated by divtab-sh4.c. */
1911 .balign 4
1912 LOCAL(div_table_clz): /* indexed by the divisor; the byte is the shld count applied to the scaled product (negative = shift right) */
1913 .byte 0 /* divisor 0 */
1914 .byte 1 /* divisor 1 */
1915 .byte 0 /* divisor 2 */
1916 .byte -1 /* divisors 3..4 */
1917 .byte -1
1918 .byte -2 /* divisors 5..8 */
1919 .byte -2
1920 .byte -2
1921 .byte -2
1922 .byte -3 /* divisors 9..16 */
1923 .byte -3
1924 .byte -3
1925 .byte -3
1926 .byte -3
1927 .byte -3
1928 .byte -3
1929 .byte -3
1930 .byte -4 /* divisors 17..32 */
1931 .byte -4
1932 .byte -4
1933 .byte -4
1934 .byte -4
1935 .byte -4
1936 .byte -4
1937 .byte -4
1938 .byte -4
1939 .byte -4
1940 .byte -4
1941 .byte -4
1942 .byte -4
1943 .byte -4
1944 .byte -4
1945 .byte -4
1946 .byte -5 /* divisors 33..64 */
1947 .byte -5
1948 .byte -5
1949 .byte -5
1950 .byte -5
1951 .byte -5
1952 .byte -5
1953 .byte -5
1954 .byte -5
1955 .byte -5
1956 .byte -5
1957 .byte -5
1958 .byte -5
1959 .byte -5
1960 .byte -5
1961 .byte -5
1962 .byte -5
1963 .byte -5
1964 .byte -5
1965 .byte -5
1966 .byte -5
1967 .byte -5
1968 .byte -5
1969 .byte -5
1970 .byte -5
1971 .byte -5
1972 .byte -5
1973 .byte -5
1974 .byte -5
1975 .byte -5
1976 .byte -5
1977 .byte -5
1978 .byte -6 /* divisors 65..127; the entry for 128 is the first byte of the table that follows */
1979 .byte -6
1980 .byte -6
1981 .byte -6
1982 .byte -6
1983 .byte -6
1984 .byte -6
1985 .byte -6
1986 .byte -6
1987 .byte -6
1988 .byte -6
1989 .byte -6
1990 .byte -6
1991 .byte -6
1992 .byte -6
1993 .byte -6
1994 .byte -6
1995 .byte -6
1996 .byte -6
1997 .byte -6
1998 .byte -6
1999 .byte -6
2000 .byte -6
2001 .byte -6
2002 .byte -6
2003 .byte -6
2004 .byte -6
2005 .byte -6
2006 .byte -6
2007 .byte -6
2008 .byte -6
2009 .byte -6
2010 .byte -6
2011 .byte -6
2012 .byte -6
2013 .byte -6
2014 .byte -6
2015 .byte -6
2016 .byte -6
2017 .byte -6
2018 .byte -6
2019 .byte -6
2020 .byte -6
2021 .byte -6
2022 .byte -6
2023 .byte -6
2024 .byte -6
2025 .byte -6
2026 .byte -6
2027 .byte -6
2028 .byte -6
2029 .byte -6
2030 .byte -6
2031 .byte -6
2032 .byte -6
2033 .byte -6
2034 .byte -6
2035 .byte -6
2036 .byte -6
2037 .byte -6
2038 .byte -6
2039 .byte -6
2040 .byte -6
2041 /* Lookup table translating positive divisor to index into table of
2042 normalized inverse. N.B. the '0' entry is also the last entry of the
2043 previous table, and causes an unaligned access for division by zero. */
2044 LOCAL(div_table_ix): /* indexed by the divisor (0..128); each byte is a signed byte offset that the code adds to div_table_inv */
2045 .byte -6 /* divisor 0; doubles as div_table_clz[128] */
2046 .byte -128 /* divisor 1: -128 selects the word at zero_l, as it does for every power of two below */
2047 .byte -128 /* divisor 2 */
2048 .byte 0 /* divisor 3 */
2049 .byte -128 /* divisors 4..7 */
2050 .byte -64
2051 .byte 0
2052 .byte 64
2053 .byte -128 /* divisors 8..15 */
2054 .byte -96
2055 .byte -64
2056 .byte -32
2057 .byte 0
2058 .byte 32
2059 .byte 64
2060 .byte 96
2061 .byte -128 /* divisors 16..31 */
2062 .byte -112
2063 .byte -96
2064 .byte -80
2065 .byte -64
2066 .byte -48
2067 .byte -32
2068 .byte -16
2069 .byte 0
2070 .byte 16
2071 .byte 32
2072 .byte 48
2073 .byte 64
2074 .byte 80
2075 .byte 96
2076 .byte 112
2077 .byte -128 /* divisors 32..63 */
2078 .byte -120
2079 .byte -112
2080 .byte -104
2081 .byte -96
2082 .byte -88
2083 .byte -80
2084 .byte -72
2085 .byte -64
2086 .byte -56
2087 .byte -48
2088 .byte -40
2089 .byte -32
2090 .byte -24
2091 .byte -16
2092 .byte -8
2093 .byte 0
2094 .byte 8
2095 .byte 16
2096 .byte 24
2097 .byte 32
2098 .byte 40
2099 .byte 48
2100 .byte 56
2101 .byte 64
2102 .byte 72
2103 .byte 80
2104 .byte 88
2105 .byte 96
2106 .byte 104
2107 .byte 112
2108 .byte 120
2109 .byte -128 /* divisors 64..127: one 4-byte table entry per divisor */
2110 .byte -124
2111 .byte -120
2112 .byte -116
2113 .byte -112
2114 .byte -108
2115 .byte -104
2116 .byte -100
2117 .byte -96
2118 .byte -92
2119 .byte -88
2120 .byte -84
2121 .byte -80
2122 .byte -76
2123 .byte -72
2124 .byte -68
2125 .byte -64
2126 .byte -60
2127 .byte -56
2128 .byte -52
2129 .byte -48
2130 .byte -44
2131 .byte -40
2132 .byte -36
2133 .byte -32
2134 .byte -28
2135 .byte -24
2136 .byte -20
2137 .byte -16
2138 .byte -12
2139 .byte -8
2140 .byte -4
2141 .byte 0
2142 .byte 4
2143 .byte 8
2144 .byte 12
2145 .byte 16
2146 .byte 20
2147 .byte 24
2148 .byte 28
2149 .byte 32
2150 .byte 36
2151 .byte 40
2152 .byte 44
2153 .byte 48
2154 .byte 52
2155 .byte 56
2156 .byte 60
2157 .byte 64
2158 .byte 68
2159 .byte 72
2160 .byte 76
2161 .byte 80
2162 .byte 84
2163 .byte 88
2164 .byte 92
2165 .byte 96
2166 .byte 100
2167 .byte 104
2168 .byte 108
2169 .byte 112
2170 .byte 116
2171 .byte 120
2172 .byte 124
2173 .byte -128 /* divisor 128 */
2174 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
2175 .balign 4
2176 LOCAL(zero_l): /* first word of the reciprocal table (offset -128 from div_table_inv); also loaded wherever the code needs a 32-bit zero */
2177 .long 0x0
2178 .long 0xF81F81F9
2179 .long 0xF07C1F08
2180 .long 0xE9131AC0
2181 .long 0xE1E1E1E2
2182 .long 0xDAE6076C
2183 .long 0xD41D41D5
2184 .long 0xCD856891
2185 .long 0xC71C71C8
2186 .long 0xC0E07039
2187 .long 0xBACF914D
2188 .long 0xB4E81B4F
2189 .long 0xAF286BCB
2190 .long 0xA98EF607
2191 .long 0xA41A41A5
2192 .long 0x9EC8E952
2193 .long 0x9999999A
2194 .long 0x948B0FCE
2195 .long 0x8F9C18FA
2196 .long 0x8ACB90F7
2197 .long 0x86186187
2198 .long 0x81818182
2199 .long 0x7D05F418
2200 .long 0x78A4C818
2201 .long 0x745D1746
2202 .long 0x702E05C1
2203 .long 0x6C16C16D
2204 .long 0x68168169
2205 .long 0x642C8591
2206 .long 0x60581606
2207 .long 0x5C9882BA
2208 .long 0x58ED2309
2209 LOCAL(div_table_inv): /* base for the signed offsets from div_table_ix: 32 words lie before this label and 32 after it */
2210 .long 0x55555556
2211 .long 0x51D07EAF
2212 .long 0x4E5E0A73
2213 .long 0x4AFD6A06
2214 .long 0x47AE147B
2215 .long 0x446F8657
2216 .long 0x41414142
2217 .long 0x3E22CBCF
2218 .long 0x3B13B13C
2219 .long 0x38138139
2220 .long 0x3521CFB3
2221 .long 0x323E34A3
2222 .long 0x2F684BDB
2223 .long 0x2C9FB4D9
2224 .long 0x29E4129F
2225 .long 0x27350B89
2226 .long 0x24924925
2227 .long 0x21FB7813
2228 .long 0x1F7047DD
2229 .long 0x1CF06ADB
2230 .long 0x1A7B9612
2231 .long 0x18118119
2232 .long 0x15B1E5F8
2233 .long 0x135C8114
2234 .long 0x11111112
2235 .long 0xECF56BF
2236 .long 0xC9714FC
2237 .long 0xA6810A7
2238 .long 0x8421085
2239 .long 0x624DD30
2240 .long 0x4104105
2241 .long 0x2040811
2242 /* maximum error: 0.987342 scaled: 0.921875*/
2243
2244 ENDFUNC(GLOBAL(sdivsi3_i4i))
2245 #endif /* SH3 / SH4 */
2246
2247 #endif /* L_div_table */
2248
2249 #ifdef L_udiv_qrnnd_16 /* udiv_qrnnd_16: one quotient/remainder step built from 16 div1 steps and a 16x16 multiply. */
2250 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
2251 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
2252 /* n1 < d, but n1 might be larger than d1. */
2253 .global GLOBAL(udiv_qrnnd_16)
2254 .balign 8
2255 GLOBAL(udiv_qrnnd_16): /* per the register lists above -- in: r0 = n1, r4 = n0, r5 = d, r6 = d1; out: r0 = rn, r1 = qn; r2 is scratch */
2256 div0u /* initialise the div1 state for an unsigned division */
2257 cmp/hi r6,r0 /* T = n1 > d1 (unsigned) */
2258 bt .Lots /* that case is handled separately below */
2259 .rept 16
2260 div1 r6,r0
2261 .endr
2262 extu.w r0,r1 /* r1 = low 16 bits of r0 after the 16 steps */
2263 bt 0f /* T still comes from the last div1; when it is clear ... */
2264 add r6,r0 /* ... d1 is added back (presumably the non-restoring remainder fix-up -- confirm) */
2265 0: rotcl r1 /* shift the bit held in T into r1 */
2266 mulu.w r1,r5 /* macl = low 16 bits of r1 times low 16 bits of d */
2267 xtrct r4,r0
2268 swap.w r0,r0 /* r0 = previous upper half of r0 : low 16 bits of n0 */
2269 sts macl,r2 /* r2 = the product (__m) */
2270 cmp/hs r2,r0 /* T = r0 >= r2 (unsigned), i.e. the subtraction below does not wrap */
2271 sub r2,r0
2272 bt 0f /* no wrap: r0 and r1 are final */
2273 addc r5,r0 /* T is 0 here: add d back, carry out goes to T */
2274 add #-1,r1 /* and lower the quotient by one */
2275 bt 0f /* a carry ends the adjustment */
2276 1: add #-1,r1 /* otherwise lower the quotient once more ... */
2277 rts
2278 add r5,r0 /* ... and, in the delay slot, add d to r0 once more */
2279 .balign 8
2280 .Lots: /* n1 > d1 */
2281 sub r5,r0 /* r0 = n1 - d */
2282 swap.w r4,r1 /* r1 = n0 with its halves exchanged */
2283 xtrct r0,r1 /* r1 = low half of r0 : low 16 bits of n0 */
2284 clrt
2285 mov r1,r0
2286 addc r5,r0 /* r0 += d, carry out goes to T */
2287 mov #-1,r1
2288 SL1(bf, 1b,
2289 shlr16 r1)
2290 0: rts /* SL1 comes from lib1funcs.h; presumably it is a "bf" to 1b with the shlr16 in its delay slot where available -- confirm */
2291 nop
2292 ENDFUNC(GLOBAL(udiv_qrnnd_16))
2293 #endif /* L_udiv_qrnnd_16 */