]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgcc/config/h8300/lib1funcs.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / h8300 / lib1funcs.S
1 ;; libgcc routines for the Renesas H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
4
5 /* Copyright (C) 1994-2023 Free Software Foundation, Inc.
6
7 This file is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 This file is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* Assembler register definitions. */
27
28 #define A0 r0
29 #define A0L r0l
30 #define A0H r0h
31
32 #define A1 r1
33 #define A1L r1l
34 #define A1H r1h
35
36 #define A2 r2
37 #define A2L r2l
38 #define A2H r2h
39
40 #define A3 r3
41 #define A3L r3l
42 #define A3H r3h
43
44 #define S0 r4
45 #define S0L r4l
46 #define S0H r4h
47
48 #define S1 r5
49 #define S1L r5l
50 #define S1H r5h
51
52 #define S2 r6
53 #define S2L r6l
54 #define S2H r6h
55
56 #ifdef __H8300__
57 #define PUSHP push
58 #define POPP pop
59
60 #define A0P r0
61 #define A1P r1
62 #define A2P r2
63 #define A3P r3
64 #define S0P r4
65 #define S1P r5
66 #define S2P r6
67 #endif
68
69 #if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
70 #define PUSHP push.l
71 #define POPP pop.l
72
73 #define A0P er0
74 #define A1P er1
75 #define A2P er2
76 #define A3P er3
77 #define S0P er4
78 #define S1P er5
79 #define S2P er6
80
81 #define A0E e0
82 #define A1E e1
83 #define A2E e2
84 #define A3E e3
85 #endif
86
87 #define CONCAT(A,B) A##B
88 #define LABEL0(U,X) CONCAT(U,__##X)
89 #define LABEL0_DEF(U,X) CONCAT(U,__##X##:)
90 #define LABEL_DEF(X) LABEL0_DEF(__USER_LABEL_PREFIX__,X)
91 #define LABEL(X) LABEL0(__USER_LABEL_PREFIX__,X)
92
93 #ifdef __H8300H__
94 #ifdef __NORMAL_MODE__
95 .h8300hn
96 #else
97 .h8300h
98 #endif
99 #endif
100
101 #ifdef __H8300S__
102 #ifdef __NORMAL_MODE__
103 .h8300sn
104 #else
105 .h8300s
106 #endif
107 #endif
108 #ifdef __H8300SX__
109 #ifdef __NORMAL_MODE__
110 .h8300sxn
111 #else
112 .h8300sx
113 #endif
114 #endif
115
116 #ifdef L_cmpsi2
117 #ifdef __H8300__
118 .section .text
119 .align 2
120 .global LABEL(cmpsi2)
121 LABEL_DEF(cmpsi2) ; signed 32-bit compare of A0:A1 with A2:A3; A0 = 0 (less), 1 (equal) or 2 (greater)
122 cmp.w A0,A2 ; compare the high words (flags reflect A2 - A0)
123 bne .L2 ; high words differ, so they decide the result
124 cmp.w A1,A3 ; high words equal: compare the low words (A3 - A1)
125 bne .L4
126 mov.w #1,A0 ; all 32 bits equal: return 1
127 rts
128 .L2:
129 bgt .L5 ; signed test: A2 > A0, so A0:A1 is the smaller value
130 .L3:
131 mov.w #2,A0 ; A0:A1 is the greater value: return 2
132 rts
133 .L4:
134 bls .L3 ; low words compare unsigned: A3 <= A1 means A0:A1 is greater
135 .L5:
136 sub.w A0,A0 ; A0:A1 is the smaller value: return 0
137 rts
138 .end
139 #endif
140 #endif /* L_cmpsi2 */
141
142 #ifdef L_ucmpsi2
143 #ifdef __H8300__
144 .section .text
145 .align 2
146 .global LABEL(ucmpsi2)
147 LABEL_DEF(ucmpsi2) ; unsigned 32-bit compare of A0:A1 with A2:A3; A0 = 0 (less), 1 (equal) or 2 (greater)
148 cmp.w A0,A2 ; compare the high words (flags reflect A2 - A0)
149 bne .L2 ; high words differ, so they decide the result
150 cmp.w A1,A3 ; high words equal: compare the low words (A3 - A1)
151 bne .L4
152 mov.w #1,A0 ; all 32 bits equal: return 1
153 rts
154 .L2:
155 bhi .L5 ; unsigned test: A2 > A0, so A0:A1 is the smaller value
156 .L3:
157 mov.w #2,A0 ; A0:A1 is the greater value: return 2
158 rts
159 .L4:
160 bls .L3 ; unsigned test: A3 <= A1 means A0:A1 is greater
161 .L5:
162 sub.w A0,A0 ; A0:A1 is the smaller value: return 0
163 rts
164 .end
165 #endif
166 #endif /* L_ucmpsi2 */
167
168 #ifdef L_divhi3
169
170 ;; HImode divides for the H8/300.
171 ;; We bunch all of this into one object file since there are several
172 ;; "supporting routines".
173
174 ; general purpose normalize routine
175 ;
176 ; dividend in A0
177 ; divisor in A1
178 ; turns both into +ve numbers, and leaves what the answer sign
179 ; should be in bit 3 of A2L (set means the answer is negative)
180
181 #ifdef __H8300__
182 .section .text
183 .align 2
184 divnorm: ; A0 = abs(A0), A1 = abs(A1), bit 3 of A2L = sign of the quotient
185 or A0H,A0H ; is dividend (A0) >= 0 ?
186 stc ccr,A2L ; save the flags: bit 3 of A2L now holds the dividend sign
187 bge _lab1
188 not A0H ; no - then make it +ve
189 not A0L
190 adds #1,A0 ; invert plus one completes the negation
191 _lab1: or A1H,A1H ; look at divisor (A1)
192 bge _lab2
193 not A1H ; it is -ve, make it positive
194 not A1L
195 adds #1,A1
196 xor #0x8,A2L; and toggle sign of result
197 _lab2: rts
198 ;; Basically the same, except that the sign of the dividend (A0) alone
199 ;; determines the sign of the result: A2L is not toggled for the divisor.
200 modnorm:
201 or A0H,A0H ; is dividend (A0) >= 0 ?
202 stc ccr,A2L ; save the flags: bit 3 of A2L holds the dividend sign
203 bge _lab7
204 not A0H ; no - then make it +ve
205 not A0L
206 adds #1,A0
207 _lab7: or A1H,A1H ; look at divisor (A1)
208 bge _lab8
209 not A1H ; it is -ve, make it positive
210 not A1L
211 adds #1,A1
212 _lab8: rts
213
214 ; A0=A0/A1 signed
215
216 .global LABEL(divhi3)
217 LABEL_DEF(divhi3)
218 bsr divnorm ; make both operands positive, quotient sign in bit 3 of A2L
219 bsr LABEL(udivhi3) ; unsigned quotient in A0, remainder in A3
220 negans: btst #3,A2L ; should answer be negative ? (tail shared with modhi3)
221 beq _lab4
222 not A0H ; yes, so make it so
223 not A0L
224 adds #1,A0
225 _lab4: rts
226
227 ; A0=A0%A1 signed
228
229 .global LABEL(modhi3)
230 LABEL_DEF(modhi3)
231 bsr modnorm ; make both operands positive, dividend sign in bit 3 of A2L
232 bsr LABEL(udivhi3) ; leaves the unsigned remainder in A3
233 mov A3,A0 ; return the remainder rather than the quotient
234 bra negans ; negate it when bit 3 of A2L is set, then return
235
236 ; A0=A0%A1 unsigned
237
238 .global LABEL(umodhi3)
239 LABEL_DEF(umodhi3)
240 bsr LABEL(udivhi3) ; leaves the remainder in A3
241 mov A3,A0 ; return the remainder rather than the quotient
242 rts
243
244 ; A0=A0/A1 unsigned
245 ; A3=A0%A1 unsigned
246 ; A2H trashed
247 ; D high 8 bits of denom
248 ; d low 8 bits of denom
249 ; N high 8 bits of num
250 ; n low 8 bits of num
251 ; M high 8 bits of mod
252 ; m low 8 bits of mod
253 ; Q high 8 bits of quot
254 ; q low 8 bits of quot
255 ; P preserve
256
257 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
258 ; see how to partition up the expression.
259
260 .global LABEL(udivhi3)
261 LABEL_DEF(udivhi3)
262 ; A0 A1 A2 A3
263 ; Nn Dd P
264 sub.w A3,A3 ; Nn Dd xP 00
265 or A1H,A1H
266 bne divlongway ; D != 0: the denominator does not fit in 8 bits
267 or A0H,A0H
268 beq _lab6 ; N == 0: only the low byte needs dividing
269
270 ; we know that D == 0 and N is != 0
271 mov.b A0H,A3L ; Nn Dd xP 0N
272 divxu A1L,A3 ; MQ
273 mov.b A3L,A0H ; Q
274 ; dealt with N, do n
275 _lab6: mov.b A0L,A3L ; n
276 divxu A1L,A3 ; mq
277 mov.b A3L,A0L ; Qq
278 mov.b A3H,A3L ; m
279 mov.b #0x0,A3H ; Qq 0m
280 rts
281
282 ; D != 0 - which means the denominator is too big for the 16/8 bit
283 ; divide, so loop around (shift and subtract) to get the result.
284
285 divlongway:
286 mov.b A0H,A3L ; Nn Dd xP 0N
287 mov.b #0x0,A0H ; high byte of answer has to be zero
288 mov.b #0x8,A2H ; 8 iterations, one per bit of n
289 div8: add.b A0L,A0L ; n*=2, the bit shifted out lands in carry
290 rotxl A3L ; Make remainder bigger
291 rotxl A3H
292 sub.w A1,A3 ; Mm-=Dd (trial subtraction of the denominator)
293 bhs setbit ; set a bit ?
294 add.w A1,A3 ; no : too far , Mm+=Dd
295
296 dec A2H
297 bne div8 ; next bit
298 rts
299
300 setbit: inc A0L ; do insert bit
301 dec A2H
302 bne div8 ; next bit
303 rts
304
305 #endif /* __H8300__ */
306 #endif /* L_divhi3 */
307
308 #ifdef L_divsi3
309
310 ;; 4 byte integer divides for the H8/300.
311 ;;
312 ;; We have one routine which does all the work and lots of
313 ;; little ones which prepare the args and massage the sign.
314 ;; We bunch all of this into one object file since there are several
315 ;; "supporting routines".
316
317 .section .text
318 .align 2
319
320 ; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result in bit 3 of r6l.
321 ; This function is here to keep branch displacements small.
322
323 #ifdef __H8300__
324
325 divnorm: ; A0/A1 = abs(numerator), A2/A3 = abs(denominator), bit 3 of S2L = quotient sign
326 mov.b A0H,A0H ; is the numerator -ve
327 stc ccr,S2L ; keep the sign in bit 3 of S2L
328 bge postive
329
330 ; negate arg (invert all four bytes, then add one)
331 not A0H
332 not A1H
333 not A0L
334 not A1L
335
336 add #1,A1L ; add one to the lowest byte
337 addx #0,A1H ; and ripple the carry up through the other three
338 addx #0,A0L
339 addx #0,A0H
340 postive:
341 mov.b A2H,A2H ; is the denominator -ve
342 bge postive2
343 not A2L ; negate the denominator the same way
344 not A2H
345 not A3L
346 not A3H
347 add.b #1,A3L
348 addx #0,A3H
349 addx #0,A2L
350 addx #0,A2H
351 xor.b #0x08,S2L ; toggle the result sign
352 postive2:
353 rts
354
355 ;; Basically the same, except that the sign of the numerator alone
356 ;; determines the sign of the result: S2L is not toggled for the denominator.
357 modnorm:
358 mov.b A0H,A0H ; is the numerator -ve
359 stc ccr,S2L ; keep the sign in bit 3 of S2L
360 bge mpostive
361
362 ; negate arg (invert all four bytes, then add one)
363 not A0H
364 not A1H
365 not A0L
366 not A1L
367
368 add #1,A1L ; add one to the lowest byte
369 addx #0,A1H ; and ripple the carry up through the other three
370 addx #0,A0L
371 addx #0,A0H
372 mpostive:
373 mov.b A2H,A2H ; is the denominator -ve
374 bge mpostive2
375 not A2L ; negate the denominator the same way
376 not A2H
377 not A3L
378 not A3H
379 add.b #1,A3L
380 addx #0,A3H
381 addx #0,A2L
382 addx #0,A2H
383 mpostive2:
384 rts
385
386 #else /* __H8300H__ */
387
388 divnorm: ; numerator in A0P, denominator in A1P; both made positive, bit 3 of S2L = quotient sign
389 mov.l A0P,A0P ; is the numerator -ve
390 stc ccr,S2L ; keep the sign in bit 3 of S2L
391 bge postive
392
393 neg.l A0P ; negate arg
394
395 postive:
396 mov.l A1P,A1P ; is the denominator -ve
397 bge postive2
398
399 neg.l A1P ; negate arg
400 xor.b #0x08,S2L ; toggle the result sign
401
402 postive2:
403 rts
404
405 ;; Basically the same, except that the sign of the numerator alone
406 ;; determines the sign of the result: S2L is not toggled for the denominator.
407 modnorm:
408 mov.l A0P,A0P ; is the numerator -ve
409 stc ccr,S2L ; keep the sign in bit 3 of S2L
410 bge mpostive
411
412 neg.l A0P ; negate arg
413
414 mpostive:
415 mov.l A1P,A1P ; is the denominator -ve
416 bge mpostive2
417
418 neg.l A1P ; negate arg
419
420 mpostive2:
421 rts
422
423 #endif
424
425 ; numerator in A0/A1
426 ; denominator in A2/A3
427 .global LABEL(modsi3)
428 LABEL_DEF(modsi3)
429 #ifdef __H8300__
430 PUSHP S2P ; saved because S2L carries the result sign and divmodsi4 trashes S2H
431 PUSHP S0P ; saved because divmodsi4 returns the remainder in S0/S1
432 PUSHP S1P
433 bsr modnorm ; make both operands positive, numerator sign in bit 3 of S2L
434 bsr divmodsi4
435 mov S0,A0 ; return the remainder rather than the quotient
436 mov S1,A1
437 bra exitdiv ; apply the sign, restore the saved registers and return
438 #else
439 PUSHP S2P ; saved because S2L carries the result sign
440 bsr modnorm ; make both operands positive, numerator sign in bit 3 of S2L
441 bsr LABEL(divsi3) ; its call to udivsi3 leaves the remainder in er3
442 mov.l er3,er0 ; return the remainder rather than the quotient
443 bra exitdiv ; apply the sign, restore S2P and return
444 #endif
445
446 ;; H8/300H and H8S version of ___udivsi3 is defined later in
447 ;; the file.
448 #ifdef __H8300__
449 .global LABEL(udivsi3)
450 LABEL_DEF(udivsi3)
451 PUSHP S2P ; divmodsi4 trashes S2H and writes the remainder to S0/S1
452 PUSHP S0P
453 PUSHP S1P
454 bsr divmodsi4 ; unsigned quotient comes back in A0/A1
455 bra reti ; restore S1, S0 and S2, then return
456 #endif
457
458 .global LABEL(umodsi3)
459 LABEL_DEF(umodsi3)
460 #ifdef __H8300__
461 PUSHP S2P ; divmodsi4 trashes S2H and writes the remainder to S0/S1
462 PUSHP S0P
463 PUSHP S1P
464 bsr divmodsi4
465 mov S0,A0 ; return the remainder rather than the quotient
466 mov S1,A1
467 bra reti ; restore S1, S0 and S2, then return
468 #else
469 bsr LABEL(udivsi3) ; quotient in er0, remainder in er3
470 mov.l er3,er0 ; return the remainder
471 rts
472 #endif
473
474 .global LABEL(divsi3)
475 LABEL_DEF(divsi3)
476 #ifdef __H8300__
477 PUSHP S2P ; pushed in the order S2, S0, S1 and popped in reverse at reti
478 PUSHP S0P
479 PUSHP S1P
480 jsr divnorm ; make both operands positive, quotient sign in bit 3 of S2L
481 jsr divmodsi4 ; unsigned quotient in A0/A1
482 #else
483 PUSHP S2P
484 jsr divnorm ; make both operands positive, quotient sign in bit 3 of S2L
485 bsr LABEL(udivsi3) ; unsigned quotient in er0
486 #endif
487
488 ; examine what the sign should be
489 exitdiv: ; shared tail: modsi3 also branches here
490 btst #3,S2L
491 beq reti
492
493 ; should be -ve
494 #ifdef __H8300__
495 not A0H ; invert all four bytes of the result
496 not A1H
497 not A0L
498 not A1L
499
500 add #1,A1L ; then add one, rippling the carry upwards
501 addx #0,A1H
502 addx #0,A0L
503 addx #0,A0H
504 #else /* __H8300H__ */
505 neg.l A0P
506 #endif
507
508 reti: ; shared epilogue: restore the saved registers and return
509 #ifdef __H8300__
510 POPP S1P
511 POPP S0P
512 #endif
513 POPP S2P
514 rts
515
516 ; takes A0/A1 numerator (A0P for H8/300H)
517 ; A2/A3 denominator (A1P for H8/300H)
518 ; returns A0/A1 quotient (A0P for H8/300H)
519 ; S0/S1 remainder (S0P for H8/300H)
520 ; trashes S2H
521
522 #ifdef __H8300__
523
524 divmodsi4:
525 sub.w S0,S0 ; zero play area
526 mov.w S0,S1
527 mov.b A2H,S2H ; S2H = OR of the three upper denominator bytes
528 or A2L,S2H
529 or A3H,S2H
530 bne DenHighNonZero ; denominator does not fit in 8 bits
531 mov.b A0H,A0H ; find the first non-zero numerator byte
532 bne NumByte0Zero ; taken when byte 0 is NON-zero, despite the label name
533 mov.b A0L,A0L
534 bne NumByte1Zero ; taken when byte 1 is NON-zero
535 mov.b A1H,A1H
536 bne NumByte2Zero ; taken when byte 2 is NON-zero
537 bra NumByte3Zero
538 NumByte0Zero:
539 mov.b A0H,S1L ; divide byte by byte from the top; S1H carries the running remainder
540 divxu A3L,S1
541 mov.b S1L,A0H ; quotient byte replaces the numerator byte
542 NumByte1Zero:
543 mov.b A0L,S1L
544 divxu A3L,S1
545 mov.b S1L,A0L
546 NumByte2Zero:
547 mov.b A1H,S1L
548 divxu A3L,S1
549 mov.b S1L,A1H
550 NumByte3Zero:
551 mov.b A1L,S1L
552 divxu A3L,S1
553 mov.b S1L,A1L
554
555 mov.b S1H,S1L ; move the final remainder byte into the low byte of S1
556 mov.b #0x0,S1H
557 rts
558
559 ; have to do the divide by shift and test
560 DenHighNonZero:
561 mov.b A0H,S1L ; top numerator byte becomes the initial remainder
562 mov.b A0L,A0H ; and the remaining three bytes move up by one byte
563 mov.b A1H,A0L
564 mov.b A1L,A1H
565
566 mov.b #0,A1L
567 mov.b #24,S2H ; only do 24 iterations
568
569 nextbit:
570 add.w A1,A1 ; double the answer guess
571 rotxl A0L
572 rotxl A0H
573
574 rotxl S1L ; double remainder
575 rotxl S1H
576 rotxl S0L
577 rotxl S0H
578 sub.w A3,S1 ; does it all fit
579 subx A2L,S0L
580 subx A2H,S0H
581 bhs setone
582
583 add.w A3,S1 ; no, restore mistake
584 addx A2L,S0L
585 addx A2H,S0H
586
587 dec S2H
588 bne nextbit
589 rts
590
591 setone:
592 inc A1L ; record a 1 bit in the quotient
593 dec S2H
594 bne nextbit
595 rts
596
597 #else /* __H8300H__ */
598
599 ;; This function also computes the remainder and stores it in er3.
600 .global LABEL(udivsi3)
601 LABEL_DEF(udivsi3)
602 mov.w A1E,A1E ; denominator top word 0?
603 bne DenHighNonZero
604
605 ; do it the easy way, see page 107 in manual
606 mov.w A0E,A2 ; er2 = upper word of the numerator, zero-extended
607 extu.l A2P
608 divxu.w A1,A2P ; r2 = upper quotient word, e2 = remainder
609 mov.w A2E,A0E ; that remainder becomes the top of the second dividend
610 divxu.w A1,A0P ; r0 = lower quotient word, e0 = final remainder
611 mov.w A0E,A3 ; r3 = remainder (zero-extended to er3 below)
612 mov.w A2,A0E ; er0 = complete 32-bit quotient
613 extu.l A3P
614 rts
615
616 ; er0 = er0 / er1
617 ; er3 = er0 % er1
618 ; trashes er1 er2
619 ; expects er1 >= 2^16
620 DenHighNonZero:
621 mov.l er0,er3 ; keep the unshifted dividend for the remainder
622 mov.l er1,er2 ; work on a copy of the divisor
623 #ifdef __H8300H__
624 divmod_L21:
625 shlr.l er0 ; shift dividend and divisor right together
626 shlr.l er2 ; make divisor < 2^16
627 mov.w e2,e2
628 bne divmod_L21
629 #else
630 shlr.l #2,er2 ; make divisor < 2^16
631 mov.w e2,e2
632 beq divmod_L22A
633 divmod_L21:
634 shlr.l #2,er0 ; two bits per pass; er0 stays one pass behind er2
635 divmod_L22:
636 shlr.l #2,er2 ; make divisor < 2^16
637 mov.w e2,e2
638 bne divmod_L21
639 divmod_L22A:
640 rotxl.w r2 ; NOTE(review): appears to test whether the last 2-bit shift went one bit too far
641 bcs divmod_L23 ; bit 15 was set: the shift count was exact
642 shlr.l er0 ; r2 keeps the one-bit left shift, so er0 moves by one bit only
643 bra divmod_L24
644 divmod_L23:
645 rotxr.w r2 ; undo the rotate
646 shlr.l #2,er0 ; and let er0 catch up by the full two bits
647 divmod_L24:
648 #endif
649 ;; At this point,
650 ;; er0 contains shifted dividend
651 ;; er1 contains divisor
652 ;; er2 contains shifted divisor
653 ;; er3 contains dividend, later remainder
654 divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ)
655 extu.l er0
656 beq divmod_L25 ; AQ == 0: nothing to subtract from er3
657 subs #1,er0 ; er0 = AQ - 1
658 mov.w e1,r2
659 mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor
660 sub.w r2,e3 ; dividend - 65536 * er2
661 mov.w r1,r2
662 mulxu.w r0,er2 ; compute er3 = remainder (tentative)
663 sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor
664 divmod_L25:
665 cmp.l er1,er3 ; is divisor <= remainder?
666 blo divmod_L26 ; no: quotient and remainder are final
667 adds #1,er0
668 sub.l er1,er3 ; correct the remainder
669 divmod_L26:
670 rts
671
672 #endif
673 #endif /* L_divsi3 */
674
675 #ifdef L_mulhi3
676
677 ;; HImode multiply.
678 ; The H8/300 only has an 8*8->16 multiply.
679 ; The answer is the same as:
680 ;
681 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
682 ; (we can ignore A1.h * A0.h because that will all fall off the top)
683 ; A0 in
684 ; A1 in
685 ; A0 answer
686
687 #ifdef __H8300__
688 .section .text
689 .align 2
690 .global LABEL(mulhi3)
691 LABEL_DEF(mulhi3)
692 mov.b A1L,A2L ; A2l gets srcb.l
693 mulxu A0L,A2 ; A2 gets first sub product
694
695 mov.b A0H,A3L ; prepare for
696 mulxu A1L,A3 ; second sub product
697
698 add.b A3L,A2H ; sum first two terms
699
700 mov.b A1H,A3L ; third sub product
701 mulxu A0L,A3
702
703 add.b A3L,A2H ; almost there
704 mov.w A2,A0 ; that is
705 rts
706
707 #endif
708 #endif /* L_mulhi3 */
709
710 #ifdef L_mulsi3
711
712 ;; SImode multiply.
713 ;;
714 ;; I think that shift and add may be sufficient for this. Using the
715 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
716 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
717 ;; quickly on small args.
718 ;;
719 ;; A0/A1 src_a
720 ;; A2/A3 src_b
721 ;;
722 ;; while (a)
723 ;; {
724 ;; if (a & 1)
725 ;; r += b;
726 ;; a >>= 1;
727 ;; b <<= 1;
728 ;; }
729
730 .section .text
731 .align 2
732
733 #ifdef __H8300__
734
735 .global LABEL(mulsi3)
736 LABEL_DEF(mulsi3)
737 PUSHP S0P ; S0/S1 accumulate the product r
738 PUSHP S1P
739
740 sub.w S0,S0 ; r = 0
741 sub.w S1,S1
742
743 ; while (a)
744 _top: mov.w A0,A0
745 bne _more
746 mov.w A1,A1
747 beq _done
748 _more: ; if (a & 1)
749 bld #0,A1L ; carry = lowest bit of a
750 bcc _nobit
751 ; r += b
752 add.w A3,S1
753 addx A2L,S0L
754 addx A2H,S0H
755 _nobit:
756 ; a >>= 1
757 shlr A0H
758 rotxr A0L
759 rotxr A1H
760 rotxr A1L
761
762 ; b <<= 1
763 add.w A3,A3
764 addx A2L,A2L
765 addx A2H,A2H
766 bra _top
767
768 _done:
769 mov.w S0,A0 ; return r in A0/A1
770 mov.w S1,A1
771 POPP S1P
772 POPP S0P
773 rts
774
775 #else /* __H8300H__ */
776
777 ;
778 ; mulsi3 for H8/300H - based on Renesas SH implementation
779 ;
780 ; by Toshiyasu Morita
781 ;
782 ; Old code:
783 ;
784 ; 16b * 16b = 372 states (worst case)
785 ; 32b * 32b = 724 states (worst case)
786 ;
787 ; New code:
788 ;
789 ; 16b * 16b = 48 states
790 ; 16b * 32b = 72 states
791 ; 32b * 32b = 92 states
792 ;
793
794 .global LABEL(mulsi3)
795 LABEL_DEF(mulsi3) ; er0 = a:b and er1 = c:d as 16-bit halves; low 32 bits of the product returned in er0
796 mov.w r1,r2 ; ( 2 states) b * d
797 mulxu r0,er2 ; (22 states)
798
799 mov.w e0,r3 ; ( 2 states) a * d
800 beq L_skip1 ; ( 4 states) a == 0: this term contributes nothing
801 mulxu r1,er3 ; (22 states)
802 add.w r3,e2 ; ( 2 states) only its low word reaches the upper result word
803
804 L_skip1:
805 mov.w e1,r3 ; ( 2 states) c * b
806 beq L_skip2 ; ( 4 states) c == 0: this term contributes nothing
807 mulxu r0,er3 ; (22 states)
808 add.w r3,e2 ; ( 2 states) only its low word reaches the upper result word
809
810 L_skip2:
811 mov.l er2,er0 ; ( 2 states)
812 rts ; (10 states)
813
814 #endif
815 #endif /* L_mulsi3 */
816 #ifdef L_fixunssfsi_asm
817 /* For the h8300 we use asm to save some bytes, to
818 allow more programs to fit into the tiny address
819 space. For the H8/300H and H8S, the C version is good enough. */
820 #ifdef __H8300__
821 /* We still treat NANs different than libgcc2.c, but then, the
822 behavior is undefined anyways. */
823 .global LABEL(fixunssfsi)
824 LABEL_DEF(fixunssfsi)
825 cmp.b #0x4f,r0h ; test the top byte of the argument (0x4f000000 is 2^31 if this is an IEEE single - TODO confirm)
826 bge Large_num ; signed test, so a top byte with bit 7 set is not treated as large
827 jmp @LABEL(fixsfsi) ; otherwise hand the work to the signed conversion
828 Large_num:
829 bhi L_huge_num ; top byte above 0x4f: return all ones
830 xor.b #0x80,A0L ; flip bit 7 of the second byte
831 bmi L_shift8 ; bit 7 was clear and is now set: value can be built by shifting
832 L_huge_num:
833 mov.w #65535,A0 ; return 0xffffffff
834 mov.w A0,A1
835 rts
836 L_shift8:
837 mov.b A0L,A0H ; result = the low three bytes moved up by one byte
838 mov.b A1H,A0L
839 mov.b A1L,A1H
840 mov.b #0,A1L ; with a zero low byte
841 rts
842 #endif
843 #endif /* L_fixunssfsi_asm */