1 ;; libgcc routines for the Renesas H8/300 CPU.
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
5 /* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009
6 Free Software Foundation, Inc.
8 This file is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3, or (at your option) any
13 This file is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* Assembler register definitions. */
70 #if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
89 #ifdef __NORMAL_MODE__
97 #ifdef __NORMAL_MODE__
104 #ifdef __NORMAL_MODE__
135 #endif /* L_cmpsi2 */
161 #endif /* L_ucmpsi2 */
165 ;; HImode divides for the H8/300.
166 ;; We bunch all of this into one object file since there are several
167 ;; "supporting routines".
169 ; general purpose normalize routine
173 ; turns both into +ve numbers, and leaves what the answer sign
180 or A0H,A0H ; is A0 +ve ? (NOTE(review): this was labelled "divisor", but the "Nn Dd" register maps later in this file put the numerator in A0 - confirm)
183 not A0H ; no - then make it +ve
186 _lab1: or A1H,A1H ; look at the sign of A1 (NOTE(review): this was labelled "dividend"; presumably the denominator - confirm)
188 not A1H ; it is -ve, make it positive
191 xor #0x8,A2L; and toggle sign of result (bit 3 of A2L)
193 ;; Basically the same, except that the sign of the divisor determines
196 or A0H,A0H ; is A0 +ve ? (NOTE(review): this was labelled "divisor", but the "Nn Dd" register maps later in this file put the numerator in A0 - confirm)
199 not A0H ; no - then make it +ve
202 _lab7: or A1H,A1H ; look at the sign of A1 (NOTE(review): this was labelled "dividend"; presumably the denominator - confirm)
204 not A1H ; it is -ve, make it positive
215 negans: btst #3,A2L ; should answer be negative ?
217 not A0H ; yes, so make it so
242 ; D high 8 bits of denom
243 ; d low 8 bits of denom
244 ; N high 8 bits of num
245 ; n low 8 bits of num
246 ; M high 8 bits of mod
247 ; m low 8 bits of mod
248 ; Q high 8 bits of quot
249 ; q low 8 bits of quot
252 ; The H8/300 only has a 16/8 bit divide, so we look at the incoming
253 ; operands and see how to partition the expression.
259 sub.w A3,A3 ; Nn Dd xP 00
265 ; we know that D == 0 and N is != 0
266 mov.b A0H,A3L ; Nn Dd xP 0N
270 _lab6: mov.b A0L,A3L ; n
274 mov.b #0x0,A3H ; Qq 0m
277 ; D != 0 - which means the denominator is
278 ; loop around to get the result.
281 mov.b A0H,A3L ; Nn Dd xP 0N
282 mov.b #0x0,A0H ; high byte of answer has to be zero
284 div8: add.b A0L,A0L ; n*=2, the bit shifted out of n goes to carry
285 rotxl A3L ; Make remainder bigger: rotate that carry into the low byte of A3
288 bhs setbit ; set a bit ? (taken when carry is clear)
289 add.w A1,A3 ; no : too far, add the denominator (A1) back to the remainder (A3): M+=D
295 setbit: inc A0L ; do insert bit: set bit 0 of the answer being built in A0L
300 #endif /* __H8300__ */
301 #endif /* L_divhi3 */
305 ;; 4 byte integer divides for the H8/300.
307 ;; We have one routine which does all the work and lots of
308 ;; little ones which prepare the args and massage the sign.
309 ;; We bunch all of this into one object file since there are several
310 ;; "supporting routines".
315 ; Put abs SIs into r0/r1 and r2/r3, and leave a 1 in r6l with sign of rest.
316 ; This function is here to keep branch displacements small.
321 mov.b A0H,A0H ; is the numerator -ve
322 stc ccr,S2L ; keep the sign in bit 3 of S2L
336 mov.b A2H,A2H ; is the denominator -ve
346 xor.b #0x08,S2L ; toggle the result sign
350 ;; Basically the same, except that the sign of the divisor determines
353 mov.b A0H,A0H ; is the numerator -ve
354 stc ccr,S2L ; keep the sign in bit 3 of S2L
368 mov.b A2H,A2H ; is the denominator -ve
381 #else /* __H8300H__ */
384 mov.l A0P,A0P ; is the numerator -ve
385 stc ccr,S2L ; keep the sign in bit 3 of S2L
388 neg.l A0P ; negate arg
391 mov.l A1P,A1P ; is the denominator -ve
394 neg.l A1P ; negate arg
395 xor.b #0x08,S2L ; toggle the result sign
400 ;; Basically the same, except that the sign of the divisor determines
403 mov.l A0P,A0P ; is the numerator -ve
404 stc ccr,S2L ; keep the sign in bit 3 of S2L
407 neg.l A0P ; negate arg
410 mov.l A1P,A1P ; is the denominator -ve
413 neg.l A1P ; negate arg
421 ; denominator in A2/A3
441 ;; H8/300H and H8S version of ___udivsi3 is defined later in
483 ; examine what the sign should be
499 #else /* __H8300H__ */
511 ; takes A0/A1 numerator (A0P for H8/300H)
512 ; A2/A3 denominator (A1P for H8/300H)
513 ; returns A0/A1 quotient (A0P for H8/300H)
514 ; S0/S1 remainder (S0P for H8/300H)
520 sub.w S0,S0 ; zero play area
554 ; have to do the divide by shift and test
562 mov.b #24,S2H ; only do 24 iterations
565 add.w A1,A1 ; double the answer guess
569 rotxl S1L ; double remainder
573 sub.w A3,S1 ; does it all fit
578 add.w A3,S1 ; no, restore mistake
592 #else /* __H8300H__ */
594 ;; This function also computes the remainder and stores it in er3.
597 mov.w A1E,A1E ; denominator top word 0?
600 ; do it the easy way, see page 107 in manual
614 ; expects er1 >= 2^16
621 shlr.l er2 ; make divisor < 2^16
625 shlr.l #2,er2 ; make divisor < 2^16
631 shlr.l #2,er2 ; make divisor < 2^16
645 ;; er0 contains shifted dividend
646 ;; er1 contains divisor
647 ;; er2 contains shifted divisor
648 ;; er3 contains dividend, later remainder
649 divxu.w r2,er0 ; r0 now contains the approximate quotient (AQ)
652 subs #1,er0 ; er0 = AQ - 1
654 mulxu.w r0,er2 ; er2 = upper (AQ - 1) * divisor
655 sub.w r2,e3 ; dividend - 65536 * er2
657 mulxu.w r0,er2 ; compute er3 = remainder (tentative)
658 sub.l er2,er3 ; er3 = dividend - (AQ - 1) * divisor
660 cmp.l er1,er3 ; is divisor < remainder?
663 sub.l er1,er3 ; correct the remainder
668 #endif /* L_divsi3 */
673 ; The H8/300 only has an 8*8->16 multiply.
674 ; The answer is the same as:
676 ; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
677 ; (we can ignore srca.h * srcb.h because that term is scaled by 65536 and all falls off the top)
687 mov.b A1L,A2L ; A2l gets srcb.l
688 mulxu A0L,A2 ; A2 gets first sub product: srca.l * srcb.l
690 mov.b A0H,A3L ; prepare for second sub product: A3l gets srca.h
691 mulxu A1L,A3 ; second sub product: srca.h * srcb.l
693 add.b A3L,A2H ; sum first two terms: low byte of A3 is added into the high byte of A2 (the "* 256")
695 mov.b A1H,A3L ; third sub product: A3l gets srcb.h
698 add.b A3L,A2H ; almost there: add the low byte of A3 into the high byte of A2 again
699 mov.w A2,A0 ; that is it: copy the 16-bit product into A0
703 #endif /* L_mulhi3 */
709 ;; I think that shift and add may be sufficient for this. Using the
710 ;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead. This way
711 ;; the inner loop uses maybe 20 cycles + overhead, but terminates
712 ;; quickly on small args.
770 #else /* __H8300H__ */
773 ; mulsi3 for H8/300H - based on Renesas SH implementation
775 ; by Toshiyasu Morita
779 ; 16b * 16b = 372 states (worst case)
780 ; 32b * 32b = 724 states (worst case)
784 ; 16b * 16b = 48 states
785 ; 16b * 32b = 72 states
786 ; 32b * 32b = 92 states
791 mov.w r1,r2 ; ( 2 states) b * d: r2 = d (low word of er1)
792 mulxu r0,er2 ; (22 states) er2 = b * d, with b = low word of er0
794 mov.w e0,r3 ; ( 2 states) a * d: r3 = a (high word of er0)
795 beq L_skip1 ; ( 4 states) a == 0, so this cross product is zero: skip it
796 mulxu r1,er3 ; (22 states) er3 = a * d
797 add.w r3,e2 ; ( 2 states) add its low word into the high word of er2
800 mov.w e1,r3 ; ( 2 states) c * b: r3 = c (high word of er1)
801 beq L_skip2 ; ( 4 states) c == 0, so this cross product is zero: skip it
802 mulxu r0,er3 ; (22 states) er3 = c * b
803 add.w r3,e2 ; ( 2 states) add its low word into the high word of er2
806 mov.l er2,er0 ; ( 2 states) copy the accumulated product from er2 into er0
810 #endif /* L_mulsi3 */
811 #ifdef L_fixunssfsi_asm
812 /* For the H8/300 we use asm to save some bytes, so that
813 more programs can fit into the tiny address
814 space. For the H8/300H and H8S, the C version is good enough. */
816 /* We still treat NaNs differently from libgcc2.c, but then, the
817 behavior is undefined anyway. */
818 .global ___fixunssfsi
838 #endif /* L_fixunssfsi_asm */