1 /* Copyright (C) 2004-2019 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 3, or (at your option) any
8 This file is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
13 Under Section 7 of GPL version 3, you are granted additional
14 permissions described in the GCC Runtime Library Exception, version
15 3.1, as published by the Free Software Foundation.
17 You should have received a copy of the GNU General Public License and
18 a copy of the GCC Runtime Library Exception along with this program;
19 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20 <http://www.gnu.org/licenses/>. */
23 /* libgcc routines for the STMicroelectronics ST40-300 CPU.
24 Contributed by Joern Rennecke <joern.rennecke@st.com>. */
26 #include "lib1funcs.h"
29 #if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
30 /* This code uses shld and is thus not suitable for SH1 / SH2. */
32 /* Signed / unsigned division without use of FPU, optimized for SH4-300.
33 Uses a lookup table for divisors in the range -128 .. +127, and
34 div1 with case distinction for larger divisors in three more ranges.
35 The code is lumped together with the table to allow the use of mova. */
36 #ifdef __LITTLE_ENDIAN__
/* udivsi3_i4i / sdivsi3_i4i: 32-bit unsigned and signed integer division
   that does not use the FPU.  Both symbols are exported and share the
   single FUNC ... ENDFUNC region below.
   Going by the comments inside the routine, r4 carries the dividend and
   r5 the divisor.  NOTE(review): the quotient is presumably returned in
   r0 -- confirm against the callers.
   Divisors in the table range take the lookup paths (udiv_le128 /
   div_le128); the remaining labels (udiv_ge64k, udiv_r8, div_ge32k,
   div_ge8m) belong to the div1-based paths for the larger divisor
   ranges.  The "N cycles up to here" notes are the original author's
   SH4-300 schedule estimates for each path.  */
46 .global GLOBAL(udivsi3_i4i)
47 .global GLOBAL(sdivsi3_i4i)
48 FUNC(GLOBAL(udivsi3_i4i))
49 FUNC(GLOBAL(sdivsi3_i4i))
/* Signed path for the largest divisor range (presumably |r5| >= 0x800000,
   judging by the label and the special case checked below -- TODO confirm).  */
52 LOCAL(div_ge8m): ! 10 cycles up to here
53 rotcr r1 ! signed shift must use original sign from r4
60 swap.w r5,r0 ! detect -0x80000000 : 0x800000
95 ! 31 cycles up to here
/* Unsigned path; the label suggests divisors of 64K and above -- TODO confirm.  */
98 LOCAL(udiv_ge64k): ! 3 cycles up to here
106 ! 7 cycles up to here
110 extu.b r4,r1 ! 15 cycles up to here
117 .endr ! 25 cycles up to here
123 rotcl r0 ! 28 cycles up to here
/* NOTE(review): the role of udiv_r8 is not evident from the label alone.
   The path ends by popping r6 from the stack, so r6 must have been
   pushed earlier on this path.  */
126 LOCAL(udiv_r8): ! 6 cycles up to here
138 ! 12 cycles up to here
142 mov.l @r15+,r6 ! 24 cycles up to here
/* Signed path entered from the dispatch code below when the divisor is
   outside the smaller ranges.  It also has to recognise the
   0x80000000 / 0x8000 saturation case described in the block comment
   further down.  */
147 LOCAL(div_ge32k): ! 6 cycles up to here
155 cmp/hi r1,r4 ! copy sign bit of r4 into T
156 rotcr r1 ! signed shift must use original sign from r4
161 mov r5,r7 ! detect r4 == 0x80000000 && r5 == 0x8000(00)
183 neg r7,r7 ! upper 16 bit of r7 == 0 if r4 == 0x80000000 && r5 == 0x8000
187 swap.w r7,r7 ! 26 cycles up to here.
203 shad r1,r5 ! 34 cycles up to here
220 extu.b r4,r0 ! 7 cycles up to here
223 .endr ! 15 cycles up to here
224 xor r1,r0 ! xor dividend with result lsb
228 mov.l r7,@-r15 ! 21 cycles up to here
234 xor r7,r1 ! replace lsb of result with lsb of dividend
241 div1 r6,r1 ! 28 cycles up to here
243 /* This is link-compatible with a GLOBAL(sdivsi3) call,
244 but we effectively clobber only r1, macl and mach */
245 /* Because negative quotients are calculated as one's complements,
246 -0x80000000 divided by the smallest positive number of a number
247 range (0x80, 0x8000, 0x800000) causes saturation in the one's
248 complement representation, and we have to suppress the
249 one's -> two's complement adjustment. Since positive numbers
250 don't get such an adjustment, it's OK to also compute one's -> two's
251 complement adjustment suppression for a dividend of 0. */
/* Range dispatch for the signed case: branch to the table-lookup path
   when T is set, otherwise test for the div_ge32k range.  Both branches
   are delayed (bt/s, bf/s), so the instruction following each one
   executes whether or not the branch is taken.  */
258 bt/s LOCAL(div_le128)
264 bf/s LOCAL(div_ge32k)
265 cmp/hi r1,r4 ! copy sign bit of r4 into T
267 shll16 r6 ! 7 cycles up to here
273 mov r4,r0 ! re-compute adjusted dividend
279 add r4,r0 ! adjusted dividend
283 swap.w r4,r8 ! detect special case r4 = 0x80000000, r5 = 0x80
286 xor r1,r0 ! xor dividend with result lsb
291 add #-0x80,r8 ! r8 is 0 iff there is a match
293 swap.w r8,r7 ! or upper 16 bits...
295 or r7,r8 !...into lower 16 bits
303 xor r7,r1 ! replace lsb of result with lsb of dividend
305 neg r8,r8 ! upper 16 bits of r8 are now 0xffff iff we want end adjm.
311 mov.l @r15+,r8 ! 58 insns, 29 cycles up to here
/* Table-lookup path, unsigned entry.  mova forms the addresses of
   div_table_inv and div_table_clz PC-relatively, which is why the code
   and the tables live in the same file.  */
329 LOCAL(udiv_le128): ! 4 cycles up to here (or 7 for mispredict)
330 mova LOCAL(div_table_inv),r0
333 mova LOCAL(div_table_clz),r0
/* Table-lookup path, signed entry; negative cases branch to le128_neg.  */
354 LOCAL(div_le128): ! 3 cycles up to here (or 6 for mispredict)
355 mova LOCAL(div_table_inv),r0
358 mova LOCAL(div_table_clz),r0
368 bt/s LOCAL(le128_neg)
378 /* Could trap divide by zero at the cost of one more cycle of mispredict penalty:
382 bt/s LOCAL(le128_neg)
384 bt LOCAL(div_by_zero)
393 bt LOCAL(div_by_zero)
408 ENDFUNC(GLOBAL(udivsi3_i4i))
409 ENDFUNC(GLOBAL(sdivsi3_i4i))
411 /* This table has been generated by divtab-sh4.c. */
/* Companion table to div_table_inv; both addresses are taken with mova
   by the udiv_le128 / div_le128 paths.  NOTE(review): the name suggests
   per-divisor leading-zero (normalization shift) counts for the same
   -128 .. 127 divisor range -- confirm against divtab-sh4.c.  */
541 LOCAL(div_table_clz):
670 /* 1/-128 .. 1/127, normalized. There is an implicit leading 1 in bit 32,
671 or in bit 33 for powers of two. */
801 LOCAL(div_table_inv):
930 /* maximum error: 0.987342 scaled: 0.921875*/
932 #endif /* SH3 / SH4 */
934 #endif /* L_div_table */