]> git.ipfire.org Git - thirdparty/gcc.git/blame - libgcc/config/sh/lib1funcs-Os-4-200.S
Update copyright years.
[thirdparty/gcc.git] / libgcc / config / sh / lib1funcs-Os-4-200.S
CommitLineData
7adcbafe 1/* Copyright (C) 2006-2022 Free Software Foundation, Inc.
3f1d3526
R
2
3This file is free software; you can redistribute it and/or modify it
4under the terms of the GNU General Public License as published by the
748086b7 5Free Software Foundation; either version 3, or (at your option) any
3f1d3526
R
6later version.
7
3f1d3526
R
8This file is distributed in the hope that it will be useful, but
9WITHOUT ANY WARRANTY; without even the implied warranty of
10MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11General Public License for more details.
12
748086b7
JJ
13Under Section 7 of GPL version 3, you are granted additional
14permissions described in the GCC Runtime Library Exception, version
153.1, as published by the Free Software Foundation.
16
17You should have received a copy of the GNU General Public License and
18a copy of the GCC Runtime Library Exception along with this program;
19see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
20<http://www.gnu.org/licenses/>. */
3f1d3526
R
21
22/* Moderately Space-optimized libgcc routines for the Renesas SH /
23 STMicroelectronics ST40 CPUs.
24 Contributed by J"orn Rennecke joern.rennecke@st.com. */
25
26#include "lib1funcs.h"
27
28#ifdef L_udivsi3_i4i
29
30/* 88 bytes; sh4-200 cycle counts:
31 divisor >= 2G: 11 cycles
32 dividend < 2G: 48 cycles
33 dividend >= 2G, divisor != 1: 54 cycles
34 dividend >= 2G, divisor == 1: 22 cycles */
35#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
36!! args in r4 and r5, result in r0, clobber r1
37
/* udivsi3_i4i (FPU variant): unsigned 32-bit division carried out in
   double precision.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   r1 is used as scratch.  FPSCR, FPUL, dr0 and dr2 are saved and restored
   around the computation.  */
38 .global GLOBAL(udivsi3_i4i)
39 FUNC(GLOBAL(udivsi3_i4i))
40GLOBAL(udivsi3_i4i):
/* r0 = address of the constant table at L1 (FPSCR word, then 2^32 as a
   double).  */
41 mova L1,r0
/* T = 1 when the divisor is non-negative as a signed value, i.e. < 2^31.  */
42 cmp/pz r5
/* Keep the caller's FPSCR in r1, then load the FPSCR word from L1.  The
   post-increment leaves r0 pointing at the .double that follows it.  */
43 sts fpscr,r1
44 lds.l @r0+,fpscr
/* Push the caller's FPUL.  */
45 sts.l fpul,@-r15
/* Divisor has bit 31 set: the quotient can only be 0 or 1, so take the
   compare-only path.  */
46 bf LOCAL(huge_divisor)
/* Push the caller's FPSCR (still in r1).  */
47 mov.l r1,@-r15
48 lds r4,fpul
/* T = 1 when the dividend is non-negative as a signed value.  The float
   instruction converts FPUL as a signed integer, so a dividend with
   bit 31 set has 2^32 added after conversion (fadd below).  */
49 cmp/pz r4
50#ifdef FMOVD_WORKS
/* Push dr0 and dr2 with 64-bit moves; dr0 = (double) dividend.  */
51 fmov.d dr0,@-r15
52 float fpul,dr0
53 fmov.d dr2,@-r15
54 bt LOCAL(dividend_adjusted)
/* Dividend >= 2^31: dr2 = 2^32 from the table, and test for divisor == 1.  */
55 mov #1,r1
56 fmov.d @r0,dr2
57 cmp/eq r1,r5
/* Divisor == 1: skip both the adjustment and the division.  Presumably
   this is because the true quotient (>= 2^31) would not fit the signed
   result of ftrc, whereas converting the unadjusted signed value back
   reproduces the dividend's bit pattern - confirm against the ftrc
   specification.  */
58 bt LOCAL(div_by_1)
59 fadd dr2,dr0
60LOCAL(dividend_adjusted):
/* dr2 = (double) divisor; dr0 = dividend / divisor.  */
61 lds r5,fpul
62 float fpul,dr2
63 fdiv dr2,dr0
64LOCAL(div_by_1):
/* Pop dr2, truncate the quotient into FPUL, pop dr0.  */
65 fmov.d @r15+,dr2
66 ftrc dr0,fpul
67 fmov.d @r15+,dr0
68#else /* !FMOVD_WORKS */
/* Same sequence using 32-bit moves on the register halves.  DR00/DR01 and
   DR20/DR21 are not defined in this file; presumably they come from
   lib1funcs.h and name the two halves of dr0 and dr2.  */
69 fmov.s DR01,@-r15
70 mov #1,r1
71 fmov.s DR00,@-r15
72 float fpul,dr0
73 fmov.s DR21,@-r15
/* The push of DR20 sits in the delay slot, so it runs on both paths.  */
74 bt/s LOCAL(dividend_adjusted)
75 fmov.s DR20,@-r15
76 cmp/eq r1,r5
77 bt LOCAL(div_by_1)
/* Load the 2^32 constant into dr2 one word at a time, then adjust dr0.  */
78 fmov.s @r0+,DR20
79 fmov.s @r0,DR21
80 fadd dr2,dr0
81LOCAL(dividend_adjusted):
82 lds r5,fpul
83 float fpul,dr2
84 fdiv dr2,dr0
85LOCAL(div_by_1):
/* Pops mirror the push order above: DR20, DR21, then DR00, DR01.  */
86 fmov.s @r15+,DR20
87 fmov.s @r15+,DR21
88 ftrc dr0,fpul
89 fmov.s @r15+,DR00
90 fmov.s @r15+,DR01
91#endif /* !FMOVD_WORKS */
/* Restore the caller's FPSCR, move the result to r0, and restore FPUL in
   the rts delay slot.  */
92 lds.l @r15+,fpscr
93 sts fpul,r0
94 rts
95 lds.l @r15+,fpul
96
97#ifdef FMOVD_WORKS
/* huge_divisor is five 2-byte instructions (10 bytes); the .p2align 2
   below pads that to 12 and the FPSCR word takes 4 more, so the .double
   starts 16 bytes after this alignment point.  */
98 .p2align 3 ! make double below 8 byte aligned.
99#endif
100LOCAL(huge_divisor):
/* Only FPUL has been pushed on this path and FPUL itself has not been
   written yet, so restore FPSCR from r1 and simply drop the stack slot.  */
101 lds r1,fpscr
102 add #4,r15
/* T = (dividend >= divisor) unsigned; movt in the delay slot returns it
   as the quotient (0 or 1).  */
103 cmp/hs r5,r4
104 rts
105 movt r0
106
107 .p2align 2
108L1:
/* FPSCR value used during the division.  0x80000 sets bit 19 and 0x180000
   sets bits 19 and 20; per the SH-4 FPSCR layout these are PR (double
   precision) and SZ (64-bit fmov transfers).  */
109#ifndef FMOVD_WORKS
110 .long 0x80000
111#else
112 .long 0x180000
113#endif
/* 2^32, added to a converted dividend whose bit 31 was set.  */
114 .double 4294967296
115
116 ENDFUNC(GLOBAL(udivsi3_i4i))
117#elif !defined (__sh1__) /* !__SH_FPU_DOUBLE__ */
118
119#if 0
120/* With 36 bytes, the following would probably be the most compact
121 implementation, but with 139 cycles on an sh4-200, it is extremely slow. */
/* Compiled out by the enclosing #if 0.
   Saves r2 and r3 on the stack and restores them before returning (r2 in
   the rts delay slot); r0 and r1 are written without being saved.
   NOTE(review): this variant is never assembled and its behaviour has not
   been verified here.  */
122GLOBAL(udivsi3_i4i):
123 mov.l r2,@-r15
/* r1, r2 and r3 all start at zero; r0 starts as the dividend (r4).  */
124 mov #0,r1
125 div0u
126 mov r1,r2
127 mov.l r3,@-r15
128 mov r1,r3
/* T = 1 on entry to the loop, so the first rotcr puts a 1 into bit 31
   of r2.  */
129 sett
130 mov r4,r0
131LOCAL(loop):
/* Each pass executes one rotcr of r2 and one cmp/gt (delay slot of bt/s),
   and - unless the bt/s is taken - one rotcl of r0 and one div1 (delay
   slot of bra).  */
132 rotcr r2
133 ;
134 bt/s LOCAL(end)
135 cmp/gt r2,r3
136 rotcl r0
137 bra LOCAL(loop)
138 div1 r5,r1
139LOCAL(end):
140 rotcl r0
141 mov.l @r15+,r3
142 rts
143 mov.l @r15+,r2
144#endif /* 0 */
145
146/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
147 sh4-200 run times:
148 udiv small divisor: 55 cycles
149 udiv large divisor: 52 cycles
150 sdiv small divisor, positive result: 59 cycles
151 sdiv large divisor, positive result: 56 cycles
152 sdiv small divisor, negative result: 65 cycles (*)
153 sdiv large divisor, negative result: 62 cycles (*)
154 (*): r2 is restored in the rts delay slot and has a lingering latency
155 of two more cycles. */
/* udivsi3_i4i (integer variant): unsigned 32-bit division built from div1
   steps.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   r4 and r5 are pushed on entry and popped before returning.  r1 holds the
   return address, because the internal bsr calls overwrite PR; the
   routine returns with jmp @r1.
   sdiv_small_divisor and sdiv_large_divisor are also branch targets of
   sdivsi3_i4i, which enters with r4/r5 already pushed and r1 already set.  */
156 .balign 4
157 .global GLOBAL(udivsi3_i4i)
158 FUNC(GLOBAL(udivsi3_i4i))
159 FUNC(GLOBAL(sdivsi3_i4i))
160GLOBAL(udivsi3_i4i):
/* r1 = return address.  */
161 sts pr,r1
162 mov.l r4,@-r15
/* T = 1 when the divisor fits in 16 bits (r5 equals its zero-extended
   low half).  */
163 extu.w r5,r0
164 cmp/eq r5,r0
/* r0 = dividend with its 16-bit halves exchanged; r4 = high half of the
   dividend.  Neither instruction changes T.  */
165 swap.w r4,r0
166 shlr16 r4
/* div0u is in the delay slot, so it initialises the division step state
   on both paths.  */
167 bf/s LOCAL(large_divisor)
168 div0u
169 mov.l r5,@-r15
/* Move the 16-bit divisor into the upper half of r5 for the div1 steps.  */
170 shll16 r5
171LOCAL(sdiv_small_divisor):
/* First group of 16 div1 steps: twice (1 + 1 in the bsr delay slot + 6
   inside div6).  */
172 div1 r5,r4
173 bsr LOCAL(div6)
174 div1 r5,r4
175 div1 r5,r4
176 bsr LOCAL(div6)
177 div1 r5,r4
/* r0 = (low half of r4) : (low half of the dividend).  */
178 xtrct r4,r0
/* r4 = (low half of the dividend) : (high half of r4); the swap.w in the
   delay slot then exchanges those halves, so the low dividend bits are
   next to be shifted through.  xtrct, swap.w and bsr leave T alone, so
   the bit produced by the last div1 is carried into the next group.  */
179 xtrct r0,r4
/* Second group of 16 div1 steps: 7 in div7, then 1 + 1 in the bsr delay
   slot + 7 in div7.  */
180 bsr LOCAL(div7)
181 swap.w r4,r4
182 div1 r5,r4
183 bsr LOCAL(div7)
184 div1 r5,r4
/* r0 = (low half of r4) : (bits collected by the first group); swap.w
   puts the first group's bits on top, and the rotcl in the jmp delay
   slot shifts in the final bit from T.  */
185 xtrct r4,r0
186 mov.l @r15+,r5
187 swap.w r0,r0
188 mov.l @r15+,r4
189 jmp @r1
190 rotcl r0
/* div7 = one div1 followed by div6; div6 = six div1 steps, the last one
   in the rts delay slot.  */
191LOCAL(div7):
192 div1 r5,r4
193LOCAL(div6):
194 div1 r5,r4; div1 r5,r4; div1 r5,r4
195 div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
196
/* divx3: three rotcl/div1 pairs, the last div1 in the rts delay slot.  */
197LOCAL(divx3):
198 rotcl r0
199 div1 r5,r4
200 rotcl r0
201 div1 r5,r4
202 rotcl r0
203 rts
204 div1 r5,r4
205
206LOCAL(large_divisor):
207 mov.l r5,@-r15
208LOCAL(sdiv_large_divisor):
/* r0 holds the half-swapped dividend and r4 its high half, so the xor
   clears the low half of r0 and leaves the low 16 dividend bits in the
   upper half.  */
209 xor r4,r0
/* Four rounds of (rotcl, div1 in the bsr delay slot, then divx3) give 16
   rotcl/div1 pairs.  Each rotcl moves the next bit out of the top of r0
   into T for div1 and shifts the previous T into the bottom of r0.  */
210 .rept 4
211 rotcl r0
212 bsr LOCAL(divx3)
213 div1 r5,r4
214 .endr
215 mov.l @r15+,r5
216 mov.l @r15+,r4
/* The rotcl in the delay slot shifts the final T bit into r0.  */
217 jmp @r1
218 rotcl r0
219 ENDFUNC(GLOBAL(udivsi3_i4i))
220
/* sdivsi3_i4i (integer variant): signed 32-bit division.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   Negates negative operands and then branches into the unsigned div1 code
   at sdiv_small_divisor / sdiv_large_divisor, which pops r4/r5 and returns
   with jmp @r1.  When the operand signs differ, r1 is pointed at
   negate_result so the quotient is negated on the way out; on that path
   r2 carries the real return address, its old value is parked in MACL
   (whose previous contents are overwritten) and restored at the end.  */
221 .global GLOBAL(sdivsi3_i4i)
222GLOBAL(sdivsi3_i4i):
223 mov.l r4,@-r15
/* T = 1 when the divisor is non-negative.  */
224 cmp/pz r5
225 mov.l r5,@-r15
/* The delay slot sets T = 1 when the dividend is non-negative; this runs
   on both paths.  */
226 bt/s LOCAL(pos_divisor)
227 cmp/pz r4
/* Negative divisor: negate it.  */
228 neg r5,r5
229 extu.w r5,r0
/* Dividend non-negative with a negative divisor: result is negative.
   The delay slot sets T = 1 when the (now negated) divisor fits in
   16 bits; that T is what sdiv_check_divisor tests.  */
230 bt/s LOCAL(neg_result)
231 cmp/eq r5,r0
/* Both operands negative: negate the dividend as well.  */
232 neg r4,r4
233LOCAL(pos_result):
/* r0 = half-swapped dividend, as the unsigned code expects; r1 = the
   caller's return address (set in the bra delay slot).  */
234 swap.w r4,r0
235 bra LOCAL(sdiv_check_divisor)
236 sts pr,r1
237LOCAL(pos_divisor):
238 extu.w r5,r0
/* Dividend non-negative too: positive result.  The delay slot sets
   T = 1 when the divisor fits in 16 bits.  */
239 bt/s LOCAL(pos_result)
240 cmp/eq r5,r0
/* Negative dividend with a positive divisor: negate the dividend.  */
241 neg r4,r4
242LOCAL(neg_result):
/* r1 = address of negate_result, so the unsigned code's jmp @r1 lands
   there instead of in the caller.  */
243 mova LOCAL(negate_result),r0
244 ;
245 mov r0,r1
246 swap.w r4,r0
/* Park r2 in MACL and keep the caller's return address in r2.  */
247 lds r2,macl
248 sts pr,r2
249LOCAL(sdiv_check_divisor):
/* r4 = high half of the dividend; shlr16 does not change T, so bf/s still
   tests the 16-bit-divisor check made above.  div0u runs on both paths.  */
250 shlr16 r4
251 bf/s LOCAL(sdiv_large_divisor)
252 div0u
/* Small divisor: shift it into the upper half (delay slot) and join the
   unsigned code.  */
253 bra LOCAL(sdiv_small_divisor)
254 shll16 r5
255 .balign 4
256LOCAL(negate_result):
/* Negate the unsigned quotient, return through r2 and restore r2 from
   MACL in the delay slot.  */
257 neg r0,r0
258 jmp @r2
259 sts macl,r2
260 ENDFUNC(GLOBAL(sdivsi3_i4i))
261#endif /* !__SH_FPU_DOUBLE__ */
262#endif /* L_udivsi3_i4i */
263
264#ifdef L_sdivsi3_i4i
265#if defined (__SH_FPU_DOUBLE__) || defined (__SH4_SINGLE_ONLY__)
266/* 48 bytes, 45 cycles on sh4-200 */
267!! args in r4 and r5, result in r0, clobber r1
268
/* sdivsi3_i4i (FPU variant): signed 32-bit division carried out in double
   precision.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient.
   r1 is used as scratch (it holds the caller's FPUL).  FPSCR, FPUL, dr0
   and dr2 are saved and restored.  */
269 .global GLOBAL(sdivsi3_i4i)
270 FUNC(GLOBAL(sdivsi3_i4i))
271GLOBAL(sdivsi3_i4i):
/* Push the caller's FPSCR and keep the caller's FPUL in r1.  */
272 sts.l fpscr,@-r15
273 sts fpul,r1
/* Load the FPSCR word stored at L1.  */
274 mova L1,r0
275 lds.l @r0+,fpscr
276 lds r4,fpul
277#ifdef FMOVD_WORKS
/* Push dr0, dr0 = (double) dividend, FPUL = divisor, push dr2.  */
278 fmov.d dr0,@-r15
279 float fpul,dr0
280 lds r5,fpul
281 fmov.d dr2,@-r15
282#else
/* Same steps with 32-bit moves on the register halves.  DR00/DR01 and
   DR20/DR21 are not defined in this file; presumably they come from
   lib1funcs.h.  */
283 fmov.s DR01,@-r15
284 fmov.s DR00,@-r15
285 float fpul,dr0
286 lds r5,fpul
287 fmov.s DR21,@-r15
288 fmov.s DR20,@-r15
289#endif
/* dr2 = (double) divisor; dr0 = dividend / divisor.  */
290 float fpul,dr2
291 fdiv dr2,dr0
292#ifdef FMOVD_WORKS
293 fmov.d @r15+,dr2
294#else
295 fmov.s @r15+,DR20
296 fmov.s @r15+,DR21
297#endif
/* Truncate the quotient into FPUL, between the restores of dr2 and dr0.  */
298 ftrc dr0,fpul
299#ifdef FMOVD_WORKS
300 fmov.d @r15+,dr0
301#else
302 fmov.s @r15+,DR00
303 fmov.s @r15+,DR01
304#endif
/* Restore the caller's FPSCR, move the result to r0, and restore FPUL
   from r1 in the rts delay slot.  */
305 lds.l @r15+,fpscr
306 sts fpul,r0
307 rts
308 lds r1,fpul
309
310 .p2align 2
311L1:
/* FPSCR value used during the division.  0x80000 sets bit 19 and 0x180000
   sets bits 19 and 20; per the SH-4 FPSCR layout these are PR (double
   precision) and SZ (64-bit fmov transfers).  */
312#ifndef FMOVD_WORKS
313 .long 0x80000
314#else
315 .long 0x180000
316#endif
317
318 ENDFUNC(GLOBAL(sdivsi3_i4i))
319#endif /* __SH_FPU_DOUBLE__ */
320#endif /* L_sdivsi3_i4i */