;; libgcc/config/c6x/lib1funcs.S -- TI C6x ABI runtime helper routines (GCC).
1 /* Copyright (C) 2010-2019 Free Software Foundation, Inc.
2 Contributed by Bernd Schmidt <bernds@codesourcery.com>.
3
4 This file is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 3, or (at your option) any
7 later version.
8
9 This file is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
13
14 Under Section 7 of GPL version 3, you are granted additional
15 permissions described in the GCC Runtime Library Exception, version
16 3.1, as published by the Free Software Foundation.
17
18 You should have received a copy of the GNU General Public License and
19 a copy of the GCC Runtime Library Exception along with this program;
20 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
21 <http://www.gnu.org/licenses/>. */
22
23 ;; ABI considerations for the divide functions
24 ;; The following registers are call-used:
25 ;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
26 ;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
27 ;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
28 ;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
29 ;;
30 ;; In our implementation, divu and remu are leaf functions,
31 ;; while both divi and remi call into divu.
32 ;; A0 is not clobbered by any of the functions.
33 ;; divu does not clobber B2 either, which is taken advantage of
34 ;; in remi.
35 ;; divi uses B5 to hold the original return address during
36 ;; the call to divu.
37 ;; remi uses B2 and A5 to hold the input values during the
38 ;; call to divu.  It stores B3 on the stack.
39
40 #ifdef L_divsi3
;; __c6xabi_divi -- signed 32-bit divide: A4 = A4 / B4.
;; Built on the unsigned divide __c6xabi_divu: record the operand
;; signs, take absolute values, and let divu do the work.  If exactly
;; one operand was negative, B3 is redirected so that divu "returns"
;; into the fixup stub at 1: below, which negates the quotient and
;; returns to the real caller (whose address was saved in B5).
41 .text
42 .align 2
43 .global __c6xabi_divi
44 .hidden __c6xabi_divi
45 .type __c6xabi_divi, STT_FUNC
46 
47 __c6xabi_divi:
;; Branch to divu now; the packets below execute in its delay slots.
;; B5 = saved return address, A1 = (A4 < 0), B1 = (B4 < 0).
48 call .s2 __c6xabi_divu
49 || mv .d2 B3, B5
50 || cmpgt .l1 0, A4, A1
51 || cmpgt .l2 0, B4, B1
52 
;; Take absolute values; A1 = (operand signs differ), i.e. the
;; quotient will need negating.
53 [A1] neg .l1 A4, A4
54 || [B1] neg .l2 B4, B4
55 || xor .s1x A1, B1, A1
56 
;; When a fixup is needed, point divu's return address B3 at the 1:
;; stub.  C6400 can do this in one addkpc; older parts need a
;; mvkl/mvkh pair plus nops to fill the remaining delay slots.
57 #ifdef _TMS320C6400
58 [A1] addkpc .s2 1f, B3, 4
59 #else
60 [A1] mvkl .s2 1f, B3
61 [A1] mvkh .s2 1f, B3
62 nop 2
63 #endif
;; Fixup stub: divu returns here with |a|/|b| in A4; negate it and
;; return via the saved address.  NOTE(review): "ret B5" and
;; "mv B3,B5" are in one packet, so ret reads the pre-packet B5
;; (the original return address) -- confirm against the ISA manual.
64 1:
65 neg .l1 A4, A4
66 || mv .l2 B3,B5
67 || ret .s2 B5
68 nop 5
69 #endif
70
71 #if defined L_modsi3 || defined L_divmodsi4
;; Signed modulo / divmod, built on __c6xabi_divu.
;;   __c6xabi_remi:    in A4, B4; remainder returned in A4.
;;   __c6xabi_divremi: in A4, B4; quotient in A4, remainder in A5.
;; MOD_OUTPUT_REG selects where the remainder is written so the same
;; tail serves both entry points.  The remainder is computed from the
;; signed quotient as a - (a/b)*b.
72 .align 2
73 #ifdef L_modsi3
74 #define MOD_OUTPUT_REG A4
75 .global __c6xabi_remi
76 .hidden __c6xabi_remi
77 .type __c6xabi_remi, STT_FUNC
78 #else
79 #define MOD_OUTPUT_REG A5
80 .global __c6xabi_divremi
81 .hidden __c6xabi_divremi
82 .type __c6xabi_divremi, STT_FUNC
83 __c6xabi_divremi:
84 #endif
85 
86 __c6xabi_remi:
;; Save the return address on the stack, keep the original signed
;; dividend in A5, note the operand signs (A1, B2), and branch to the
;; unsigned divide; the packets below run in the call's delay slots.
87 stw .d2t2 B3, *B15--[2]
88 || cmpgt .l1 0, A4, A1
89 || cmpgt .l2 0, B4, B2
90 || mv .s1 A4, A5
91 || call .s2 __c6xabi_divu
92 
;; Take absolute values.  B0 = (signs differ) = quotient needs
;; negating.  "mv B4,B2" reads the pre-negation B4, so B2 keeps the
;; original signed divisor for the multiply below.
93 [A1] neg .l1 A4, A4
94 || [B2] neg .l2 B4, B4
95 || xor .s2x B2, A1, B0
96 || mv .d2 B4, B2
97 
;; Direct divu's return: to 1: (negate the quotient) when B0 is set,
;; straight to 2: otherwise.
98 #ifdef _TMS320C6400
99 [B0] addkpc .s2 1f, B3, 1
100 [!B0] addkpc .s2 2f, B3, 1
101 nop 2
102 #else
103 [B0] mvkl .s2 1f,B3
104 [!B0] mvkl .s2 2f,B3
105 
106 [B0] mvkh .s2 1f,B3
107 [!B0] mvkh .s2 2f,B3
108 #endif
;; divu returns here with the unsigned quotient in A4.
109 1:
110 neg .l1 A4, A4
111 2:
;; Restore the real return address; A4 now holds the signed quotient.
112 ldw .d2t2 *++B15, B3
113 
;; Remainder = A5 - A4*B2, i.e. a - (a/b)*b, placed in MOD_OUTPUT_REG
;; in the delay slots of the return branch.  C64x+ has a 32x32 low
;; multiply (mpy32); earlier parts assemble the low 32 bits of the
;; product from 16-bit partial products (mpyu + two mpylhu).
114 #ifdef _TMS320C6400_PLUS
115 mpy32 .m1x A4, B2, A6
116 nop 3
117 ret .s2 B3
118 sub .l1 A5, A6, MOD_OUTPUT_REG
119 nop 4
120 #else
121 mpyu .m1x A4, B2, A1
122 nop 1
123 mpylhu .m1x A4, B2, A6
124 || mpylhu .m2x B2, A4, B2
125 nop 1
126 add .l1x A6, B2, A6
127 || ret .s2 B3
128 shl .s1 A6, 16, A6
129 add .d1 A6, A1, A6
130 sub .l1 A5, A6, MOD_OUTPUT_REG
131 nop 2
132 #endif
133 
134 #endif
135
136 #if defined L_udivsi3 || defined L_udivmodsi4
;; Unsigned 32-bit divide.
;;   __c6xabi_divu:    in A4 (dividend), B4 (divisor); quotient in A4.
;;   __c6xabi_divremu: same, plus remainder returned in A5.
137 .align 2
138 #ifdef L_udivsi3
139 .global __c6xabi_divu
140 .hidden __c6xabi_divu
141 .type __c6xabi_divu, STT_FUNC
142 __c6xabi_divu:
143 #else
144 .global __c6xabi_divremu
145 .hidden __c6xabi_divremu
146 .type __c6xabi_divremu, STT_FUNC
147 __c6xabi_divremu:
148 #endif
149 ;; We use a series of up to 31 subc instructions. First, we find
150 ;; out how many leading zero bits there are in the divisor. This
151 ;; gives us both a shift count for aligning (shifting) the divisor
152 ;; to the dividend, and the number of times we have to execute subc.
153 
154 ;; At the end, we have both the remainder and most of the quotient
155 ;; in A4. The top bit of the quotient is computed first and is
156 ;; placed in A2.
157 
158 ;; Return immediately if the dividend is zero. Setting B4 to 1
159 ;; is a trick to allow us to leave the following insns in the jump
160 ;; delay slot without affecting the result.
161 mv .s2x A4, B1
162 
163 #ifndef _TMS320C6400
164 [!b1] mvk .s2 1, B4
165 #endif
;; B1 = lmbd(1, B4) = number of leading zeros in the divisor (step
;; count).  If the dividend was zero, branch straight back to the
;; caller (quotient 0; remainder zeroed for divremu).
166 [b1] lmbd .l2 1, B4, B1
167 ||[!b1] b .s2 B3 ; RETURN A
168 #ifdef _TMS320C6400
169 ||[!b1] mvk .d2 1, B4
170 #endif
171 #ifdef L_udivmodsi4
172 ||[!b1] zero .s1 A5
173 #endif
;; Save the shift count in A6 and left-align the divisor to bit 31.
174 mv .l1x B1, A6
175 || shl .s2 B4, B1, B4
176 
177 ;; The loop performs a maximum of 28 steps, so we do the
178 ;; first 3 here.
;; First step done by hand: A2 = top quotient bit; then two subc
;; steps, each decrementing the remaining-step count B1.
179 cmpltu .l1x A4, B4, A2
180 [!A2] sub .l1x A4, B4, A4
181 || shru .s2 B4, 1, B4
182 || xor .s1 1, A2, A2
183 
184 shl .s1 A2, 31, A2
185 || [b1] subc .l1x A4,B4,A4
186 || [b1] add .s2 -1, B1, B1
187 [b1] subc .l1x A4,B4,A4
188 || [b1] add .s2 -1, B1, B1
189 
190 ;; RETURN A may happen here (note: must happen before the next branch)
;; Main loop: seven predicated subc steps per trip; B0 = (B1 > 7)
;; decides whether to go round again, with the loop-back branch's
;; delay slots filled by more subc steps.
191 0:
192 cmpgt .l2 B1, 7, B0
193 || [b1] subc .l1x A4,B4,A4
194 || [b1] add .s2 -1, B1, B1
195 [b1] subc .l1x A4,B4,A4
196 || [b1] add .s2 -1, B1, B1
197 || [b0] b .s1 0b
198 [b1] subc .l1x A4,B4,A4
199 || [b1] add .s2 -1, B1, B1
200 [b1] subc .l1x A4,B4,A4
201 || [b1] add .s2 -1, B1, B1
202 [b1] subc .l1x A4,B4,A4
203 || [b1] add .s2 -1, B1, B1
204 [b1] subc .l1x A4,B4,A4
205 || [b1] add .s2 -1, B1, B1
206 [b1] subc .l1x A4,B4,A4
207 || [b1] add .s2 -1, B1, B1
208 ;; loop backwards branch happens here
209 
;; Epilogue (in the return branch's delay slots): A6 = 32 - shift
;; count.  For divremu, extract the remainder from the low A6 bits of
;; A4 into A5.  Then shift the quotient bits down and merge in the
;; top bit saved in A2.
210 ret .s2 B3
211 || mvk .s1 32, A1
212 sub .l1 A1, A6, A6
213 #ifdef L_udivmodsi4
214 || extu .s1 A4, A6, A5
215 #endif
216 shl .s1 A4, A6, A4
217 shru .s1 A4, 1, A4
218 || sub .l1 A6, 1, A6
219 or .l1 A2, A4, A4
220 shru .s1 A4, A6, A4
221 nop
222 
223 #endif
224
225 #ifdef L_umodsi3
;; __c6xabi_remu -- unsigned 32-bit modulo: A4 = A4 % B4.
;; Same lmbd + repeated-subc scheme as __c6xabi_divu, but only the
;; remainder is recovered at the end (quotient bits are discarded).
;; A7 holds the lmbd count so the remainder can be extracted from the
;; low bits of A4 afterwards.
226 .align 2
227 .global __c6xabi_remu
228 .hidden __c6xabi_remu
229 .type __c6xabi_remu, STT_FUNC
230 __c6xabi_remu:
231 ;; The ABI seems designed to prevent these functions calling each other,
232 ;; so we duplicate most of the divsi3 code here.
233 mv .s2x A4, B1
234 #ifndef _TMS320C6400
235 [!b1] mvk .s2 1, B4
236 #endif
;; B1 = leading zeros of the divisor (step count); return at once if
;; the dividend is zero (remainder is 0 = A4 already).
237 lmbd .l2 1, B4, B1
238 ||[!b1] b .s2 B3 ; RETURN A
239 #ifdef _TMS320C6400
240 ||[!b1] mvk .d2 1, B4
241 #endif
242 
;; A7 = saved shift count; left-align the divisor.
243 mv .l1x B1, A7
244 || shl .s2 B4, B1, B4
245 
;; First division step done by hand (no quotient bit kept).
246 cmpltu .l1x A4, B4, A1
247 [!a1] sub .l1x A4, B4, A4
248 shru .s2 B4, 1, B4
249 
;; Main loop: predicated subc steps; B0 = (B1 > 7) decides another
;; trip, with the branch's delay slots filled by more subc steps.
250 0:
251 cmpgt .l2 B1, 7, B0
252 || [b1] subc .l1x A4,B4,A4
253 || [b1] add .s2 -1, B1, B1
254 ;; RETURN A may happen here (note: must happen before the next branch)
255 [b1] subc .l1x A4,B4,A4
256 || [b1] add .s2 -1, B1, B1
257 || [b0] b .s1 0b
258 [b1] subc .l1x A4,B4,A4
259 || [b1] add .s2 -1, B1, B1
260 [b1] subc .l1x A4,B4,A4
261 || [b1] add .s2 -1, B1, B1
262 [b1] subc .l1x A4,B4,A4
263 || [b1] add .s2 -1, B1, B1
264 [b1] subc .l1x A4,B4,A4
265 || [b1] add .s2 -1, B1, B1
266 [b1] subc .l1x A4,B4,A4
267 || [b1] add .s2 -1, B1, B1
268 ;; loop backwards branch happens here
269 
;; Final steps run in the return branch's delay slots; then the
;; remainder is the low A7 bits of A4.
270 ret .s2 B3
271 [b1] subc .l1x A4,B4,A4
272 || [b1] add .s2 -1, B1, B1
273 [b1] subc .l1x A4,B4,A4
274 
275 extu .s1 A4, A7, A4
276 nop 2
277 #endif
278
279 #if defined L_strasgi_64plus && defined _TMS320C6400_PLUS
;; __c6xabi_strasgi_64plus -- word-wise block copy (structure
;; assignment) for C64x+, using the hardware software-pipelined loop
;; buffer (SPLOOP).  In: a4 = destination, b4 = source, a6 = size in
;; bytes (assumed a multiple of 4 -- see __c6xabi_strasgi below;
;; TODO confirm the minimum size contract for this entry point).
280 
281 .align 2
282 .global __c6xabi_strasgi_64plus
283 .hidden __c6xabi_strasgi_64plus
284 .type __c6xabi_strasgi_64plus, STT_FUNC
285 __c6xabi_strasgi_64plus:
;; b31 = word count; copy the pointers into scratch regs a30/b30.
286 shru .s2x a6, 2, b31
287 || mv .s1 a4, a30
288 || mv .d2 b4, b30
289 
;; ILC = word count - 4; presumably the SPLOOP prolog/epilog covers
;; the difference -- NOTE(review): verify against the C64x+ SPLOOP
;; documentation.
290 add .s2 -4, b31, b31
291 
;; One-cycle kernel: load a word, wait out the ldw latency, move it
;; across register files, store it.
292 sploopd 1
293 || mvc .s2 b31, ilc
294 ldw .d2t2 *b30++, b31
295 nop 4
296 mv .s1x b31,a31
297 spkernel 6, 0
298 || stw .d1t1 a31, *a30++
299 
300 ret .s2 b3
301 nop 5
302 #endif
303
304 #ifdef L_strasgi
;; __c6xabi_strasgi -- word-wise block copy (structure assignment).
;; In: A4 = destination, B4 = source, A6 = size in bytes.
;; The loop is hand-pipelined six words deep: registers
;; A0/A1/A5/A7/A8/A9 form a rotating buffer of loaded words, B5
;; stages the next word to store, B6/B7 track the remaining byte
;; counts, and the cmpltu results in B0 predicate each further load.
;; B1/B2 hold the constants 16 and 20 used in those compares.
305 .global __c6xabi_strasgi
306 .type __c6xabi_strasgi, STT_FUNC
307 __c6xabi_strasgi:
308 ;; This is essentially memcpy, with alignment known to be at least
309 ;; 4, and the size a multiple of 4 greater than or equal to 28.
;; Prologue: prime the pipeline with the first six words and set up
;; B7 = remaining bytes after this batch (A6 - 24).
310 ldw .d2t1 *B4++, A0
311 || mvk .s2 16, B1
312 ldw .d2t1 *B4++, A1
313 || mvk .s2 20, B2
314 || sub .d1 A6, 24, A6
315 ldw .d2t1 *B4++, A5
316 ldw .d2t1 *B4++, A7
317 || mv .l2x A6, B7
318 ldw .d2t1 *B4++, A8
319 ldw .d2t1 *B4++, A9
320 || mv .s2x A0, B5
321 || cmpltu .l2 B2, B7, B0
322 
;; Main loop: each packet stores the staged word in B5, rotates the
;; next loaded word into B5, and (while B0 says more data remains at
;; that depth) loads a replacement word.
323 0:
324 stw .d1t2 B5, *A4++
325 ||[b0] ldw .d2t1 *B4++, A0
326 || mv .s2x A1, B5
327 || mv .l2 B7, B6
328 
329 [b0] sub .d2 B6, 24, B7
330 ||[b0] b .s2 0b
331 || cmpltu .l2 B1, B6, B0
332 
333 [b0] ldw .d2t1 *B4++, A1
334 || stw .d1t2 B5, *A4++
335 || mv .s2x A5, B5
336 || cmpltu .l2 12, B6, B0
337 
338 [b0] ldw .d2t1 *B4++, A5
339 || stw .d1t2 B5, *A4++
340 || mv .s2x A7, B5
341 || cmpltu .l2 8, B6, B0
342 
343 [b0] ldw .d2t1 *B4++, A7
344 || stw .d1t2 B5, *A4++
345 || mv .s2x A8, B5
346 || cmpltu .l2 4, B6, B0
347 
348 [b0] ldw .d2t1 *B4++, A8
349 || stw .d1t2 B5, *A4++
350 || mv .s2x A9, B5
351 || cmpltu .l2 0, B6, B0
352 
353 [b0] ldw .d2t1 *B4++, A9
354 || stw .d1t2 B5, *A4++
355 || mv .s2x A0, B5
356 || cmpltu .l2 B2, B7, B0
357 
358 ;; loop back branch happens here
359 
;; Epilogue: in the return branch's delay slots, drain whatever words
;; are still in flight, each store predicated on the remaining count.
360 cmpltu .l2 B1, B6, B0
361 || ret .s2 b3
362 
363 [b0] stw .d1t1 A1, *A4++
364 || cmpltu .l2 12, B6, B0
365 [b0] stw .d1t1 A5, *A4++
366 || cmpltu .l2 8, B6, B0
367 [b0] stw .d1t1 A7, *A4++
368 || cmpltu .l2 4, B6, B0
369 [b0] stw .d1t1 A8, *A4++
370 || cmpltu .l2 0, B6, B0
371 [b0] stw .d1t1 A9, *A4++
372 
373 ;; return happens here
374 
375 #endif
376
377 #ifdef _TMS320C6400_PLUS
378 #ifdef L_push_rts
;; __c6xabi_push_rts -- compact prologue helper (C64x+ only): push
;; B14, A15:A14, B13:B12, A13:A12, B11:B10, A11:A10 and B3:B2 onto
;; the stack (B15), then branch back to the caller's body via A3.
;; The stores after the "b .s2x A3" fill its delay slots.  The
;; matching __c6xabi_pop_rts below restores the same set in reverse.
379 .align 2
380 .global __c6xabi_push_rts
381 .hidden __c6xabi_push_rts
382 .type __c6xabi_push_rts, STT_FUNC
383 __c6xabi_push_rts:
384 stw .d2t2 B14, *B15--[2]
385 stdw .d2t1 A15:A14, *B15--
386 || b .s2x A3
387 stdw .d2t2 B13:B12, *B15--
388 stdw .d2t1 A13:A12, *B15--
389 stdw .d2t2 B11:B10, *B15--
390 stdw .d2t1 A11:A10, *B15--
391 stdw .d2t2 B3:B2, *B15--
392 #endif
393
394 #ifdef L_pop_rts
;; __c6xabi_pop_rts -- compact epilogue helper (C64x+ only): pop the
;; register set saved by __c6xabi_push_rts (B3:B2 first, B14 last)
;; and return to the caller's caller through the restored B3.  The
;; loads after "b .s2 B3" execute in its delay slots; by branch time
;; B3 already holds the restored value from the first lddw.
395 .align 2
396 .global __c6xabi_pop_rts
397 .hidden __c6xabi_pop_rts
398 .type __c6xabi_pop_rts, STT_FUNC
399 __c6xabi_pop_rts:
400 lddw .d2t2 *++B15, B3:B2
401 lddw .d2t1 *++B15, A11:A10
402 lddw .d2t2 *++B15, B11:B10
403 lddw .d2t1 *++B15, A13:A12
404 lddw .d2t2 *++B15, B13:B12
405 lddw .d2t1 *++B15, A15:A14
406 || b .s2 B3
407 ldw .d2t2 *++B15[2], B14
408 nop 4
409 #endif
410
411 #ifdef L_call_stub
;; __c6xabi_call_stub -- call the function whose address is in B31
;; while preserving the call-clobbered registers A0-A2, A6-A7 and
;; B0-B7 around it (C64x+ only).  The target's return address (B3)
;; is set to 1: by addkpc in the call's last delay slot; after the
;; target returns, everything is restored and control goes back via
;; the caller's original B3 (saved/restored in the B3:B2 pair).
;; NOTE(review): presumably used for calls that must not disturb
;; caller state, e.g. from specially-constrained contexts -- confirm
;; against the C6000 EABI.
412 .align 2
413 .global __c6xabi_call_stub
414 .type __c6xabi_call_stub, STT_FUNC
415 __c6xabi_call_stub:
;; Save phase: the stores after "call .s2 B31" fill its delay slots.
416 stw .d2t1 A2, *B15--[2]
417 stdw .d2t1 A7:A6, *B15--
418 || call .s2 B31
419 stdw .d2t1 A1:A0, *B15--
420 stdw .d2t2 B7:B6, *B15--
421 stdw .d2t2 B5:B4, *B15--
422 stdw .d2t2 B1:B0, *B15--
423 stdw .d2t2 B3:B2, *B15--
424 || addkpc .s2 1f, B3, 0
;; The target returns here; restore everything in reverse order and
;; branch to the restored B3 (loads after the b fill its delay slots).
425 1:
426 lddw .d2t2 *++B15, B3:B2
427 lddw .d2t2 *++B15, B1:B0
428 lddw .d2t2 *++B15, B5:B4
429 lddw .d2t2 *++B15, B7:B6
430 lddw .d2t1 *++B15, A1:A0
431 lddw .d2t1 *++B15, A7:A6
432 || b .s2 B3
433 ldw .d2t1 *++B15[2], A2
434 nop 4
435 #endif
436
437 #endif
438