/* Copyright (C) 2010-2020 Free Software Foundation, Inc.
   Contributed by Bernd Schmidt <bernds@codesourcery.com>.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
	;; ABI considerations for the divide functions
	;; The following registers are call-used:
	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
	;;
	;; In our implementation, divu and remu are leaf functions,
	;; while both divi and remi call into divu.
	;; A0 is not clobbered by any of the functions.
	;; divu does not clobber B2 either, which is taken advantage of
	;; in remi.
	;; divi uses B5 to hold the original return address during
	;; the call to divu.
	;; remi uses B2 and A5 to hold the input values during the
	;; call to divu.  It stores B3 on the stack.
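	;;
	;; divi reduces to divu: negate negative operands, divide unsigned,
	;; then negate the quotient if the operand signs differed.  A rough
	;; C sketch of that logic (illustrative only; udiv stands in for
	;; __c6xabi_divu):
	;;
	;;   int32_t divi (int32_t a, int32_t b)
	;;   {
	;;     uint32_t q = udiv (a < 0 ? -a : a, b < 0 ? -b : b);
	;;     return ((a < 0) ^ (b < 0)) ? -(int32_t) q : (int32_t) q;
	;;   }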

#ifdef L_divsi3
	.text
	.align 2
	.global __c6xabi_divi
	.hidden __c6xabi_divi
	.type __c6xabi_divi, STT_FUNC

__c6xabi_divi:
	call .s2 __c6xabi_divu
||	mv .d2 B3, B5
||	cmpgt .l1 0, A4, A1
||	cmpgt .l2 0, B4, B1

 [A1]	neg .l1 A4, A4
|| [B1]	neg .l2 B4, B4
||	xor .s1x A1, B1, A1

#ifdef _TMS320C6400
 [A1]	addkpc .s2 1f, B3, 4
#else
 [A1]	mvkl .s2 1f, B3
 [A1]	mvkh .s2 1f, B3
	nop 2
#endif
1:
	neg .l1 A4, A4
||	mv .l2 B3,B5
||	ret .s2 B5
	nop 5
#endif

#if defined L_modsi3 || defined L_divmodsi4
	.align 2
#ifdef L_modsi3
#define MOD_OUTPUT_REG A4
	.global __c6xabi_remi
	.hidden __c6xabi_remi
	.type __c6xabi_remi, STT_FUNC
#else
#define MOD_OUTPUT_REG A5
	.global __c6xabi_divremi
	.hidden __c6xabi_divremi
	.type __c6xabi_divremi, STT_FUNC
__c6xabi_divremi:
#endif

__c6xabi_remi:
	stw .d2t2 B3, *B15--[2]
||	cmpgt .l1 0, A4, A1
||	cmpgt .l2 0, B4, B2
||	mv .s1 A4, A5
||	call .s2 __c6xabi_divu

 [A1]	neg .l1 A4, A4
|| [B2]	neg .l2 B4, B4
||	xor .s2x B2, A1, B0
||	mv .d2 B4, B2

#ifdef _TMS320C6400
 [B0]	addkpc .s2 1f, B3, 1
 [!B0]	addkpc .s2 2f, B3, 1
	nop 2
#else
 [B0]	mvkl .s2 1f,B3
 [!B0]	mvkl .s2 2f,B3

 [B0]	mvkh .s2 1f,B3
 [!B0]	mvkh .s2 2f,B3
#endif
1:
	neg .l1 A4, A4
2:
	ldw .d2t2 *++B15[2], B3

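	;; On return from divu, A5 holds the original dividend, B2 the
	;; original divisor, and A4 the sign-corrected quotient; the
	;; remainder is then MOD_OUTPUT_REG = A5 - A4 * B2.  C6400+ has a
	;; 32x32 multiply (mpy32); older parts assemble the low 32 bits
	;; of the product from 16x16 partial products.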
#ifdef _TMS320C6400_PLUS
	mpy32 .m1x A4, B2, A6
	nop 3
	ret .s2 B3
	sub .l1 A5, A6, MOD_OUTPUT_REG
	nop 4
#else
	mpyu .m1x A4, B2, A1
	nop 1
	mpylhu .m1x A4, B2, A6
||	mpylhu .m2x B2, A4, B2
	nop 1
	add .l1x A6, B2, A6
||	ret .s2 B3
	shl .s1 A6, 16, A6
	add .d1 A6, A1, A6
	sub .l1 A5, A6, MOD_OUTPUT_REG
	nop 2
#endif

#endif

#if defined L_udivsi3 || defined L_udivmodsi4
	.align 2
#ifdef L_udivsi3
	.global __c6xabi_divu
	.hidden __c6xabi_divu
	.type __c6xabi_divu, STT_FUNC
__c6xabi_divu:
#else
	.global __c6xabi_divremu
	.hidden __c6xabi_divremu
	.type __c6xabi_divremu, STT_FUNC
__c6xabi_divremu:
#endif
	;; We use a series of up to 31 subc instructions.  First, we find
	;; out how many leading zero bits there are in the divisor.  This
	;; gives us both a shift count for aligning (shifting) the divisor
	;; to the dividend, and the number of times we have to execute subc.

	;; At the end, we have both the remainder and most of the quotient
	;; in A4.  The top bit of the quotient is computed first and is
	;; placed in A2.

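	;; A single subc step is the classic restoring-division step; in C
	;; terms (a minimal sketch, with a standing in for A4 and d for the
	;; aligned divisor in B4):
	;;
	;;   if (a >= d)
	;;     a = ((a - d) << 1) | 1;   /* shift in a 1 quotient bit */
	;;   else
	;;     a = a << 1;               /* shift in a 0 quotient bit */
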
	;; Return immediately if the dividend is zero.  Setting B4 to 1
	;; is a trick to allow us to leave the following insns in the jump
	;; delay slot without affecting the result.
	mv .s2x A4, B1

#ifndef _TMS320C6400
 [!b1]	mvk .s2 1, B4
#endif
 [b1]	lmbd .l2 1, B4, B1
||[!b1]	b .s2 B3	; RETURN A
#ifdef _TMS320C6400
||[!b1]	mvk .d2 1, B4
#endif
#ifdef L_udivmodsi4
||[!b1]	zero .s1 A5
#endif
	mv .l1x B1, A6
||	shl .s2 B4, B1, B4

	;; The loop below performs at most 28 of the up-to-31 subc steps
	;; (7 per iteration), so we do the first 3 here.
	cmpltu .l1x A4, B4, A2
 [!A2]	sub .l1x A4, B4, A4
||	shru .s2 B4, 1, B4
||	xor .s1 1, A2, A2

	shl .s1 A2, 31, A2
|| [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1

	;; RETURN A may happen here (note: must happen before the next branch)
0:
	cmpgt .l2 B1, 7, B0
|| [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
|| [b0]	b .s1 0b
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
	;; loop backwards branch happens here

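	;; Epilogue: A4 now holds the remainder in its upper bits and the
	;; quotient, less the top bit kept in A2, in its lower bits.  Shift
	;; the quotient bits up, merge in A2, and shift back down; for
	;; divremu, extract the remainder into A5.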
	ret .s2 B3
||	mvk .s1 32, A1
	sub .l1 A1, A6, A6
#ifdef L_udivmodsi4
||	extu .s1 A4, A6, A5
#endif
	shl .s1 A4, A6, A4
	shru .s1 A4, 1, A4
||	sub .l1 A6, 1, A6
	or .l1 A2, A4, A4
	shru .s1 A4, A6, A4
	nop

#endif

#ifdef L_umodsi3
	.align 2
	.global __c6xabi_remu
	.hidden __c6xabi_remu
	.type __c6xabi_remu, STT_FUNC
__c6xabi_remu:
	;; The ABI seems designed to prevent these functions calling each
	;; other, so we duplicate most of the divu code here.
	mv .s2x A4, B1
#ifndef _TMS320C6400
 [!b1]	mvk .s2 1, B4
#endif
	lmbd .l2 1, B4, B1
||[!b1]	b .s2 B3	; RETURN A
#ifdef _TMS320C6400
||[!b1]	mvk .d2 1, B4
#endif

	mv .l1x B1, A7
||	shl .s2 B4, B1, B4

	cmpltu .l1x A4, B4, A1
 [!a1]	sub .l1x A4, B4, A4
	shru .s2 B4, 1, B4

0:
	cmpgt .l2 B1, 7, B0
|| [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
	;; RETURN A may happen here (note: must happen before the next branch)
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
|| [b0]	b .s1 0b
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
	;; loop backwards branch happens here

	ret .s2 B3
 [b1]	subc .l1x A4,B4,A4
|| [b1]	add .s2 -1, B1, B1
 [b1]	subc .l1x A4,B4,A4

	extu .s1 A4, A7, A4
	nop 2
#endif

#if defined L_strasgi_64plus && defined _TMS320C6400_PLUS

	.align 2
	.global __c6xabi_strasgi_64plus
	.hidden __c6xabi_strasgi_64plus
	.type __c6xabi_strasgi_64plus, STT_FUNC
__c6xabi_strasgi_64plus:
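	;; Copy using the C6400+ software-pipelined loop buffer (SPLOOP),
	;; one word per cycle in steady state.  ilc is loaded with the
	;; word count less 4, as SPLOOPD starts its first iterations
	;; before ilc is tested.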
	shru .s2x a6, 2, b31
||	mv .s1 a4, a30
||	mv .d2 b4, b30

	add .s2 -4, b31, b31

	sploopd 1
||	mvc .s2 b31, ilc
	ldw .d2t2 *b30++, b31
	nop 4
	mv .s1x b31,a31
	spkernel 6, 0
||	stw .d1t1 a31, *a30++

	ret .s2 b3
	nop 5
#endif

#ifdef L_strasgi
	.global __c6xabi_strasgi
	.type __c6xabi_strasgi, STT_FUNC
__c6xabi_strasgi:
	;; This is essentially memcpy, with alignment known to be at least
	;; 4, and the size a multiple of 4 greater than or equal to 28.
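	;;
	;; In C terms this is just a word copy (a minimal sketch; dst, src
	;; and len name the A4, B4 and A6 arguments):
	;;
	;;   void strasgi (int *dst, const int *src, unsigned len)
	;;   {
	;;     unsigned i;
	;;     for (i = 0; i < len / 4; i++)
	;;       dst[i] = src[i];
	;;   }
	;;
	;; The code below software-pipelines this by hand, keeping six
	;; words (A0, A1, A5, A7, A8, A9) in flight.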
	ldw .d2t1 *B4++, A0
||	mvk .s2 16, B1
	ldw .d2t1 *B4++, A1
||	mvk .s2 20, B2
||	sub .d1 A6, 24, A6
	ldw .d2t1 *B4++, A5
	ldw .d2t1 *B4++, A7
||	mv .l2x A6, B7
	ldw .d2t1 *B4++, A8
	ldw .d2t1 *B4++, A9
||	mv .s2x A0, B5
||	cmpltu .l2 B2, B7, B0

0:
	stw .d1t2 B5, *A4++
|| [b0]	ldw .d2t1 *B4++, A0
||	mv .s2x A1, B5
||	mv .l2 B7, B6

 [b0]	sub .d2 B6, 24, B7
|| [b0]	b .s2 0b
||	cmpltu .l2 B1, B6, B0

 [b0]	ldw .d2t1 *B4++, A1
||	stw .d1t2 B5, *A4++
||	mv .s2x A5, B5
||	cmpltu .l2 12, B6, B0

 [b0]	ldw .d2t1 *B4++, A5
||	stw .d1t2 B5, *A4++
||	mv .s2x A7, B5
||	cmpltu .l2 8, B6, B0

 [b0]	ldw .d2t1 *B4++, A7
||	stw .d1t2 B5, *A4++
||	mv .s2x A8, B5
||	cmpltu .l2 4, B6, B0

 [b0]	ldw .d2t1 *B4++, A8
||	stw .d1t2 B5, *A4++
||	mv .s2x A9, B5
||	cmpltu .l2 0, B6, B0

 [b0]	ldw .d2t1 *B4++, A9
||	stw .d1t2 B5, *A4++
||	mv .s2x A0, B5
||	cmpltu .l2 B2, B7, B0

	;; loop back branch happens here

	cmpltu .l2 B1, B6, B0
||	ret .s2 b3

 [b0]	stw .d1t1 A1, *A4++
||	cmpltu .l2 12, B6, B0
 [b0]	stw .d1t1 A5, *A4++
||	cmpltu .l2 8, B6, B0
 [b0]	stw .d1t1 A7, *A4++
||	cmpltu .l2 4, B6, B0
 [b0]	stw .d1t1 A8, *A4++
||	cmpltu .l2 0, B6, B0
 [b0]	stw .d1t1 A9, *A4++

	;; return happens here

#endif

#ifdef _TMS320C6400_PLUS
#ifdef L_push_rts
	.align 2
	.global __c6xabi_push_rts
	.hidden __c6xabi_push_rts
	.type __c6xabi_push_rts, STT_FUNC
__c6xabi_push_rts:
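	;; Prologue helper: push the callee-saved registers B14, A15:A14,
	;; B13:B12, A13:A12, B11:B10, A11:A10 and B3:B2, then continue at
	;; the address in A3 (the stores fill the branch delay slots).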
	stw .d2t2 B14, *B15--[2]
	stdw .d2t1 A15:A14, *B15--
||	b .s2x A3
	stdw .d2t2 B13:B12, *B15--
	stdw .d2t1 A13:A12, *B15--
	stdw .d2t2 B11:B10, *B15--
	stdw .d2t1 A11:A10, *B15--
	stdw .d2t2 B3:B2, *B15--
#endif

#ifdef L_pop_rts
	.align 2
	.global __c6xabi_pop_rts
	.hidden __c6xabi_pop_rts
	.type __c6xabi_pop_rts, STT_FUNC
__c6xabi_pop_rts:
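	;; Epilogue helper: pop the same register pairs in reverse order
	;; and return through the restored B3.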
	lddw .d2t2 *++B15, B3:B2
	lddw .d2t1 *++B15, A11:A10
	lddw .d2t2 *++B15, B11:B10
	lddw .d2t1 *++B15, A13:A12
	lddw .d2t2 *++B15, B13:B12
	lddw .d2t1 *++B15, A15:A14
||	b .s2 B3
	ldw .d2t2 *++B15[2], B14
	nop 4
#endif

#ifdef L_call_stub
	.align 2
	.global __c6xabi_call_stub
	.type __c6xabi_call_stub, STT_FUNC
__c6xabi_call_stub:
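	;; Call the function whose address is in B31, saving and restoring
	;; A0-A2, A7:A6 and B0-B7 around the call so the stub is
	;; transparent to register state.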
	stw .d2t1 A2, *B15--[2]
	stdw .d2t1 A7:A6, *B15--
||	call .s2 B31
	stdw .d2t1 A1:A0, *B15--
	stdw .d2t2 B7:B6, *B15--
	stdw .d2t2 B5:B4, *B15--
	stdw .d2t2 B1:B0, *B15--
	stdw .d2t2 B3:B2, *B15--
||	addkpc .s2 1f, B3, 0
1:
	lddw .d2t2 *++B15, B3:B2
	lddw .d2t2 *++B15, B1:B0
	lddw .d2t2 *++B15, B5:B4
	lddw .d2t2 *++B15, B7:B6
	lddw .d2t1 *++B15, A1:A0
	lddw .d2t1 *++B15, A7:A6
||	b .s2 B3
	ldw .d2t1 *++B15[2], A2
	nop 4
#endif

#endif