]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blob - ld/emultempl/spu_ovl.S
Update year range in copyright notice of binutils files
[thirdparty/binutils-gdb.git] / ld / emultempl / spu_ovl.S
1 /* Overlay manager for SPU.
2
3 Copyright (C) 2006-2021 Free Software Foundation, Inc.
4
5 This file is part of the GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20 MA 02110-1301, USA. */
21
22 /* MFC DMA definitions used by the overlay loader below.
   MFC_GET_CMD is the command value written to the $MFC_Cmd channel
   for every transfer.  MFC_MAX_DMA_SIZE is the largest size requested
   by a single command, so larger overlays are split across several
   commands.  MFC_TAG_UPDATE_ALL is the value written to
   $MFC_WrTagUpdate before waiting on the tag status.  MFC_TAG_ID is
   the tag written to $MFC_TagId for every transfer and is also used
   to build the tag mask that is waited on.  */
23 #define MFC_GET_CMD 0x40
24 #define MFC_MAX_DMA_SIZE 0x4000
25 #define MFC_TAG_UPDATE_ALL 2
26 #define MFC_TAG_ID 0
27
28 /* Register usage.
   Each physical register gets several names, one for each role it
   plays at a different point in the code.  Names that expand to the
   same register therefore must never be live at the same time.
   reserved1..reserved5 ($75..$79) are used by this file without being
   saved.  save1..save3 ($74..$72) are stored at -16, -32 and -48
   below $sp before they are used as scratch and are reloaded before
   control leaves.  save4 ($71) is only touched when OVLY_IRQ_SAVE is
   defined and is stored at -64 below $sp.  Note that parm is the same
   register as reserved1.  */
29 #define reserved1 $75
30 #define parm $75
31 #define tab1 reserved1
32 #define tab2 reserved1
33 #define vma reserved1
34 #define oldvma reserved1
35 #define newmask reserved1
36 #define map reserved1
37
38 #define reserved2 $76
39 #define off1 reserved2
40 #define off2 reserved2
41 #define present1 reserved2
42 #define present2 reserved2
43 #define sz reserved2
44 #define cmp reserved2
45 #define add64 reserved2
46 #define cgbits reserved2
47 #define off3 reserved2
48 #define off4 reserved2
49 #define addr4 reserved2
50 #define off5 reserved2
51 #define tagstat reserved2
52
53 #define reserved3 $77
54 #define size1 reserved3
55 #define size2 reserved3
56 #define rv3 reserved3
57 #define ealo reserved3
58 #define cmd reserved3
59 #define off64 reserved3
60 #define tab3 reserved3
61 #define tab4 reserved3
62 #define tab5 reserved3
63
64 #define reserved4 $78
65 #define ovl reserved4
66 #define rv2 reserved4
67 #define rv5 reserved4
68 #define cgshuf reserved4
69 #define newovl reserved4
70 #define irqtmp1 reserved4
71 #define irqtmp2 reserved4
72
/* reserved5 holds the branch target for the whole of every path.  */
73 #define reserved5 $79
74 #define target reserved5
75
/* save1..save3 are spilled to the stack before use, see above.  */
76 #define save1 $74
77 #define rv4 save1
78 #define rv7 save1
79 #define tagid save1
80 #define maxsize save1
81 #define pbyte save1
82 #define pbit save1
83
84 #define save2 $73
85 #define cur save2
86 #define rv6 save2
87 #define osize save2
88 #define zovl save2
89 #define oldovl save2
90 #define newvma save2
91
92 #define save3 $72
93 #define rv1 save3
94 #define ea64 save3
95 #define buf3 save3
96 #define genwi save3
97 #define newmap save3
98 #define oldmask save3
99
/* save4 keeps the machine status read on entry to do_load and is
   only referenced inside OVLY_IRQ_SAVE sections.  */
100 #define save4 $71
101 #define irq_stat save4
102
103 .text
104 .align 4
/* shufb control used by __ovly_load when it builds a new $lr.
   Word 0 selects bytes 0..3 of the first shufb source (the address of
   __ovly_return), word 1 selects bytes 0..3 of the second source (the
   quadword loaded from __ovly_current), and the 0x80 control bytes
   produce zero in words 2 and 3.  */
105 .type __rv_pattern, @object
106 .size __rv_pattern, 16
107 __rv_pattern:
108 .word 0x00010203, 0x10111213, 0x80808080, 0x80808080
109
/* shufb control used in the transfer loop.  It copies word 1 of the
   carry bits produced by cg into word 0 and zeroes the other words,
   so that the carry out of the low word is added into the high word
   by the following addx.  */
110 .type __cg_pattern, @object
111 .size __cg_pattern, 16
112 __cg_pattern:
113 .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
114
/* One quadword whose first word holds the index of the overlay most
   recently requested.  It is written by __ovly_return and __ovly_load
   and read back by __ovly_load and by the loader after the transfer
   has been queued.  */
115 .type __ovly_current, @object
116 .size __ovly_current, 16
117 __ovly_current:
118 .space 16
119
120 /*
121 * __ovly_return - stub for returning from overlay functions.
122 *
123 * On entry the four slots of $lr are:
124 * __ovly_return, prev ovl index, caller return addr, undefined.
125 *
126 * Load the previous overlay and jump to the caller return address.
127 * Updates __ovly_current.
 *
 * The table entry of the previous overlay is fetched from
 * _ovly_table - 16 + 16 * index, so index 1 selects the first entry.
 * When the low bit of the size word of that entry is set the code
 * branches straight to the caller return address.  Otherwise it goes
 * to do_load with the entry in "vma" and the address in "target".
 * save1..save3 (and save4 with OVLY_IRQ_SAVE) are stored below $sp
 * here but not modified, so that do_load may use them as scratch and
 * reload them afterwards.
 *
 * NOTE(review): the trailing numbers on each instruction and the
 * commented-out nop and lnop lines look like a record of the intended
 * issue pipe, latency and cycle.  Confirm before reordering anything.
128 */
129 .align 4
130 .global __ovly_return
131 .type __ovly_return, @function
132 __ovly_return:
133 ila tab1, _ovly_table - 16 # 0,2 0
/* Move slot 1 of $lr, the previous overlay index, to the first slot.  */
134 shlqbyi ovl, $lr, 4 # 1,4 0
135 #nop
/* Move slot 2 of $lr, the caller return address, to the first slot.  */
136 shlqbyi target, $lr, 8 # 1,4 1
137 #nop; lnop
138 #nop; lnop
/* off1 = index * 16, one _ovly_table entry being 16 bytes.  */
139 shli off1, ovl, 4 # 0,4 4
140 #lnop
141 #nop
/* Hint the indirect branch at ovly_ret9.  */
142 hbr ovly_ret9, target # 1,15 5
143 #nop; lnop
144 #nop; lnop
145 #nop
/* vma = whole table entry of the overlay being returned to.  */
146 lqx vma, tab1, off1 # 1,6 8
147 #ifdef OVLY_IRQ_SAVE
148 nop
149 stqd save4, -64($sp) # 1,6 9
150 #else
151 #nop; lnop
152 #endif
153 #nop; lnop
154 #nop; lnop
155 #nop; lnop
156 #nop; lnop
157 #nop
/* Rotate the size word of the entry into the first slot.  */
158 rotqbyi size1, vma, 4 # 1,4 14
159 #nop
160 stqd save3, -48($sp) # 1,6 15
161 #nop
162 stqd save2, -32($sp) # 1,6 16
163 #nop
164 stqd save1, -16($sp) # 1,6 17
/* The low bit of size is the flag tested below.  */
165 andi present1, size1, 1 # 0,2 18
166 stqr ovl, __ovly_current # 1,6 18
167 #nop; lnop
168 #nop
/* Flag clear: the overlay has to be loaded first.  */
169 brz present1, do_load # 1,4 20
170 ovly_ret9:
171 #nop
172 bi target # 1,4 21
173
174 /*
175 * __ovly_load - copy an overlay partition to local store.
176 *
177 * On entry $75 points to a word consisting of the overlay index in
178 * the top 14 bits, and the target address in the bottom 18 bits.
179 *
180 * Sets up $lr to return via __ovly_return. If $lr is already set
181 * to return via __ovly_return, don't change it. In that case we
182 * have a tail call from one overlay function to another.
183 * Updates __ovly_current.
 *
 * OVL_STUB_SIZE selects one of two entry sequences.  With 8 the
 * index and target are decoded here from the word that $75 points
 * to.  In the other sequence "ovl" and "target" are read without
 * being written anywhere in this file, so they are presumably set up
 * by the calling stub.  TODO confirm against the stub generator.
 *
 * When $lr is rewritten its four slots become: the address of
 * __ovly_return, the overlay index found in __ovly_current on entry,
 * the old slot 0 of $lr (the original return address) and the old
 * slot 1 of $lr.
 *
 * save1..save3 are stored at -16, -32 and -48 below $sp, used as
 * scratch and reloaded before leaving.  The stored copies are read
 * again by do_load when the overlay is not resident.
184 */
185 .align 3
186 .global __ovly_load
187 .type __ovly_load, @function
188 __ovly_load:
189 #if OVL_STUB_SIZE == 8
190 ########
191 #nop
/* Load the quadword that contains the parameter word.  */
192 lqd target, 0(parm) # 1,6 -11
193 #nop; lnop
194 #nop; lnop
195 #nop; lnop
196 #nop; lnop
197 #nop; lnop
198 #nop
/* Rotate by the low bits of the pointer so that the parameter word
   lands in the first slot.  */
199 rotqby target, target, parm # 1,4 -5
200 ila tab2, _ovly_table - 16 # 0,2 -4
201 stqd save3, -48($sp) # 1,6 -4
202 #nop
203 stqd save2, -32($sp) # 1,6 -3
204 #nop
205 stqd save1, -16($sp) # 1,6 -2
/* Shift right by 18 to leave the overlay index from the top bits.  */
206 rotmi ovl, target, -18 # 0,4 -1
207 hbr ovly_load9, target # 1,15 -1
208 ila rv1, __ovly_return # 0,2 0
209 #lnop
210 #nop; lnop
211 #nop
/* cur = overlay index that was current on entry, read before the
   store below replaces it.  */
212 lqr cur, __ovly_current # 1,6 2
213 shli off2, ovl, 4 # 0,4 3
214 stqr ovl, __ovly_current # 1,6 3
/* rv2 word 0 is all ones when $lr already returns via __ovly_return.  */
215 ceq rv2, $lr, rv1 # 0,2 4
216 lqr rv3, __rv_pattern # 1,6 4
217 #nop; lnop
218 #nop; lnop
219 #nop
/* vma = table entry of the requested overlay.  */
220 lqx vma, tab2, off2 # 1,6 7
221 ########
222 #else /* OVL_STUB_SIZE == 16 */
223 ########
/* Same work as the sequence above except that ovl and target are
   taken as already valid on entry.  */
224 ila tab2, _ovly_table - 16 # 0,2 0
225 stqd save3, -48($sp) # 1,6 0
226 ila rv1, __ovly_return # 0,2 1
227 stqd save2, -32($sp) # 1,6 1
228 shli off2, ovl, 4 # 0,4 2
229 lqr cur, __ovly_current # 1,6 2
230 nop
231 stqr ovl, __ovly_current # 1,6 3
232 ceq rv2, $lr, rv1 # 0,2 4
233 lqr rv3, __rv_pattern # 1,6 4
234 #nop
235 hbr ovly_load9, target # 1,15 5
236 #nop
237 lqx vma, tab2, off2 # 1,6 6
238 #nop
239 stqd save1, -16($sp) # 1,6 7
240 ########
241 #endif
242
243 #nop; lnop
244 #nop; lnop
245 #nop
/* rv4 = { __ovly_return, index that was current on entry, 0, 0 }.  */
246 shufb rv4, rv1, cur, rv3 # 1,4 10
247 #nop
/* Expand the compare result into a select mask for the whole
   quadword: all ones when $lr must be kept, all zeros otherwise.  */
248 fsmb rv5, rv2 # 1,4 11
249 #nop
/* rv6 = { 0, 0, old $lr slot 0, old $lr slot 1 }.  */
250 rotqmbyi rv6, $lr, -8 # 1,4 12
251 #nop
/* Rotate the size word of the entry into the first slot.  */
252 rotqbyi size2, vma, 4 # 1,4 13
253 #nop
/* rv1 is no longer needed, so save3 can be restored.  */
254 lqd save3, -48($sp) # 1,6 14
255 #nop; lnop
/* rv7 = candidate new $lr.  */
256 or rv7, rv4, rv6 # 0,2 16
257 lqd save2, -32($sp) # 1,6 16
/* The low bit of size is the flag tested below.  */
258 andi present2, size2, 1 # 0,2 17
259 #ifdef OVLY_IRQ_SAVE
260 stqd save4, -64($sp) # 1,6 17
261 #else
262 lnop # 1,0 17
263 #endif
/* Keep $lr where the mask is set, otherwise take rv7.  */
264 selb $lr, rv7, $lr, rv5 # 0,2 18
265 lqd save1, -16($sp) # 1,6 18
266 #nop
/* Flag clear: the overlay has to be loaded first.  */
267 brz present2, do_load # 1,4 19
268 ovly_load9:
269 #nop
270 bi target # 1,4 20
271
272 /* If we get here, we are about to load a new overlay.
273 * "vma" contains the relevant entry from _ovly_table[].
274 * extern struct {
275 * u32 vma;
276 * u32 size;
277 * u32 file_offset;
278 * u32 buf;
279 * } _ovly_table[];
 *
 * Outline of the code below:
 * 1. Queue MFC get commands, each for at most MFC_MAX_DMA_SIZE, that
 *    read from the 64-bit address loaded from _EAR_ plus file_offset
 *    into local store starting at vma.
 * 2. While the transfers run, set the low bit of size in the entry of
 *    the new overlay, store the new index in the _ovly_buf_table slot
 *    selected by buf, and remember the index that slot held before.
 * 3. Wait for tag MFC_TAG_ID with the caller tag mask saved and
 *    restored around the wait.
 * 4. Clear the low bit of size in the entry of the overlay that was
 *    in the buffer, unless the old index was zero.
 * 5. Reload save1..save3 (and save4 with OVLY_IRQ_SAVE) from below
 *    $sp and branch to target.
 * With OVLY_IRQ_SAVE the machine status is read first, interrupts
 * are disabled for the duration, and they are enabled again at the
 * end when bit 0 of the saved status is set.
 * __ovly_load_event is a second global name for this entry point,
 * presumably for the benefit of a debugger.  TODO confirm.
280 */
281 .align 3
282 .global __ovly_load_event
283 .type __ovly_load_event, @function
284 __ovly_load_event:
285 do_load:
286 #ifdef OVLY_IRQ_SAVE
287 ila irqtmp1, do_load10 # 0,2 -5
/* sz = file_offset word of the entry, rotated into the first slot.  */
288 rotqbyi sz, vma, 8 # 1,4 -5
289 #nop
/* Remember the machine status so the interrupt state can be put
   back at the end.  */
290 rdch irq_stat, $SPU_RdMachStat # 1,6 -4
291 #nop
/* Branch to the next instruction with interrupts disabled.  */
292 bid irqtmp1 # 1,4 -3
293 do_load10:
294 nop
295 #else
296 #nop
/* sz = file_offset word of the entry, rotated into the first slot.  */
297 rotqbyi sz, vma, 8 # 1,4 0
298 #endif
/* osize = size word of the entry, the amount still to transfer.  */
299 rotqbyi osize, vma, 4 # 1,4 1
300 #nop
/* ea64 = quadword stored at _EAR_, used as a 64-bit address in
   words 0 and 1.  */
301 lqa ea64, _EAR_ # 1,6 2
302 #nop
303 lqr cgshuf, __cg_pattern # 1,6 3
304
305 /* We could predict the branch at the end of this loop by adding a few
306 instructions, and there are plenty of free cycles to do so without
307 impacting loop execution time. However, it doesn't make a great
308 deal of sense since we need to wait for the dma to complete anyway. */
/* On entry to each pass sz is the amount to add to the 64-bit
   address: file_offset on the first pass, the size of the chunk just
   queued on later passes.  */
309 __ovly_xfer_loop:
310 #nop
/* off64 = sz moved to word 1 with zero in word 0, a 64-bit addend.  */
311 rotqmbyi off64, sz, -4 # 1,4 4
312 #nop; lnop
313 #nop; lnop
314 #nop; lnop
/* Carry out of each word of ea64 + off64.  */
315 cg cgbits, ea64, off64 # 0,2 8
316 #lnop
317 #nop; lnop
318 #nop
/* Move the carry of the low word into the high word position.  */
319 shufb add64, cgbits, cgbits, cgshuf # 1,4 10
320 #nop; lnop
321 #nop; lnop
322 #nop; lnop
/* add64 = ea64 + off64 + carry, completing the 64-bit add.  */
323 addx add64, ea64, off64 # 0,2 14
324 #lnop
325 ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
326 lnop
/* Keep the advanced address for the next pass and split off its
   low word for the EAL channel.  */
327 ori ea64, add64, 0 # 0,2 16
328 rotqbyi ealo, add64, 4 # 1,4 16
329 cgt cmp, osize, maxsize # 0,2 17
330 wrch $MFC_LSA, vma # 1,6 17
331 #nop; lnop
/* sz = smaller of osize and maxsize.  */
332 selb sz, osize, maxsize, cmp # 0,2 19
333 wrch $MFC_EAH, ea64 # 1,6 19
334 ila tagid, MFC_TAG_ID # 0,2 20
335 wrch $MFC_EAL, ealo # 1,6 20
336 ila cmd, MFC_GET_CMD # 0,2 21
337 wrch $MFC_Size, sz # 1,6 21
/* osize = osize - sz, what is left after this chunk.  */
338 sf osize, sz, osize # 0,2 22
339 wrch $MFC_TagId, tagid # 1,6 22
/* Advance the local store address past this chunk.  */
340 a vma, vma, sz # 0,2 23
/* Writing the command channel is the last step of queueing the
   transfer described by the channel writes above.  */
341 wrch $MFC_Cmd, cmd # 1,6 23
342 #nop
343 brnz osize, __ovly_xfer_loop # 1,4 24
344
345 /* Now update our data structures while waiting for DMA to complete.
346 Low bit of .size needs to be cleared on the _ovly_table entry
347 corresponding to the evicted overlay, and set on the entry for the
348 newly loaded overlay. Note that no overlay may in fact be evicted
349 as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
350 for zero index! Also of course update the _ovly_buf_table entry. */
351 #nop
/* The index was stored to __ovly_current by the caller of do_load.  */
352 lqr newovl, __ovly_current # 1,6 25
353 #nop; lnop
354 #nop; lnop
355 #nop; lnop
356 #nop; lnop
357 #nop; lnop
358 shli off3, newovl, 4 # 0,4 31
359 #lnop
360 ila tab3, _ovly_table - 16 # 0,2 32
361 #lnop
362 #nop
/* pbyte has only byte 7 set, the last byte of word 1.  After the
   andi below pbit is 1 in word 1 (the size word) and 0 elsewhere.  */
363 fsmbi pbyte, 0x100 # 1,4 33
364 #nop; lnop
365 #nop
/* Reload the entry, vma having been advanced by the loop.  */
366 lqx vma, tab3, off3 # 1,6 35
367 #nop; lnop
368 andi pbit, pbyte, 1 # 0,2 37
369 lnop
370 #nop; lnop
371 #nop; lnop
372 #nop; lnop
/* Set the low bit of size in the entry of the new overlay.  */
373 or newvma, vma, pbit # 0,2 41
/* buf3 = buf word of the entry, rotated into the first slot.  */
374 rotqbyi buf3, vma, 12 # 1,4 41
375 #nop; lnop
376 #nop
377 stqx newvma, tab3, off3 # 1,6 43
378 #nop; lnop
/* off4 = buf * 4, the _ovly_buf_table slots being one word each and
   addressed from _ovly_buf_table - 4.  */
379 shli off4, buf3, 2 # 1,4 45
380 #lnop
381 ila tab4, _ovly_buf_table - 4 # 0,2 46
382 #lnop
383 #nop; lnop
384 #nop; lnop
385 #nop
/* map = quadword containing the buffer slot.  */
386 lqx map, tab4, off4 # 1,6 49
387 #nop
/* genwi = shufb control that inserts a word at the slot address.  */
388 cwx genwi, tab4, off4 # 1,4 50
389 a addr4, tab4, off4 # 0,2 51
390 #lnop
391 #nop; lnop
392 #nop; lnop
393 #nop; lnop
394 #nop
/* oldovl = index previously recorded for this buffer.  */
395 rotqby oldovl, map, addr4 # 1,4 55
396 #nop
/* newmap = map with the slot replaced by the new index.  */
397 shufb newmap, newovl, map, genwi # 0,4 56
398 #if MFC_TAG_ID < 16
399 ila newmask, 1 << MFC_TAG_ID # 0,2 57
400 #else
401 ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57
402 #endif
403 #lnop
404 #nop; lnop
405 #nop; lnop
406 stqd newmap, 0(addr4) # 1,6 60
407
408 /* Save app's tagmask, wait for DMA complete, restore mask. */
409 ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
410 rdch oldmask, $MFC_RdTagMask # 1,6 61
411 #nop
412 wrch $MFC_WrTagMask, newmask # 1,6 62
413 #nop
414 wrch $MFC_WrTagUpdate, tagstat # 1,6 63
415 #nop
/* The value read is not used afterwards.  */
416 rdch tagstat, $MFC_RdTagStat # 1,6 64
417 #nop
418 sync # 1,4 65
419 /* Any hint prior to the sync is lost. A hint here allows the branch
420 to complete 15 cycles after the hint. With no hint the branch will
421 take 18 or 19 cycles. */
422 ila tab5, _ovly_table - 16 # 0,2 66
423 hbr do_load99, target # 1,15 66
424 shli off5, oldovl, 4 # 0,4 67
/* oldmask shares a register with save3, so the mask is written back
   before save3 is reloaded further down.  */
425 wrch $MFC_WrTagMask, oldmask # 1,6 67
/* zovl becomes all ones in every word when the old index is zero.  */
426 ceqi zovl, oldovl, 0 # 0,2 68
427 #lnop
428 #nop; lnop
429 #nop
430 fsm zovl, zovl # 1,4 70
431 #nop
/* oldvma = entry of the overlay that was in the buffer.  For index
   zero this is the quadword at _ovly_table - 16, which is written
   back unchanged below.  */
432 lqx oldvma, tab5, off5 # 1,6 71
433 #nop
434 lqd save3, -48($sp) # 1,6 72
435 #nop; lnop
/* Drop the bit to clear when nothing was evicted.  */
436 andc pbit, pbit, zovl # 0,2 74
437 lqd save2, -32($sp) # 1,6 74
438 #ifdef OVLY_IRQ_SAVE
439 ila irqtmp2, do_load90 # 0,2 75
440 #lnop
/* Keep only bit 0 of the status read on entry.  */
441 andi irq_stat, irq_stat, 1 # 0,2 76
442 #lnop
443 #else
444 #nop; lnop
445 #nop; lnop
446 #endif
/* Clear the low bit of size in the evicted entry.  pbit shares a
   register with save1, which is reloaded right after this use.  */
447 andc oldvma, oldvma, pbit # 0,2 77
448 lqd save1, -16($sp) # 1,6 77
449 nop # 0,0 78
450 #lnop
451 #nop
452 stqx oldvma, tab5, off5 # 1,6 79
453 #nop
454 #ifdef OVLY_IRQ_SAVE
/* Branch to do_load90, enabling interrupts, when the saved bit is
   set.  Otherwise fall through to the same place.  */
455 binze irq_stat, irqtmp2 # 1,4 80
456 do_load90:
457 #nop
458 lqd save4, -64($sp) # 1,6 84
459 #else
460 #nop; lnop
461 #endif
462
/* _ovly_debug_event is a global label on a nop that the load path
   falls through after the overlay tables have been updated and just
   before the jump to the target.  It is presumably a place for a
   debugger to stop once the new overlay is in place.  TODO confirm.  */
463 .global _ovly_debug_event
464 .type _ovly_debug_event, @function
465 _ovly_debug_event:
466 nop
467 /* Branch to target address. */
/* do_load99 is the branch named by the hbr hint issued after the
   sync in do_load.  */
468 do_load99:
469 bi target # 1,4 81/85
470
/* The recorded size of __ovly_load runs from its label to here, so
   it also covers do_load and the labels above.  */
471 .size __ovly_load, . - __ovly_load