]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - ld/emultempl/spu_ovl.S
Update year range in copyright notice of binutils files
[thirdparty/binutils-gdb.git] / ld / emultempl / spu_ovl.S
CommitLineData
e9f53129
AM
1/* Overlay manager for SPU.
2
b3adc24a 3 Copyright (C) 2006-2020 Free Software Foundation, Inc.
e9f53129 4
f96b4a7b 5 This file is part of the GNU Binutils.
e9f53129 6
f96b4a7b 7 This program is free software; you can redistribute it and/or modify
e9f53129 8 it under the terms of the GNU General Public License as published by
f96b4a7b
NC
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
e9f53129 11
f96b4a7b 12 This program is distributed in the hope that it will be useful,
e9f53129
AM
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f96b4a7b
NC
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20 MA 02110-1301, USA. */
e9f53129 21
47f6dab9 22/* MFC DMA definitions. */
e9f53129
AM
23#define MFC_GET_CMD 0x40
24#define MFC_MAX_DMA_SIZE 0x4000
25#define MFC_TAG_UPDATE_ALL 2
26#define MFC_TAG_ID 0
27
47f6dab9
AM
28/* Register usage.
   Each group below gives several working names to one physical register.
   Names in the same group share storage, so only one of them can hold a
   live value at any point in the code.

   reserved1-reserved5 ($75-$79) are overwritten by the overlay manager
   without being saved.  save1-save3 ($74-$72) are stored below $sp
   before they are modified and are reloaded before control is passed to
   the target; save4 ($71) is stored and reloaded only when OVLY_IRQ_SAVE
   is defined.  */
29#define reserved1 $75
30#define parm $75
31#define tab1 reserved1
32#define tab2 reserved1
33#define vma reserved1
34#define oldvma reserved1
35#define newmask reserved1
36#define map reserved1
37
38#define reserved2 $76
39#define off1 reserved2
40#define off2 reserved2
41#define present1 reserved2
42#define present2 reserved2
43#define sz reserved2
44#define cmp reserved2
45#define add64 reserved2
46#define cgbits reserved2
47#define off3 reserved2
48#define off4 reserved2
2e444bea 49#define addr4 reserved2
47f6dab9
AM
50#define off5 reserved2
51#define tagstat reserved2
52
53#define reserved3 $77
2e444bea
AM
54#define size1 reserved3
55#define size2 reserved3
47f6dab9
AM
56#define rv3 reserved3
57#define ealo reserved3
58#define cmd reserved3
59#define off64 reserved3
60#define tab3 reserved3
61#define tab4 reserved3
62#define tab5 reserved3
63
64#define reserved4 $78
65#define ovl reserved4
66#define rv2 reserved4
67#define rv5 reserved4
68#define cgshuf reserved4
69#define newovl reserved4
99302af9
AM
70#define irqtmp1 reserved4
71#define irqtmp2 reserved4
47f6dab9
AM
72
73#define reserved5 $79
74#define target reserved5
75
/* save1: spilled to -16($sp).  */
99302af9 76#define save1 $74
47f6dab9
AM
77#define rv4 save1
78#define rv7 save1
79#define tagid save1
80#define maxsize save1
81#define pbyte save1
82#define pbit save1
83
/* save2: spilled to -32($sp).  */
84#define save2 $73
85#define cur save2
86#define rv6 save2
87#define osize save2
88#define zovl save2
89#define oldovl save2
90#define newvma save2
91
/* save3: spilled to -48($sp).  */
99302af9 92#define save3 $72
47f6dab9
AM
93#define rv1 save3
94#define ea64 save3
95#define buf3 save3
96#define genwi save3
97#define newmap save3
98#define oldmask save3
e9f53129 99
/* save4: spilled to -64($sp), and only referenced, when OVLY_IRQ_SAVE is
   defined.  */
99302af9
AM
100#define save4 $71
101#define irq_stat save4
c828a49f 102
e9f53129 103 .text
6c19b93b 104 .align 4
47f6dab9
AM
/* shufb control quadword.  __ovly_load loads it as rv3 and uses it to
   merge the address of __ovly_return (rv1) with the quadword read from
   __ovly_current (cur) while building the new $lr.  */
105 .type __rv_pattern, @object
106 .size __rv_pattern, 16
e9f53129 107__rv_pattern:
47f6dab9
AM
108 .word 0x00010203, 0x10111213, 0x80808080, 0x80808080
109
/* shufb control quadword.  do_load loads it as cgshuf and applies it to
   the carry bits produced by cg, ahead of the addx that advances the
   DMA effective address.  */
110 .type __cg_pattern, @object
111 .size __cg_pattern, 16
e9f53129 112__cg_pattern:
47f6dab9
AM
113 .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
114
/* One quadword of storage.  __ovly_return and __ovly_load store the ovl
   register here with stqr; __ovly_load and do_load read it back with
   lqr.  */
115 .type __ovly_current, @object
116 .size __ovly_current, 16
117__ovly_current:
118 .space 16
e9f53129 119
47f6dab9 120/*
e9f53129
AM
121 * __ovly_return - stub for returning from overlay functions.
122 *
47f6dab9
AM
123 * On entry the four slots of $lr are:
124 * __ovly_return, prev ovl index, caller return addr, undefined.
e9f53129 125 *
47f6dab9
AM
126 * Load the previous overlay and jump to the caller return address.
127 * Updates __ovly_current.
 *
 * Only the "reserved" registers ($75-$79) are written here.  save1-save3
 * (and save4 when OVLY_IRQ_SAVE is defined) are stored below $sp without
 * being changed, ready for do_load, which uses them as scratch and
 * reloads them from the same stack offsets.
 * NOTE(review): the trailing "# a,b c" annotations on each instruction
 * look like pipeline, latency and issue cycle - confirm.
e9f53129 128 */
6c19b93b 129 .align 4
47f6dab9
AM
130 .global __ovly_return
131 .type __ovly_return, @function
e9f53129 132__ovly_return:
47f6dab9
AM
/* Shift slots 1 and 2 of $lr (previous overlay index, caller return
   address) down to the first word, as ovl and target.  */
133 ila tab1, _ovly_table - 16 # 0,2 0
134 shlqbyi ovl, $lr, 4 # 1,4 0
135#nop
136 shlqbyi target, $lr, 8 # 1,4 1
137#nop; lnop
138#nop; lnop
/* off1 = ovl * 16, the size of one _ovly_table entry.  tab1 is biased by
   -16, so overlay indices count from 1.  */
139 shli off1, ovl, 4 # 0,4 4
140#lnop
141#nop
142 hbr ovly_ret9, target # 1,15 5
143#nop; lnop
144#nop; lnop
145#nop
/* vma = the whole 16-byte table entry for the overlay being returned to.  */
146 lqx vma, tab1, off1 # 1,6 8
99302af9
AM
147#ifdef OVLY_IRQ_SAVE
148 nop
149 stqd save4, -64($sp) # 1,6 9
150#else
47f6dab9 151#nop; lnop
99302af9 152#endif
47f6dab9
AM
153#nop; lnop
154#nop; lnop
155#nop; lnop
156#nop; lnop
157#nop
/* Rotate the entry so its second word (.size) comes first; the low bit
   of that word is tested below.  */
2e444bea 158 rotqbyi size1, vma, 4 # 1,4 14
47f6dab9
AM
159#nop
160 stqd save3, -48($sp) # 1,6 15
161#nop
162 stqd save2, -32($sp) # 1,6 16
163#nop
164 stqd save1, -16($sp) # 1,6 17
2e444bea
AM
165 andi present1, size1, 1 # 0,2 18
166 stqr ovl, __ovly_current # 1,6 18
47f6dab9
AM
167#nop; lnop
168#nop
/* Low bit of .size clear: the overlay is not loaded, so go and load it.
   Otherwise branch straight to the caller's return address.  */
2e444bea 169 brz present1, do_load # 1,4 20
47f6dab9
AM
170ovly_ret9:
171#nop
172 bi target # 1,4 21
173
174/*
e9f53129
AM
175 * __ovly_load - copy an overlay partition to local store.
176 *
47f6dab9
AM
177 * On entry $75 points to a word consisting of the overlay index in
178 * the top 14 bits, and the target address in the bottom 18 bits.
e9f53129 179 *
99302af9
AM
180 * Sets up $lr to return via __ovly_return. If $lr is already set
181 * to return via __ovly_return, don't change it. In that case we
182 * have a tail call from one overlay function to another.
47f6dab9 183 * Updates __ovly_current.
 *
 * NOTE(review): the entry condition above matches the OVL_STUB_SIZE == 8
 * path, which loads the word through parm ($75).  The other path reads
 * ovl and target without setting them, so it presumably receives them
 * already split out by the stub - confirm against the stub generator.
 *
 * save1-save3 are spilled because rv1, cur/rv6 and rv4/rv7 alias them;
 * they are reloaded from the stack before the final branch.
e9f53129 184 */
47f6dab9
AM
185 .align 3
186 .global __ovly_load
187 .type __ovly_load, @function
e9f53129 188__ovly_load:
47f6dab9
AM
189#if OVL_STUB_SIZE == 8
190########
191#nop
/* Fetch the quadword containing the stub word and rotate that word to
   the front; ovl is then its top 14 bits (right shift by 18).  */
192 lqd target, 0(parm) # 1,6 -11
193#nop; lnop
194#nop; lnop
195#nop; lnop
196#nop; lnop
197#nop; lnop
198#nop
199 rotqby target, target, parm # 1,4 -5
200 ila tab2, _ovly_table - 16 # 0,2 -4
201 stqd save3, -48($sp) # 1,6 -4
202#nop
203 stqd save2, -32($sp) # 1,6 -3
204#nop
205 stqd save1, -16($sp) # 1,6 -2
206 rotmi ovl, target, -18 # 0,4 -1
207 hbr ovly_load9, target # 1,15 -1
208 ila rv1, __ovly_return # 0,2 0
209#lnop
210#nop; lnop
211#nop
/* cur = previous contents of __ovly_current, read before the new index
   is stored.  rv2 = word-wise comparison of $lr with the address of
   __ovly_return.  */
2e444bea 212 lqr cur, __ovly_current # 1,6 2
47f6dab9 213 shli off2, ovl, 4 # 0,4 3
2e444bea 214 stqr ovl, __ovly_current # 1,6 3
47f6dab9 215 ceq rv2, $lr, rv1 # 0,2 4
2e444bea 216 lqr rv3, __rv_pattern # 1,6 4
47f6dab9
AM
217#nop; lnop
218#nop; lnop
219#nop
220 lqx vma, tab2, off2 # 1,6 7
221########
222#else /* OVL_STUB_SIZE == 16 */
223########
/* Same work as the branch above, minus the decoding of the stub word,
   and in a different issue order.  */
224 ila tab2, _ovly_table - 16 # 0,2 0
225 stqd save3, -48($sp) # 1,6 0
226 ila rv1, __ovly_return # 0,2 1
227 stqd save2, -32($sp) # 1,6 1
228 shli off2, ovl, 4 # 0,4 2
2e444bea 229 lqr cur, __ovly_current # 1,6 2
47f6dab9 230 nop
2e444bea 231 stqr ovl, __ovly_current # 1,6 3
47f6dab9 232 ceq rv2, $lr, rv1 # 0,2 4
2e444bea 233 lqr rv3, __rv_pattern # 1,6 4
47f6dab9
AM
234#nop
235 hbr ovly_load9, target # 1,15 5
236#nop
237 lqx vma, tab2, off2 # 1,6 6
238#nop
239 stqd save1, -16($sp) # 1,6 7
240########
c828a49f
AM
241#endif
242
47f6dab9
AM
243#nop; lnop
244#nop; lnop
245#nop
/* Build the candidate $lr in rv7: rv4 combines __ovly_return and the
   previous overlay index under control of __rv_pattern, rv6 is the old
   $lr shifted right by 8 bytes, and the two are or-ed together.  rv5 is
   a byte mask formed from the ceq result in rv2.  */
246 shufb rv4, rv1, cur, rv3 # 1,4 10
247#nop
248 fsmb rv5, rv2 # 1,4 11
249#nop
250 rotqmbyi rv6, $lr, -8 # 1,4 12
251#nop
/* Rotate the table entry so .size comes first; its low bit is the
   "already loaded" flag tested by brz below.  */
2e444bea 252 rotqbyi size2, vma, 4 # 1,4 13
47f6dab9
AM
253#nop
254 lqd save3, -48($sp) # 1,6 14
255#nop; lnop
256 or rv7, rv4, rv6 # 0,2 16
257 lqd save2, -32($sp) # 1,6 16
2e444bea 258 andi present2, size2, 1 # 0,2 17
99302af9
AM
259#ifdef OVLY_IRQ_SAVE
260 stqd save4, -64($sp) # 1,6 17
261#else
47f6dab9 262 lnop # 1,0 17
99302af9 263#endif
47f6dab9
AM
/* Where the rv5 mask is set the existing $lr is kept (it already returns
   via __ovly_return); elsewhere the newly built rv7 is installed.  */
264 selb $lr, rv7, $lr, rv5 # 0,2 18
265 lqd save1, -16($sp) # 1,6 18
266#nop
/* Not loaded yet: go to do_load.  Otherwise jump to the target.  */
2e444bea 267 brz present2, do_load # 1,4 19
47f6dab9
AM
268ovly_load9:
269#nop
270 bi target # 1,4 20
271
272/* If we get here, we are about to load a new overlay.
273 * "vma" contains the relevant entry from _ovly_table[].
e9f53129
AM
274 * extern struct {
275 * u32 vma;
276 * u32 size;
277 * u32 file_offset;
278 * u32 buf;
279 * } _ovly_table[];
280 */
47f6dab9
AM
281 .align 3
282 .global __ovly_load_event
283 .type __ovly_load_event, @function
b1e37473 284__ovly_load_event:
2e444bea 285do_load:
99302af9
AM
/* With OVLY_IRQ_SAVE the machine status is read into irq_stat and
   execution continues at do_load10 through bid, which branches with
   interrupts disabled.  Both variants leave the table entry rotated by
   8 bytes in sz, so .file_offset is its first word.  */
286#ifdef OVLY_IRQ_SAVE
287 ila irqtmp1, do_load10 # 0,2 -5
288 rotqbyi sz, vma, 8 # 1,4 -5
47f6dab9 289#nop
99302af9 290 rdch irq_stat, $SPU_RdMachStat # 1,6 -4
47f6dab9 291#nop
99302af9
AM
292 bid irqtmp1 # 1,4 -3
293do_load10:
294 nop
295#else
296#nop
297 rotqbyi sz, vma, 8 # 1,4 0
298#endif
47f6dab9
AM
/* osize = entry rotated so .size comes first (bytes still to transfer).
   ea64 = quadword at _EAR_, the starting effective address.  */
299 rotqbyi osize, vma, 4 # 1,4 1
300#nop
301 lqa ea64, _EAR_ # 1,6 2
302#nop
2e444bea 303 lqr cgshuf, __cg_pattern # 1,6 3
47f6dab9
AM
304
305/* We could predict the branch at the end of this loop by adding a few
306 instructions, and there are plenty of free cycles to do so without
307 impacting loop execution time. However, it doesn't make a great
308 deal of sense since we need to wait for the dma to complete anyway. */
e9f53129 309__ovly_xfer_loop:
47f6dab9
AM
/* Each pass advances ea64 by the first word of sz using a 64-bit add
   (cg, shufb, addx), queues one MFC get of min(osize, MFC_MAX_DMA_SIZE)
   bytes to local-store address vma, then steps osize down and vma up by
   that amount.  On the first pass sz still holds the rotated table
   entry, so the amount added is .file_offset; on later passes it is the
   size of the chunk just queued.  */
310#nop
311 rotqmbyi off64, sz, -4 # 1,4 4
312#nop; lnop
313#nop; lnop
314#nop; lnop
315 cg cgbits, ea64, off64 # 0,2 8
316#lnop
317#nop; lnop
318#nop
319 shufb add64, cgbits, cgbits, cgshuf # 1,4 10
320#nop; lnop
321#nop; lnop
322#nop; lnop
323 addx add64, ea64, off64 # 0,2 14
324#lnop
325 ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
326 lnop
327 ori ea64, add64, 0 # 0,2 16
328 rotqbyi ealo, add64, 4 # 1,4 16
329 cgt cmp, osize, maxsize # 0,2 17
330 wrch $MFC_LSA, vma # 1,6 17
331#nop; lnop
332 selb sz, osize, maxsize, cmp # 0,2 19
333 wrch $MFC_EAH, ea64 # 1,6 19
334 ila tagid, MFC_TAG_ID # 0,2 20
335 wrch $MFC_EAL, ealo # 1,6 20
336 ila cmd, MFC_GET_CMD # 0,2 21
337 wrch $MFC_Size, sz # 1,6 21
338 sf osize, sz, osize # 0,2 22
339 wrch $MFC_TagId, tagid # 1,6 22
340 a vma, vma, sz # 0,2 23
341 wrch $MFC_Cmd, cmd # 1,6 23
342#nop
343 brnz osize, __ovly_xfer_loop # 1,4 24
344
345/* Now update our data structures while waiting for DMA to complete.
2e444bea 346 Low bit of .size needs to be cleared on the _ovly_table entry
47f6dab9
AM
347 corresponding to the evicted overlay, and set on the entry for the
348 newly loaded overlay. Note that no overlay may in fact be evicted
2e444bea 349 as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
47f6dab9
AM
350 for zero index! Also of course update the _ovly_buf_table entry. */
351#nop
2e444bea 352 lqr newovl, __ovly_current # 1,6 25
47f6dab9
AM
353#nop; lnop
354#nop; lnop
355#nop; lnop
356#nop; lnop
357#nop; lnop
358 shli off3, newovl, 4 # 0,4 31
359#lnop
360 ila tab3, _ovly_table - 16 # 0,2 32
361#lnop
362#nop
/* fsmbi followed by andi leaves pbit holding 1 in the word that lines up
   with .size in a table entry and 0 elsewhere; or-ing it into the
   reloaded entry sets the "loaded" bit.  */
2e444bea 363 fsmbi pbyte, 0x100 # 1,4 33
47f6dab9
AM
364#nop; lnop
365#nop
366 lqx vma, tab3, off3 # 1,6 35
367#nop; lnop
368 andi pbit, pbyte, 1 # 0,2 37
369 lnop
370#nop; lnop
371#nop; lnop
372#nop; lnop
373 or newvma, vma, pbit # 0,2 41
/* buf3 = entry rotated so .buf comes first.  */
374 rotqbyi buf3, vma, 12 # 1,4 41
375#nop; lnop
376#nop
377 stqx newvma, tab3, off3 # 1,6 43
378#nop; lnop
/* off4 = buf * 4 indexes _ovly_buf_table by word; the table address is
   biased by -4, so buf counts from 1.  */
379 shli off4, buf3, 2 # 1,4 45
380#lnop
2e444bea 381 ila tab4, _ovly_buf_table - 4 # 0,2 46
47f6dab9
AM
382#lnop
383#nop; lnop
384#nop; lnop
385#nop
/* map = quadword containing the buffer's table word; genwi = insertion
   control for that word position; addr4 = address of the word.  */
386 lqx map, tab4, off4 # 1,6 49
387#nop
388 cwx genwi, tab4, off4 # 1,4 50
2e444bea
AM
389 a addr4, tab4, off4 # 0,2 51
390#lnop
47f6dab9
AM
391#nop; lnop
392#nop; lnop
393#nop; lnop
394#nop
/* oldovl = overlay index previously recorded for this buffer (rotated to
   the front); newmap = map with newovl inserted in its place.  */
2e444bea
AM
395 rotqby oldovl, map, addr4 # 1,4 55
396#nop
47f6dab9 397 shufb newmap, newovl, map, genwi # 0,4 56
e9f53129 398#if MFC_TAG_ID < 16
47f6dab9 399 ila newmask, 1 << MFC_TAG_ID # 0,2 57
e9f53129 400#else
47f6dab9 401 ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57
c828a49f 402#endif
47f6dab9
AM
403#lnop
404#nop; lnop
405#nop; lnop
2e444bea 406 stqd newmap, 0(addr4) # 1,6 60
47f6dab9
AM
407
408/* Save app's tagmask, wait for DMA complete, restore mask. */
409 ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
410 rdch oldmask, $MFC_RdTagMask # 1,6 61
411#nop
412 wrch $MFC_WrTagMask, newmask # 1,6 62
413#nop
414 wrch $MFC_WrTagUpdate, tagstat # 1,6 63
415#nop
416 rdch tagstat, $MFC_RdTagStat # 1,6 64
417#nop
418 sync # 1,4 65
419/* Any hint prior to the sync is lost. A hint here allows the branch
420 to complete 15 cycles after the hint. With no hint the branch will
421 take 18 or 19 cycles. */
422 ila tab5, _ovly_table - 16 # 0,2 66
423 hbr do_load99, target # 1,15 66
424 shli off5, oldovl, 4 # 0,4 67
425 wrch $MFC_WrTagMask, oldmask # 1,6 67
/* zovl becomes an all-ones mask when oldovl is 0.  Clearing pbit with it
   makes the andc on oldvma a no-op in that case, so the quadword at
   index 0 is written back unchanged.  */
426 ceqi zovl, oldovl, 0 # 0,2 68
427#lnop
428#nop; lnop
429#nop
430 fsm zovl, zovl # 1,4 70
431#nop
432 lqx oldvma, tab5, off5 # 1,6 71
433#nop
434 lqd save3, -48($sp) # 1,6 72
435#nop; lnop
436 andc pbit, pbit, zovl # 0,2 74
437 lqd save2, -32($sp) # 1,6 74
99302af9
AM
438#ifdef OVLY_IRQ_SAVE
439 ila irqtmp2, do_load90 # 0,2 75
440#lnop
441 andi irq_stat, irq_stat, 1 # 0,2 76
442#lnop
443#else
47f6dab9
AM
444#nop; lnop
445#nop; lnop
99302af9 446#endif
47f6dab9
AM
/* Clear the "loaded" bit in the evicted overlay's entry and store it.  */
447 andc oldvma, oldvma, pbit # 0,2 77
448 lqd save1, -16($sp) # 1,6 77
6c19b93b 449 nop # 0,0 78
99302af9
AM
450#lnop
451#nop
47f6dab9 452 stqx oldvma, tab5, off5 # 1,6 79
99302af9
AM
453#nop
/* With OVLY_IRQ_SAVE: if bit 0 of the machine status read at entry was
   set, binze branches to do_load90 and enables interrupts; otherwise
   execution simply falls through to do_load90.  save4 is then reloaded.  */
454#ifdef OVLY_IRQ_SAVE
455 binze irq_stat, irqtmp2 # 1,4 80
456do_load90:
457#nop
458 lqd save4, -64($sp) # 1,6 84
459#else
47f6dab9 460#nop; lnop
99302af9 461#endif
c828a49f 462
47f6dab9
AM
/* _ovly_debug_event is a global label on a single nop just ahead of the
   final branch.  NOTE(review): presumably a breakpoint location for a
   debugger once the overlay is in place - confirm.  */
463 .global _ovly_debug_event
464 .type _ovly_debug_event, @function
e9f53129 465_ovly_debug_event:
e9f53129 466 nop
e9f53129 467/* Branch to target address. */
47f6dab9 468do_load99:
99302af9 469 bi target # 1,4 81/85
b1e37473 470
/* The size of __ovly_load is measured from its label to this point, so
   it also covers do_load and _ovly_debug_event.  */
47f6dab9 471 .size __ovly_load, . - __ovly_load