/* Copyright (C) 2008-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include <spu_mfcio.h>
#include <spu_internals.h>
#include <spu_intrinsics.h>
#include <spu_cache.h>

extern unsigned long long __ea_local_store;
extern char __cache_tag_array_size;
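
/* Note on the cache geometry (descriptive addition): the size of the tag
   array is communicated through the *address* of the linker-defined symbol
   __cache_tag_array_size -- its address, not its contents, is the size in
   bytes -- which is why the macros below cast &__cache_tag_array_size to an
   integer.  As a worked example, assuming a 64 KB, 4-way cache with
   128-byte lines, the tag array is 128 sets * 128 bytes = 16 KB, so
   SET_MASK == 0x3F80 and CACHE_LINES == 16384 / 128 * 4 == 512.  */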

#define LINE_SIZE 128
#define TAG_MASK (LINE_SIZE - 1)

#define WAYS 4
#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE)

#define CACHE_LINES ((int) &__cache_tag_array_size / \
                     sizeof (struct __cache_tag_array) * WAYS)

struct __cache_tag_array
{
  unsigned int tag_lo[WAYS];
  unsigned int tag_hi[WAYS];
  void *base[WAYS];
  int reserved[WAYS];
  vector unsigned short dirty_bits[WAYS];
};
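
/* Layout note: each of the first four members is exactly one 16-byte
   quadword (SPU pointers are 32-bit, so base[4] is 16 bytes too), and
   dirty_bits is four quadwords, so sizeof (struct __cache_tag_array)
   == 128 == LINE_SIZE.  The resulting quadword offsets (tag_lo at 0,
   tag_hi at 16, base at 32, reserved at 48, dirty_bits[way] at
   0x40 + way * 0x10) are what the si_lqd/si_stqd accesses in
   __cache_fetch_dirty below rely on.  */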

extern struct __cache_tag_array __cache_tag_array[];
extern char __cache[];

/* In order to make the code seem a little cleaner, and to avoid having
   64/32 bit ifdefs all over the place, we use macros.  */

#ifdef __EA64__
typedef unsigned long long addr;

#define CHECK_TAG(_entry, _way, _tag)                     \
  ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF)      \
   && (_entry)->tag_hi[(_way)] == ((_tag) >> 32))

#define GET_TAG(_entry, _way)                             \
  ((unsigned long long) (_entry)->tag_hi[(_way)] << 32    \
   | (unsigned long long) (_entry)->tag_lo[(_way)])

#define SET_TAG(_entry, _way, _tag)                       \
  (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF;         \
  (_entry)->tag_hi[(_way)] = (_tag) >> 32

#else /* __EA32__ */
typedef unsigned long addr;

#define CHECK_TAG(_entry, _way, _tag) \
  ((_entry)->tag_lo[(_way)] == (_tag))

#define GET_TAG(_entry, _way) \
  ((_entry)->tag_lo[(_way)])

#define SET_TAG(_entry, _way, _tag) \
  (_entry)->tag_lo[(_way)] = (_tag)

#endif

/* In GET_ENTRY, we cast away the high 32 bits,
   as the tag is only in the low 32.  */

#define GET_ENTRY(_addr)                                                    \
  ((struct __cache_tag_array *)                                             \
   si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \
                             si_from_uint (SET_MASK)),                      \
                     si_from_uint ((unsigned int) __cache_tag_array))))

#define GET_CACHE_LINE(_addr, _way) \
  ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE))
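
/* Because sizeof (struct __cache_tag_array) == LINE_SIZE, (_addr & SET_MASK)
   is simultaneously the byte offset of the set's tag entry within
   __cache_tag_array and, once multiplied by WAYS, the byte offset of the
   set's first data line within __cache.  */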

#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec))))
#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1)
#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1)

#define LS_FLAG 0x80000000
#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG)
#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG)
#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG)
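
/* A tag_lo value of 1 can never be a real tag, since real tags are
   LINE_SIZE aligned, so 1 doubles as the "empty" marker used by
   SET_EMPTY/CHECK_EMPTY.  The reserved word of each way holds an LRU
   counter in its low bits (larger means less recently used); the MSB
   (LS_FLAG) marks entries that alias the SPU's own local store and
   therefore never need DMA write-back.  */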

static int dma_tag = 32;
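/* MFC tag-group IDs run from 0 to 31, so 32 means "no DMA tag reserved
   yet"; a real tag is reserved lazily in __cache_fill.  */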

static void
__cache_evict_entry (struct __cache_tag_array *entry, int way)
{
  addr tag = GET_TAG (entry, way);

  if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way))
    {
#ifdef NONATOMIC
      /* Non-atomic writes.  */
      unsigned int oldmask, mach_stat;
      char *line = ((void *) 0);

      /* Enter critical section.  */
      mach_stat = spu_readch (SPU_RdMachStat);
      spu_idisable ();

      /* Issue DMA request.  */
      line = GET_CACHE_LINE (entry->tag_lo[way], way);
      mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);

      /* Wait for DMA completion.  */
      oldmask = mfc_read_tag_mask ();
      mfc_write_tag_mask (1 << dma_tag);
      mfc_read_tag_status_all ();
      mfc_write_tag_mask (oldmask);

      /* Leave critical section.  */
      if (__builtin_expect (mach_stat & 1, 0))
        spu_ienable ();
#else
      /* Allocate a buffer large enough that we know it has 128 bytes
         that are 128-byte aligned (for DMA).  */

      char buffer[LINE_SIZE + 127];
      qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
      qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
      qword bits;
      unsigned int mach_stat;

      /* Enter critical section.  */
      mach_stat = spu_readch (SPU_RdMachStat);
      spu_idisable ();

      do
        {
          /* We atomically read the current memory into a buffer,
             modify the dirty bytes in the buffer, and write it
             back.  If the writeback fails, loop and try again.  */

          mfc_getllar (buf_ptr, tag, 0, 0);
          mfc_read_atomic_status ();

          /* The method we're using to write 16 dirty bytes into
             the buffer at a time uses fsmb, which in turn uses
             the least significant 16 bits of word 0, so we
             load the bits and rotate so that the first bit of
             the bitmap is in the first bit that fsmb will use.  */
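          /* Layout note: each dirty_bits vector holds one bit per byte
             of the 128-byte line, halfword N covering bytes
             16*N .. 16*N + 15 with the most significant bit first,
             which is the layout the selb/rotate sequence below
             assumes.  */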

          bits = (qword) entry->dirty_bits[way];
          bits = si_rotqbyi (bits, -2);

          /* si_fsmb creates the mask of dirty bytes.
             Use selb to nab the appropriate bits.  */
          buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));

          /* Rotate to the next 16-byte section of the line.  */
          bits = si_rotqbyi (bits, 2);

          buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);
          buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
          bits = si_rotqbyi (bits, 2);

          mfc_putllc (buf_ptr, tag, 0, 0);
        }
      while (mfc_read_atomic_status ());

      /* Leave critical section.  */
      if (__builtin_expect (mach_stat & 1, 0))
        spu_ienable ();
#endif
    }

  /* In any case, mark the lo tag with 1, which denotes empty.  */
  SET_EMPTY (entry, way);
  entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
}

void
__cache_evict (__ea void *ea)
{
  addr tag = (addr) ea & ~TAG_MASK;
  struct __cache_tag_array *entry = GET_ENTRY (ea);
  int i = 0;

  /* Cycle through all the ways the address could be cached in,
     and evict the way if it is found.  */

  for (i = 0; i < WAYS; i++)
    if (CHECK_TAG (entry, i, tag))
      __cache_evict_entry (entry, i);
}

static void *
__cache_fill (int way, addr tag)
{
  unsigned int oldmask, mach_stat;
  char *line = ((void *) 0);

  /* Reserve our DMA tag.  */
  if (dma_tag == 32)
    dma_tag = mfc_tag_reserve ();

  /* Enter critical section.  */
  mach_stat = spu_readch (SPU_RdMachStat);
  spu_idisable ();

  /* Issue DMA request.  */
  line = GET_CACHE_LINE (tag, way);
  mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0);

  /* Wait for DMA completion.  */
  oldmask = mfc_read_tag_mask ();
  mfc_write_tag_mask (1 << dma_tag);
  mfc_read_tag_status_all ();
  mfc_write_tag_mask (oldmask);

  /* Leave critical section.  */
  if (__builtin_expect (mach_stat & 1, 0))
    spu_ienable ();

  return (void *) line;
}

static void
__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way)
{
  addr tag = (addr) ea & ~TAG_MASK;
  unsigned int lru = 0;
  int i = 0;
  int idx = 0;

  /* If way >= WAYS, there are no empty ways in this set, so we must
     evict the least recently used entry.  */
  if (way >= WAYS)
    {
      for (i = 0; i < WAYS; i++)
        {
          if (GET_LRU (entry, i) > lru)
            {
              lru = GET_LRU (entry, i);
              idx = i;
            }
        }
      __cache_evict_entry (entry, idx);
      way = idx;
    }

  /* Set the empty entry's tag and fill its cache line.  */

  SET_TAG (entry, way, tag);
  entry->reserved[way] = 0;

  /* Check if the address is just an effective address within the
     SPU's local store.  */

  /* Because the LS is not 256k aligned, we can't simply AND with a
     mask to compare, so we must check the whole range.  */

  if ((addr) ea >= (addr) __ea_local_store
      && (addr) ea < (addr) (__ea_local_store + 0x40000))
    {
      SET_IS_LS (entry, way);
      entry->base[way] =
        (void *) ((unsigned int) ((addr) ea
                                  - (addr) __ea_local_store) & ~0x7f);
    }
  else
    {
      entry->base[way] = __cache_fill (way, tag);
    }
}

void *
__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
{
#ifdef __EA64__
  unsigned int tag_hi;
  qword etag_hi;
#endif
  unsigned int tag_lo;
  struct __cache_tag_array *entry;

  qword etag_lo;
  qword equal;
  qword bit_mask;
  qword way;

  /* In this first chunk, we merely fill in the entry pointer and the tag.  */

  entry = GET_ENTRY (ea);

#ifndef __EA64__
  tag_lo =
    si_to_uint (si_andc
                (si_shufb
                 (si_from_uint ((addr) ea), si_from_uint (0),
                  si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
#else
  tag_lo =
    si_to_uint (si_andc
                (si_shufb
                 (si_from_ullong ((addr) ea), si_from_uint (0),
                  si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));

  tag_hi =
    si_to_uint (si_shufb
                (si_from_ullong ((addr) ea), si_from_uint (0),
                 si_from_uint (0x00010203)));
#endif

  /* Increment the LRU counters in the reserved bytes.  */
  si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
           si_from_ptr (entry), 48);
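  /* The si_ai above adds 1 to each of the four words loaded from offset
     48, so every way's LRU counter in this set ages by one on each
     access; the way that actually hits is reset to zero below.  */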

missreturn:
  /* Check if the entry's lo_tag is equal to the address' lo_tag.  */
  etag_lo = si_lqd (si_from_ptr (entry), 0);
  equal = si_ceq (etag_lo, si_from_uint (tag_lo));
#ifdef __EA64__
  /* And the high tag too.  */
  etag_hi = si_lqd (si_from_ptr (entry), 16);
  equal = si_and (equal, si_ceq (etag_hi, si_from_uint (tag_hi)));
#endif

  if (si_to_uint (si_orx (equal)) == 0)
    goto misshandler;

  if (n_bytes_dirty)
    {
      /* way is 0x40, 0x50, 0x60 or 0x70 for ways 0-3, which is also
         the offset of that way's dirty bits within the entry.  */
      way = si_shli (si_clz (si_gbb (equal)), 2);

      /* To create the bit_mask, we set it to all 1s (uint -1), then we
         shift it left by (128 - n_bytes_dirty) bits.  */

      bit_mask = si_from_uint (-1);

      bit_mask =
        si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));

      bit_mask =
        si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));

      /* Rotate it around to the correct offset.  */
      bit_mask =
        si_rotqby (bit_mask,
                   si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8));

      bit_mask =
        si_rotqbi (bit_mask,
                   si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8));
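      /* bit_mask now holds one bit per byte of the cache line (byte 0
         of the line in the most significant bit position), with the
         n_bytes_dirty bits starting at this access's offset set.  */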

      /* Update the dirty bits.  */
      si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
               si_from_ptr (entry), way);
    }

  /* We've definitely found the right entry; set its LRU (reserved) to 0,
     maintaining the LS flag (MSB).  */

  si_stqd (si_andc
           (si_lqd (si_from_ptr (entry), 48),
            si_and (equal, si_from_uint (~(LS_FLAG)))),
           si_from_ptr (entry), 48);

  return (void *)
    si_to_uint (si_a
                (si_orx
                 (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
                 si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));

misshandler:
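  /* si_ceqi marks the ways whose tag_lo holds the "empty" marker (1);
     si_gbb/si_clz turn that into the index of the first empty way, or
     WAYS when the set is full, in which case __cache_miss evicts the
     least recently used way.  */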
  equal = si_ceqi (etag_lo, 1);
  __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
  goto missreturn;
}

void *
__cache_fetch (__ea void *ea)
{
  return __cache_fetch_dirty (ea, 0);
}

void
__cache_touch (__ea void *ea __attribute__ ((unused)))
{
  /* NO-OP for now.  */
}

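/* __cache_flush runs as a destructor so that any dirty lines are written
   back to main memory before the program exits.  */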
void __cache_flush (void) __attribute__ ((destructor));
void
__cache_flush (void)
{
  struct __cache_tag_array *entry = __cache_tag_array;
  unsigned int i;
  int j;

  /* Cycle through each cache entry and evict all used ways.  */

  for (i = 0; i < CACHE_LINES / WAYS; i++)
    {
      for (j = 0; j < WAYS; j++)
        if (!CHECK_EMPTY (entry, j))
          __cache_evict_entry (entry, j);

      entry++;
    }
}