]>
Commit | Line | Data |
---|---|---|
299456f3 BE |
1 | /* Copyright (C) 2008, 2009 Free Software Foundation, Inc. |
2 | ||
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify it under | |
6 | the terms of the GNU General Public License as published by the Free | |
7 | Software Foundation; either version 3, or (at your option) any later | |
8 | version. | |
9 | ||
10 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
11 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | for more details. | |
14 | ||
15 | Under Section 7 of GPL version 3, you are granted additional | |
16 | permissions described in the GCC Runtime Library Exception, version | |
17 | 3.1, as published by the Free Software Foundation. | |
18 | ||
19 | You should have received a copy of the GNU General Public License and | |
20 | a copy of the GCC Runtime Library Exception along with this program; | |
21 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 | <http://www.gnu.org/licenses/>. */ | |
23 | ||
24 | #include <spu_mfcio.h> | |
25 | #include <spu_internals.h> | |
26 | #include <spu_intrinsics.h> | |
27 | #include <spu_cache.h> | |
28 | ||
29 | extern unsigned long long __ea_local_store; | |
30 | extern char __cache_tag_array_size; | |
31 | ||
32 | #define LINE_SIZE 128 | |
33 | #define TAG_MASK (LINE_SIZE - 1) | |
34 | ||
35 | #define WAYS 4 | |
36 | #define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE) | |
37 | ||
38 | #define CACHE_LINES ((int) &__cache_tag_array_size / \ | |
39 | sizeof (struct __cache_tag_array) * WAYS) | |
40 | ||
41 | struct __cache_tag_array | |
42 | { | |
43 | unsigned int tag_lo[WAYS]; | |
44 | unsigned int tag_hi[WAYS]; | |
45 | void *base[WAYS]; | |
46 | int reserved[WAYS]; | |
47 | vector unsigned short dirty_bits[WAYS]; | |
48 | }; | |
49 | ||
50 | extern struct __cache_tag_array __cache_tag_array[]; | |
51 | extern char __cache[]; | |
52 | ||
53 | /* In order to make the code seem a little cleaner, and to avoid having | |
54 | 64/32 bit ifdefs all over the place, we use macros. */ | |
55 | ||
56 | #ifdef __EA64__ | |
57 | typedef unsigned long long addr; | |
58 | ||
59 | #define CHECK_TAG(_entry, _way, _tag) \ | |
60 | ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF) \ | |
61 | && (_entry)->tag_hi[(_way)] == ((_tag) >> 32)) | |
62 | ||
63 | #define GET_TAG(_entry, _way) \ | |
64 | ((unsigned long long)(_entry)->tag_hi[(_way)] << 32 \ | |
65 | | (unsigned long long)(_entry)->tag_lo[(_way)]) | |
66 | ||
67 | #define SET_TAG(_entry, _way, _tag) \ | |
68 | (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF; \ | |
69 | (_entry)->tag_hi[(_way)] = (_tag) >> 32 | |
70 | ||
71 | #else /*__EA32__*/ | |
72 | typedef unsigned long addr; | |
73 | ||
74 | #define CHECK_TAG(_entry, _way, _tag) \ | |
75 | ((_entry)->tag_lo[(_way)] == (_tag)) | |
76 | ||
77 | #define GET_TAG(_entry, _way) \ | |
78 | ((_entry)->tag_lo[(_way)]) | |
79 | ||
80 | #define SET_TAG(_entry, _way, _tag) \ | |
81 | (_entry)->tag_lo[(_way)] = (_tag) | |
82 | ||
83 | #endif | |
84 | ||
85 | /* In GET_ENTRY, we cast away the high 32 bits, | |
86 | as the tag is only in the low 32. */ | |
87 | ||
88 | #define GET_ENTRY(_addr) \ | |
89 | ((struct __cache_tag_array *) \ | |
90 | si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \ | |
91 | si_from_uint (SET_MASK)), \ | |
92 | si_from_uint ((unsigned int) __cache_tag_array)))) | |
93 | ||
94 | #define GET_CACHE_LINE(_addr, _way) \ | |
95 | ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE)); | |
96 | ||
97 | #define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec)))) | |
98 | #define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1) | |
99 | #define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1) | |
100 | ||
101 | #define LS_FLAG 0x80000000 | |
102 | #define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG) | |
103 | #define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG) | |
104 | #define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG) | |
105 | ||
106 | static int dma_tag = 32; | |
107 | ||
/* Write back WAY of set ENTRY if it is dirty and not a local-store
   alias, then mark the way empty and clear its dirty bitmap.  The
   write-back merges only the dirty bytes into main memory: non-dirty
   bytes in the target line are left as main memory currently has them.  */
static void
__cache_evict_entry (struct __cache_tag_array *entry, int way)
{
  addr tag = GET_TAG (entry, way);

  /* Lines flagged as local-store aliases were never DMA'd in, so they
     never need to be DMA'd out.  */
  if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way))
    {
#ifdef NONATOMIC
      /* Non-atomic writes.  */
      unsigned int oldmask, mach_stat;
      char *line = ((void *) 0);

      /* Enter critical section.  */
      mach_stat = spu_readch (SPU_RdMachStat);
      spu_idisable ();

      /* Issue DMA request.  */
      line = GET_CACHE_LINE (entry->tag_lo[way], way);
      mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);

      /* Wait for DMA completion.  */
      oldmask = mfc_read_tag_mask ();
      mfc_write_tag_mask (1 << dma_tag);
      mfc_read_tag_status_all ();
      mfc_write_tag_mask (oldmask);

      /* Leave critical section.  Bit 0 of the saved machine status is
	 the interrupt-enable bit: only re-enable interrupts if they
	 were enabled on entry.  */
      if (__builtin_expect (mach_stat & 1, 0))
	spu_ienable ();
#else
      /* Allocate a buffer large enough that we know it has 128 bytes
	 that are 128 byte aligned (for DMA). */

      char buffer[LINE_SIZE + 127];
      qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
      qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
      qword bits;
      unsigned int mach_stat;

      /* Enter critical section.  */
      mach_stat = spu_readch (SPU_RdMachStat);
      spu_idisable ();

      do
	{
	  /* We atomically read the current memory into a buffer
	     modify the dirty bytes in the buffer, and write it
	     back. If writeback fails, loop and try again.  */

	  mfc_getllar (buf_ptr, tag, 0, 0);
	  mfc_read_atomic_status ();

	  /* The method we're using to write 16 dirty bytes into
	     the buffer at a time uses fsmb which in turn uses
	     the least significant 16 bits of word 0, so we
	     load the bits and rotate so that the first bit of
	     the bitmap is in the first bit that fsmb will use.  */

	  bits = (qword) entry->dirty_bits[way];
	  bits = si_rotqbyi (bits, -2);

	  /* Si_fsmb creates the mask of dirty bytes.
	     Use selb to nab the appropriate bits.  */
	  buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));

	  /* Rotate to next 16 byte section of cache.  */
	  bits = si_rotqbyi (bits, 2);

	  /* Repeat for the remaining seven 16-byte quadwords of the
	     128-byte line, advancing the bitmap 16 bits each step.  */
	  buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);

	  /* Conditional put: fails (and we retry) if another processor
	     touched the reserved line since the getllar above.  */
	  mfc_putllc (buf_ptr, tag, 0, 0);
	}
      while (mfc_read_atomic_status ());

      /* Leave critical section.  Re-enable interrupts only if they
	 were enabled on entry (bit 0 of the saved machine status).  */
      if (__builtin_expect (mach_stat & 1, 0))
	spu_ienable ();
#endif
    }

  /* In any case, marking the lo tag with 1 which denotes empty.  */
  SET_EMPTY (entry, way);
  entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
}
205 | ||
206 | void | |
207 | __cache_evict (__ea void *ea) | |
208 | { | |
209 | addr tag = (addr) ea & ~TAG_MASK; | |
210 | struct __cache_tag_array *entry = GET_ENTRY (ea); | |
211 | int i = 0; | |
212 | ||
213 | /* Cycles through all the possible ways an address could be at | |
214 | and evicts the way if found. */ | |
215 | ||
216 | for (i = 0; i < WAYS; i++) | |
217 | if (CHECK_TAG (entry, i, tag)) | |
218 | __cache_evict_entry (entry, i); | |
219 | } | |
220 | ||
/* DMA the LINE_SIZE bytes at main-memory address TAG into the
   local-store cache line for way WAY and wait for completion.
   Returns the local-store address of the filled line.  Lazily
   reserves the module's MFC DMA tag on first use.  */
static void *
__cache_fill (int way, addr tag)
{
  unsigned int oldmask, mach_stat;
  char *line = ((void *) 0);

  /* Reserve our DMA tag.  32 is the "unreserved" sentinel, since
     valid MFC tags are 0..31.  */
  if (dma_tag == 32)
    dma_tag = mfc_tag_reserve ();

  /* Enter critical section.  */
  mach_stat = spu_readch (SPU_RdMachStat);
  spu_idisable ();

  /* Issue DMA request.  */
  line = GET_CACHE_LINE (tag, way);
  mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0);

  /* Wait for DMA completion, saving and restoring the caller's tag
     mask around the blocking status read.  */
  oldmask = mfc_read_tag_mask ();
  mfc_write_tag_mask (1 << dma_tag);
  mfc_read_tag_status_all ();
  mfc_write_tag_mask (oldmask);

  /* Leave critical section.  Re-enable interrupts only if they were
     enabled on entry (bit 0 of the saved machine status).  */
  if (__builtin_expect (mach_stat & 1, 0))
    spu_ienable ();

  return (void *) line;
}
251 | ||
252 | static void | |
253 | __cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way) | |
254 | { | |
255 | ||
256 | addr tag = (addr) ea & ~TAG_MASK; | |
257 | unsigned int lru = 0; | |
258 | int i = 0; | |
259 | int idx = 0; | |
260 | ||
261 | /* If way > 4, then there are no empty slots, so we must evict | |
262 | the least recently used entry. */ | |
263 | if (way >= 4) | |
264 | { | |
265 | for (i = 0; i < WAYS; i++) | |
266 | { | |
267 | if (GET_LRU (entry, i) > lru) | |
268 | { | |
269 | lru = GET_LRU (entry, i); | |
270 | idx = i; | |
271 | } | |
272 | } | |
273 | __cache_evict_entry (entry, idx); | |
274 | way = idx; | |
275 | } | |
276 | ||
277 | /* Set the empty entry's tag and fill it's cache line. */ | |
278 | ||
279 | SET_TAG (entry, way, tag); | |
280 | entry->reserved[way] = 0; | |
281 | ||
282 | /* Check if the address is just an effective address within the | |
283 | SPU's local store. */ | |
284 | ||
285 | /* Because the LS is not 256k aligned, we can't do a nice and mask | |
286 | here to compare, so we must check the whole range. */ | |
287 | ||
288 | if ((addr) ea >= (addr) __ea_local_store | |
289 | && (addr) ea < (addr) (__ea_local_store + 0x40000)) | |
290 | { | |
291 | SET_IS_LS (entry, way); | |
292 | entry->base[way] = | |
293 | (void *) ((unsigned int) ((addr) ea - | |
294 | (addr) __ea_local_store) & ~0x7f); | |
295 | } | |
296 | else | |
297 | { | |
298 | entry->base[way] = __cache_fill (way, tag); | |
299 | } | |
300 | } | |
301 | ||
/* Look up EA in the software cache and return a local-store pointer to
   it, filling the line on a miss (via __cache_miss, which may evict).
   If N_BYTES_DIRTY is nonzero, mark that many bytes starting at EA as
   dirty so they are written back on eviction.  The hit path is written
   with branch-free si_* intrinsics for speed; on a miss we install the
   line and retry via the missreturn label.  */
void *
__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
{
#ifdef __EA64__
  unsigned int tag_hi;
  qword etag_hi;
#endif
  unsigned int tag_lo;
  struct __cache_tag_array *entry;

  qword etag_lo;
  qword equal;
  qword bit_mask;
  qword way;

  /* This first chunk, we merely fill the pointer and tag.  */

  entry = GET_ENTRY (ea);

  /* Splat the relevant word of EA across the quadword and mask off the
     offset-within-line bits to form the lo tag.  */
#ifndef __EA64__
  tag_lo =
    si_to_uint (si_andc
		(si_shufb
		 (si_from_uint ((addr) ea), si_from_uint (0),
		  si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
#else
  tag_lo =
    si_to_uint (si_andc
		(si_shufb
		 (si_from_ullong ((addr) ea), si_from_uint (0),
		  si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));

  tag_hi =
    si_to_uint (si_shufb
		(si_from_ullong ((addr) ea), si_from_uint (0),
		 si_from_uint (0x00010203)));
#endif

  /* Increment LRU in reserved bytes.  The reserved[] words live at
     quadword offset 48 within the set.  */
  si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
	   si_from_ptr (entry), 48);

 missreturn:
  /* Check if the entry's lo_tag is equal to the address' lo_tag.
     All four ways are compared at once: tag_lo[] is the quadword at
     offset 0 of the set.  */
  etag_lo = si_lqd (si_from_ptr (entry), 0);
  equal = si_ceq (etag_lo, si_from_uint (tag_lo));
#ifdef __EA64__
  /* And the high tag too (quadword at offset 16).  */
  etag_hi = si_lqd (si_from_ptr (entry), 16);
  equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi))));
#endif

  if ((si_to_uint (si_orx (equal)) == 0))
    goto misshandler;

  if (n_bytes_dirty)
    {
      /* way = 0x40,0x50,0x60,0x70 for each way, which is also the
	 offset of the appropriate dirty bits.  */
      way = si_shli (si_clz (si_gbb (equal)), 2);

      /* To create the bit_mask, we set it to all 1s (uint -1), then we
	 shift it over (128 - n_bytes_dirty) times.  */

      bit_mask = si_from_uint (-1);

      bit_mask =
	si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));

      bit_mask =
	si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));

      /* Rotate it around to the correct offset.  */
      bit_mask =
	si_rotqby (bit_mask,
		   si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8));

      bit_mask =
	si_rotqbi (bit_mask,
		   si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8));

      /* Update the dirty bits: OR the new mask into the matched way's
	 dirty_bits quadword (addressed by the way offset above).  */
      si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
	       si_from_ptr (entry), way);
    };

  /* We've definitely found the right entry, set LRU (reserved) to 0
     maintaining the LS flag (MSB).  */

  si_stqd (si_andc
	   (si_lqd (si_from_ptr (entry), 48),
	    si_and (equal, si_from_uint (~(LS_FLAG)))),
	   si_from_ptr (entry), 48);

  /* Select the matched way's base pointer (quadword at offset 32) and
     add the byte offset of EA within its line.  */
  return (void *)
    si_to_uint (si_a
		(si_orx
		 (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
		 si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));

 misshandler:
  /* Find the first empty way (lo tag == 1); if none, the computed way
     index is >= WAYS and __cache_miss evicts the LRU way instead.  */
  equal = si_ceqi (etag_lo, 1);
  __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
  goto missreturn;
}
407 | ||
408 | void * | |
409 | __cache_fetch (__ea void *ea) | |
410 | { | |
411 | return __cache_fetch_dirty (ea, 0); | |
412 | } | |
413 | ||
/* Prefetch hint for the cache line containing EA.  Currently
   unimplemented: the call is accepted for API compatibility and the
   argument is ignored.  */
void
__cache_touch (__ea void *ea __attribute__ ((unused)))
{
  /* NO-OP for now. */
}
419 | ||
420 | void __cache_flush (void) __attribute__ ((destructor)); | |
421 | void | |
422 | __cache_flush (void) | |
423 | { | |
424 | struct __cache_tag_array *entry = __cache_tag_array; | |
425 | unsigned int i; | |
426 | int j; | |
427 | ||
428 | /* Cycle through each cache entry and evict all used ways. */ | |
429 | ||
430 | for (i = 0; i < CACHE_LINES / WAYS; i++) | |
431 | { | |
432 | for (j = 0; j < WAYS; j++) | |
433 | if (!CHECK_EMPTY (entry, j)) | |
434 | __cache_evict_entry (entry, j); | |
435 | ||
436 | entry++; | |
437 | } | |
438 | } |