]>
Commit | Line | Data |
---|---|---|
c906108c | 1 | /* Simulator cache routines for CGEN simulators (and maybe others). |
3666a048 | 2 | Copyright (C) 1996-2021 Free Software Foundation, Inc. |
c906108c SS |
3 | Contributed by Cygnus Support. |
4 | ||
5 | This file is part of GDB, the GNU debugger. | |
6 | ||
7 | This program is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
4744ac1b JB |
9 | the Free Software Foundation; either version 3 of the License, or |
10 | (at your option) any later version. | |
c906108c SS |
11 | |
12 | This program is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
4744ac1b JB |
17 | You should have received a copy of the GNU General Public License |
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
c906108c SS |
19 | |
20 | #define SCACHE_DEFINE_INLINE | |
21 | ||
22 | #include "sim-main.h" | |
c906108c | 23 | #include <stdlib.h> |
c906108c SS |
24 | #include "libiberty.h" |
25 | #include "sim-options.h" | |
26 | #include "sim-io.h" | |
27 | ||
c906108c SS |
/* Address value used to mark a hash table or scache entry as empty.  */
#define UNUSED_ADDR 0xffffffff

/* Scache configuration parameters.
   ??? Experiments to determine reasonable values are still in progress;
   the numbers below are just guesses.  */

/* Default number of scache elements.  A build can override it by defining
   CONFIG_SIM_CACHE_SIZE.
   The size of an element is typically 32-64 bytes, so the size of the
   default scache will be between 512K and 1M bytes.  */
#ifdef CONFIG_SIM_CACHE_SIZE
#define SCACHE_DEFAULT_CACHE_SIZE CONFIG_SIM_CACHE_SIZE
#else
#define SCACHE_DEFAULT_CACHE_SIZE 16384
#endif

/* Minimum cache size.
   The m32r port assumes a cache size of at least 2 so it can decode both 16
   bit insns.  When compiling we need an extra entry for the chain entry,
   and the size must be a power of 2 (the option handler rounds sizes up to
   one).  Hence 4 is the minimum (though, for those with featuritis or itchy
   pedantic bits, we could make this conditional on WITH_SCACHE_PBB).  */
#define MIN_SCACHE_SIZE 4

/* Ratio of size of text section to size of scache.
   When compiling, we don't want to flush the scache more than we have to
   but we also don't want it to be exorbitantly large.  So we pick a high
   default value, then reduce it by the size of the program being simulated,
   but we don't override any value specified on the command line.
   If not specified on the command line, the size to use is computed as
   max (MIN_SCACHE_SIZE,
        min (SCACHE_DEFAULT_CACHE_SIZE,
             text_size / (base_insn_size * INSN_SCACHE_RATIO))).  */
/* ??? Interesting idea but not currently used.  */
#define INSN_SCACHE_RATIO 4

/* Default maximum insn chain length.
   The only reason for a maximum is so we can place a maximum size on the
   profiling table.  Chain lengths are determined by cti's.
   32 is a more reasonable number, but when profiling, the before/after
   handlers take up that much more space.  The scache is filled from front to
   back so all this determines is when the scache needs to be flushed.  */
#define MAX_CHAIN_LENGTH 64

/* Default maximum hash list length.  */
#define MAX_HASH_CHAIN_LENGTH 4

/* Minimum hash table size.  */
#define MIN_HASH_CHAINS 32

/* Ratio of number of scache elements to number of hash lists.
   Since the user can only specify the size of the scache, we compute the
   size of the hash table as
   max (MIN_HASH_CHAINS, scache_size / SCACHE_HASH_RATIO).  */
#define SCACHE_HASH_RATIO 8

/* Hash a PC value.
   FIXME: May wish to make the hashing architecture specific.
   FIXME: revisit  */
#define HASH_PC(pc) (((pc) >> 2) + ((pc) >> 5))
88 | ||
89 | static MODULE_INIT_FN scache_init; | |
90 | static MODULE_UNINSTALL_FN scache_uninstall; | |
91 | ||
92 | static DECLARE_OPTION_HANDLER (scache_option_handler); | |
93 | ||
94 | #define OPTION_PROFILE_SCACHE (OPTION_START + 0) | |
95 | ||
96 | static const OPTION scache_options[] = { | |
97 | { {"scache-size", optional_argument, NULL, 'c'}, | |
98 | 'c', "[SIZE]", "Specify size of simulator execution cache", | |
99 | scache_option_handler }, | |
100 | #if WITH_SCACHE_PBB | |
101 | /* ??? It might be nice to allow the user to specify the size of the hash | |
102 | table, the maximum hash list length, and the maximum chain length, but | |
103 | for now that might be more akin to featuritis. */ | |
104 | #endif | |
105 | { {"profile-scache", optional_argument, NULL, OPTION_PROFILE_SCACHE}, | |
106 | '\0', "on|off", "Perform simulator execution cache profiling", | |
107 | scache_option_handler }, | |
108 | { {NULL, no_argument, NULL, 0}, '\0', NULL, NULL, NULL } | |
109 | }; | |
110 | ||
111 | static SIM_RC | |
112 | scache_option_handler (SIM_DESC sd, sim_cpu *cpu, int opt, | |
113 | char *arg, int is_command) | |
114 | { | |
c906108c SS |
115 | switch (opt) |
116 | { | |
117 | case 'c' : | |
118 | if (WITH_SCACHE) | |
119 | { | |
120 | if (arg != NULL) | |
121 | { | |
13754e4c | 122 | unsigned int n = (unsigned int) strtoul (arg, NULL, 0); |
c906108c SS |
123 | if (n < MIN_SCACHE_SIZE) |
124 | { | |
13754e4c NC |
125 | sim_io_eprintf (sd, "invalid scache size `%u', must be at least %u", |
126 | n, MIN_SCACHE_SIZE); | |
c906108c SS |
127 | return SIM_RC_FAIL; |
128 | } | |
129 | /* Ensure it's a multiple of 2. */ | |
130 | if ((n & (n - 1)) != 0) | |
131 | { | |
13754e4c NC |
132 | unsigned int i; |
133 | sim_io_eprintf (sd, "scache size `%u' not a multiple of 2\n", n); | |
134 | /* Round up to nearest multiple of 2. */ | |
135 | for (i = 1; i && i < n; i <<= 1) | |
136 | continue; | |
137 | if (i) | |
138 | { | |
139 | n = i; | |
140 | sim_io_eprintf (sd, "rounding scache size up to %u\n", n); | |
141 | } | |
c906108c SS |
142 | } |
143 | if (cpu == NULL) | |
144 | STATE_SCACHE_SIZE (sd) = n; | |
145 | else | |
146 | CPU_SCACHE_SIZE (cpu) = n; | |
147 | } | |
148 | else | |
149 | { | |
150 | if (cpu == NULL) | |
151 | STATE_SCACHE_SIZE (sd) = SCACHE_DEFAULT_CACHE_SIZE; | |
152 | else | |
153 | CPU_SCACHE_SIZE (cpu) = SCACHE_DEFAULT_CACHE_SIZE; | |
154 | } | |
155 | } | |
156 | else | |
157 | sim_io_eprintf (sd, "Simulator execution cache not enabled, `--scache-size' ignored\n"); | |
158 | break; | |
159 | ||
160 | case OPTION_PROFILE_SCACHE : | |
161 | if (WITH_SCACHE && WITH_PROFILE_SCACHE_P) | |
162 | { | |
163 | /* FIXME: handle cpu != NULL. */ | |
164 | return sim_profile_set_option (sd, "-scache", PROFILE_SCACHE_IDX, | |
165 | arg); | |
166 | } | |
167 | else | |
168 | sim_io_eprintf (sd, "Simulator cache profiling not compiled in, `--profile-scache' ignored\n"); | |
169 | break; | |
170 | } | |
171 | ||
172 | return SIM_RC_OK; | |
173 | } | |
174 | ||
175 | SIM_RC | |
176 | scache_install (SIM_DESC sd) | |
177 | { | |
178 | sim_add_option_table (sd, NULL, scache_options); | |
179 | sim_module_add_init_fn (sd, scache_init); | |
180 | sim_module_add_uninstall_fn (sd, scache_uninstall); | |
181 | ||
182 | /* This is the default, it may be overridden on the command line. */ | |
183 | STATE_SCACHE_SIZE (sd) = WITH_SCACHE; | |
184 | ||
185 | return SIM_RC_OK; | |
186 | } | |
187 | ||
188 | static SIM_RC | |
189 | scache_init (SIM_DESC sd) | |
190 | { | |
191 | int c; | |
192 | ||
193 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
194 | { | |
195 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
196 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
197 | ||
198 | /* elm_size is 0 if the cpu doesn't not have scache support */ | |
199 | if (elm_size == 0) | |
200 | { | |
201 | CPU_SCACHE_SIZE (cpu) = 0; | |
202 | CPU_SCACHE_CACHE (cpu) = NULL; | |
203 | } | |
204 | else | |
205 | { | |
206 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
207 | CPU_SCACHE_SIZE (cpu) = STATE_SCACHE_SIZE (sd); | |
208 | CPU_SCACHE_CACHE (cpu) = | |
209 | (SCACHE *) xmalloc (CPU_SCACHE_SIZE (cpu) * elm_size); | |
210 | #if WITH_SCACHE_PBB | |
211 | CPU_SCACHE_MAX_CHAIN_LENGTH (cpu) = MAX_CHAIN_LENGTH; | |
212 | CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu) = MAX_HASH_CHAIN_LENGTH; | |
bc273e17 | 213 | CPU_SCACHE_NUM_HASH_CHAINS (cpu) = max (MIN_HASH_CHAINS, |
c906108c SS |
214 | CPU_SCACHE_SIZE (cpu) |
215 | / SCACHE_HASH_RATIO); | |
216 | CPU_SCACHE_HASH_TABLE (cpu) = | |
217 | (SCACHE_MAP *) xmalloc (CPU_SCACHE_NUM_HASH_CHAINS (cpu) | |
218 | * CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu) | |
219 | * sizeof (SCACHE_MAP)); | |
220 | CPU_SCACHE_PBB_BEGIN (cpu) = (SCACHE *) zalloc (elm_size); | |
221 | CPU_SCACHE_CHAIN_LENGTHS (cpu) = | |
222 | (unsigned long *) zalloc ((CPU_SCACHE_MAX_CHAIN_LENGTH (cpu) + 1) | |
223 | * sizeof (long)); | |
224 | #endif | |
225 | } | |
226 | } | |
227 | ||
228 | scache_flush (sd); | |
229 | ||
230 | return SIM_RC_OK; | |
231 | } | |
232 | ||
233 | static void | |
234 | scache_uninstall (SIM_DESC sd) | |
235 | { | |
236 | int c; | |
237 | ||
238 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
239 | { | |
240 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
241 | ||
242 | if (CPU_SCACHE_CACHE (cpu) != NULL) | |
243 | free (CPU_SCACHE_CACHE (cpu)); | |
244 | #if WITH_SCACHE_PBB | |
245 | if (CPU_SCACHE_HASH_TABLE (cpu) != NULL) | |
246 | free (CPU_SCACHE_HASH_TABLE (cpu)); | |
247 | if (CPU_SCACHE_PBB_BEGIN (cpu) != NULL) | |
248 | free (CPU_SCACHE_PBB_BEGIN (cpu)); | |
249 | if (CPU_SCACHE_CHAIN_LENGTHS (cpu) != NULL) | |
250 | free (CPU_SCACHE_CHAIN_LENGTHS (cpu)); | |
251 | #endif | |
252 | } | |
253 | } | |
254 | ||
255 | void | |
256 | scache_flush (SIM_DESC sd) | |
257 | { | |
258 | int c; | |
259 | ||
260 | for (c = 0; c < MAX_NR_PROCESSORS; ++c) | |
261 | { | |
262 | SIM_CPU *cpu = STATE_CPU (sd, c); | |
263 | scache_flush_cpu (cpu); | |
264 | } | |
265 | } | |
266 | ||
267 | void | |
268 | scache_flush_cpu (SIM_CPU *cpu) | |
269 | { | |
270 | int i,n; | |
271 | ||
272 | /* Don't bother if cache not in use. */ | |
273 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
274 | return; | |
275 | ||
276 | #if WITH_SCACHE_PBB | |
277 | /* It's important that this be reasonably fast as this can be done when | |
278 | the simulation is running. */ | |
279 | CPU_SCACHE_NEXT_FREE (cpu) = CPU_SCACHE_CACHE (cpu); | |
280 | n = CPU_SCACHE_NUM_HASH_CHAINS (cpu) * CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
281 | /* ??? Might be faster to just set the first entry, then update the | |
282 | "last entry" marker during allocation. */ | |
283 | for (i = 0; i < n; ++i) | |
284 | CPU_SCACHE_HASH_TABLE (cpu) [i] . pc = UNUSED_ADDR; | |
285 | #else | |
286 | { | |
287 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
288 | SCACHE *sc; | |
289 | ||
290 | /* Technically, this may not be necessary, but it helps debugging. */ | |
291 | memset (CPU_SCACHE_CACHE (cpu), 0, | |
292 | CPU_SCACHE_SIZE (cpu) * elm_size); | |
293 | ||
294 | for (i = 0, sc = CPU_SCACHE_CACHE (cpu); i < CPU_SCACHE_SIZE (cpu); | |
295 | ++i, sc = (SCACHE *) ((char *) sc + elm_size)) | |
296 | { | |
297 | sc->argbuf.addr = UNUSED_ADDR; | |
298 | } | |
299 | } | |
300 | #endif | |
301 | } | |
302 | ||
303 | #if WITH_SCACHE_PBB | |
304 | ||
305 | /* Look up PC in the hash table of scache entry points. | |
306 | Returns the entry or NULL if not found. */ | |
307 | ||
308 | SCACHE * | |
309 | scache_lookup (SIM_CPU *cpu, IADDR pc) | |
310 | { | |
96baa820 JM |
311 | /* FIXME: hash computation is wrong, doesn't take into account |
312 | NUM_HASH_CHAIN_ENTRIES. A lot of the hash table will be unused! */ | |
c906108c SS |
313 | unsigned int slot = HASH_PC (pc) & (CPU_SCACHE_NUM_HASH_CHAINS (cpu) - 1); |
314 | int i, max_i = CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
315 | SCACHE_MAP *scm; | |
316 | ||
317 | /* We don't update hit/miss statistics as this is only used when recording | |
318 | branch target addresses. */ | |
319 | ||
320 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
321 | for (i = 0; i < max_i && scm->pc != UNUSED_ADDR; ++i, ++scm) | |
322 | { | |
323 | if (scm->pc == pc) | |
324 | return scm->sc; | |
325 | } | |
326 | return 0; | |
327 | } | |
328 | ||
329 | /* Look up PC and if not found create an entry for it. | |
330 | If found the result is a pointer to the SCACHE entry. | |
331 | If not found the result is NULL, and the address of a buffer of at least | |
332 | N entries is stored in BUFP. | |
333 | It's done this way so the caller can still distinguish found/not-found. | |
334 | If the table is full, it is emptied to make room. | |
335 | If the maximum length of a hash list is reached a random entry is thrown out | |
336 | to make room. | |
337 | ??? One might want to try to make this smarter, but let's see some | |
338 | measurable benefit first. */ | |
339 | ||
340 | SCACHE * | |
341 | scache_lookup_or_alloc (SIM_CPU *cpu, IADDR pc, int n, SCACHE **bufp) | |
342 | { | |
96baa820 JM |
343 | /* FIXME: hash computation is wrong, doesn't take into account |
344 | NUM_HASH_CHAIN_ENTRIES. A lot of the hash table will be unused! */ | |
c906108c SS |
345 | unsigned int slot = HASH_PC (pc) & (CPU_SCACHE_NUM_HASH_CHAINS (cpu) - 1); |
346 | int i, max_i = CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu); | |
347 | SCACHE_MAP *scm; | |
348 | SCACHE *sc; | |
349 | ||
350 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
351 | for (i = 0; i < max_i && scm->pc != UNUSED_ADDR; ++i, ++scm) | |
352 | { | |
353 | if (scm->pc == pc) | |
354 | { | |
355 | PROFILE_COUNT_SCACHE_HIT (cpu); | |
356 | return scm->sc; | |
357 | } | |
358 | } | |
359 | PROFILE_COUNT_SCACHE_MISS (cpu); | |
360 | ||
361 | /* The address we want isn't cached. Bummer. | |
362 | If the hash chain we have for this address is full, throw out an entry | |
363 | to make room. */ | |
364 | ||
365 | if (i == max_i) | |
366 | { | |
367 | /* Rather than do something sophisticated like LRU, we just throw out | |
368 | a semi-random entry. Let someone else have the joy of saying how | |
369 | wrong this is. NEXT_FREE is the entry to throw out and cycles | |
370 | through all possibilities. */ | |
371 | static int next_free = 0; | |
372 | ||
373 | scm = & CPU_SCACHE_HASH_TABLE (cpu) [slot]; | |
96baa820 | 374 | /* FIXME: This seems rather clumsy. */ |
c906108c SS |
375 | for (i = 0; i < next_free; ++i, ++scm) |
376 | continue; | |
377 | ++next_free; | |
378 | if (next_free == CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu)) | |
379 | next_free = 0; | |
380 | } | |
381 | ||
382 | /* At this point SCM points to the hash table entry to use. | |
383 | Now make sure there's room in the cache. */ | |
96baa820 JM |
384 | /* FIXME: Kinda weird to use a next_free adjusted scm when cache is |
385 | flushed. */ | |
c906108c SS |
386 | |
387 | { | |
388 | int elm_size = IMP_PROPS_SCACHE_ELM_SIZE (MACH_IMP_PROPS (CPU_MACH (cpu))); | |
389 | int elms_used = (((char *) CPU_SCACHE_NEXT_FREE (cpu) | |
390 | - (char *) CPU_SCACHE_CACHE (cpu)) | |
391 | / elm_size); | |
392 | int elms_left = CPU_SCACHE_SIZE (cpu) - elms_used; | |
393 | ||
394 | if (elms_left < n) | |
395 | { | |
396 | PROFILE_COUNT_SCACHE_FULL_FLUSH (cpu); | |
397 | scache_flush_cpu (cpu); | |
398 | } | |
399 | } | |
400 | ||
401 | sc = CPU_SCACHE_NEXT_FREE (cpu); | |
402 | scm->pc = pc; | |
403 | scm->sc = sc; | |
404 | ||
405 | *bufp = sc; | |
406 | return NULL; | |
407 | } | |
408 | ||
409 | #endif /* WITH_SCACHE_PBB */ | |
410 | ||
411 | /* Print cache access statics for CPU. */ | |
412 | ||
413 | void | |
414 | scache_print_profile (SIM_CPU *cpu, int verbose) | |
415 | { | |
416 | SIM_DESC sd = CPU_STATE (cpu); | |
417 | unsigned long hits = CPU_SCACHE_HITS (cpu); | |
418 | unsigned long misses = CPU_SCACHE_MISSES (cpu); | |
419 | char buf[20]; | |
420 | unsigned long max_val; | |
421 | unsigned long *lengths; | |
422 | int i; | |
423 | ||
424 | if (CPU_SCACHE_SIZE (cpu) == 0) | |
425 | return; | |
426 | ||
427 | sim_io_printf (sd, "Simulator Cache Statistics\n\n"); | |
428 | ||
429 | /* One could use PROFILE_LABEL_WIDTH here. I chose not to. */ | |
430 | sim_io_printf (sd, " Cache size: %s\n", | |
431 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_SIZE (cpu))); | |
432 | sim_io_printf (sd, " Hits: %s\n", | |
433 | sim_add_commas (buf, sizeof (buf), hits)); | |
434 | sim_io_printf (sd, " Misses: %s\n", | |
435 | sim_add_commas (buf, sizeof (buf), misses)); | |
436 | if (hits + misses != 0) | |
437 | sim_io_printf (sd, " Hit rate: %.2f%%\n", | |
438 | ((double) hits / ((double) hits + (double) misses)) * 100); | |
439 | ||
440 | #if WITH_SCACHE_PBB | |
441 | sim_io_printf (sd, "\n"); | |
442 | sim_io_printf (sd, " Hash table size: %s\n", | |
443 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_NUM_HASH_CHAINS (cpu))); | |
444 | sim_io_printf (sd, " Max hash list length: %s\n", | |
445 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_NUM_HASH_CHAIN_ENTRIES (cpu))); | |
446 | sim_io_printf (sd, " Max insn chain length: %s\n", | |
447 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_MAX_CHAIN_LENGTH (cpu))); | |
448 | sim_io_printf (sd, " Cache full flushes: %s\n", | |
449 | sim_add_commas (buf, sizeof (buf), CPU_SCACHE_FULL_FLUSHES (cpu))); | |
450 | sim_io_printf (sd, "\n"); | |
451 | ||
452 | if (verbose) | |
453 | { | |
454 | sim_io_printf (sd, " Insn chain lengths:\n\n"); | |
455 | max_val = 0; | |
456 | lengths = CPU_SCACHE_CHAIN_LENGTHS (cpu); | |
457 | for (i = 1; i < CPU_SCACHE_MAX_CHAIN_LENGTH (cpu); ++i) | |
458 | if (lengths[i] > max_val) | |
459 | max_val = lengths[i]; | |
460 | for (i = 1; i < CPU_SCACHE_MAX_CHAIN_LENGTH (cpu); ++i) | |
461 | { | |
462 | sim_io_printf (sd, " %2d: %*s: ", | |
463 | i, | |
464 | max_val < 10000 ? 5 : 10, | |
465 | sim_add_commas (buf, sizeof (buf), lengths[i])); | |
ef93a840 | 466 | sim_profile_print_bar (sd, cpu, PROFILE_HISTOGRAM_WIDTH, |
c906108c SS |
467 | lengths[i], max_val); |
468 | sim_io_printf (sd, "\n"); | |
469 | } | |
470 | sim_io_printf (sd, "\n"); | |
471 | } | |
472 | #endif /* WITH_SCACHE_PBB */ | |
473 | } |