1 /* This file is part of the program psim.
3 Copyright (C) 1994-1995, Andrew Cagney <cagney@highland.com.au>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 /* endianness of the host/target:
28 If the build process is aware (at compile time) of the endianness
29 of the host/target it is able to eliminate slower generic endian
32 If ENDIAN_OK is true then no byte swapping is required. If it is
33 false, copy-in / copy-out functions assume that data should be byte
34 reversed as part of the copy. */
/* Compile-time byte order settings.  Both are 0 (unknown) here, so the
   CURRENT_* macros below fall through to the run-time variables. */
36 #define WITH_HOST_BYTE_ORDER 0 /*unknown*/
37 #define WITH_TARGET_BYTE_ORDER 0 /*unknown*/
/* Run-time byte orders; only declared (extern) in this header. */
39 extern int current_host_byte_order
;
40 extern int current_target_byte_order
;
/* Each CURRENT_* macro yields its WITH_* setting when that setting is
   non-zero, otherwise the value of the matching run-time variable. */
41 #define CURRENT_HOST_BYTE_ORDER (WITH_HOST_BYTE_ORDER \
42 ? WITH_HOST_BYTE_ORDER \
43 : current_host_byte_order)
44 #define CURRENT_TARGET_BYTE_ORDER (WITH_TARGET_BYTE_ORDER \
45 ? WITH_TARGET_BYTE_ORDER \
46 : current_target_byte_order)
51 Sets a limit on the number of processors that can be simulated. If
52 WITH_SMP is set to zero (0), the simulator is restricted to
53 supporting only one processor (and as a consequence leaves the SMP
54 code out of the build process). */
61 /* Word size of host/target:
63 Set these according to your host and target requirements. At this
64 point in time, I've only compiled (not run) for a 64bit target and never
65 built for a 64bit host. This will always remain a compile time
68 #ifndef WITH_TARGET_WORD_BITSIZE
69 #define WITH_TARGET_WORD_BITSIZE 32 /* compiled only */
71 #ifndef WITH_HOST_WORD_BITSIZE
72 #define WITH_HOST_WORD_BITSIZE 32 /* 64bit ready? */
76 /* Program environment:
78 Two environments are available. VEA (or virtual environment
79 architecture) and OEA (or operating environment architecture). The
80 former is the environment that a user program would see while the
81 latter is the environment as seen by an operating system. By
82 setting these to specific values, the build process is able to
83 eliminate non-relevant environment code
85 CURRENT_ENVIRONMENT specifies which of vea or oea is required for
86 the current runtime. */
/* Environment selection.  WITH_ENVIRONMENT is 0 here, which matches
   neither VIRTUAL_ENVIRONMENT (1) nor OPERATING_ENVIRONMENT (2). */
88 #define WITH_ENVIRONMENT 0
89 #define VIRTUAL_ENVIRONMENT 1
90 #define OPERATING_ENVIRONMENT 2
/* Run-time environment; only declared (extern) in this header. */
92 extern int current_environment
;
/* NOTE(review): unlike CURRENT_HOST_BYTE_ORDER, no `? WITH_ENVIRONMENT'
   arm is visible between the two lines of this macro -- confirm
   against the complete file. */
93 #define CURRENT_ENVIRONMENT (WITH_ENVIRONMENT \
95 : current_environment)
98 /* Optional VEA/OEA code:
100 The below, required for the OEA model may also be included in the
101 VEA model however, as far as I can tell only make things
105 /* Events. Devices modeling real H/W need to be able to efficiently
106 schedule things to do at known times in the future. The event
107 queue implements this. Unfortunately this adds the need to check
108 for any events once each full instruction cycle. */
110 #define WITH_EVENTS (WITH_ENVIRONMENT != VIRTUAL_ENVIRONMENT)
115 The PowerPC architecture includes the addition of both a time base
116 register and a decrement timer. Like events, this adds to the overhead
117 of some instruction cycles. */
119 #ifndef WITH_TIME_BASE
120 #define WITH_TIME_BASE 1
124 /* Callback/Default Memory.
126 Core includes a builtin memory type (raw_memory) that is
127 implemented using an array. raw_memory does not require any
128 additional functions etc.
130 Callback memory is where the core calls a core device for the data
133 Default memory is an extension of this where for addresses that do
134 not map into either a callback or core memory range a default map
137 The OEA model uses callback memory for devices and default memory
140 The VEA model uses callback memory to capture `page faults'.
142 While it may be possible to eliminate callback/default memory (and
143 hence also eliminate an additional test per memory fetch) it
144 probably is not worth the effort.
146 BTW, while raw_memory could have been implemented as a callback,
147 profiling has shown that there is a bigger win (at least for the
148 x86) in eliminating a function call for the most common
149 (raw_memory) case. */
151 #define WITH_CALLBACK_MEMORY 1
156 The PowerPC may or may not handle misaligned transfers. An
157 implementation normally handles misaligned transfers in big
158 endian mode but generates an exception in little endian mode.
160 This model instead allows both little and big endian modes to
161 either take exceptions or handle misaligned transfers.
163 If 0 is specified then for big-endian mode misaligned accesses
164 are permitted (NONSTRICT_ALIGNMENT) while in little-endian mode the
165 processor will fault on them (STRICT_ALIGNMENT). */
167 #define NONSTRICT_ALIGNMENT 1
168 #define STRICT_ALIGNMENT 2
170 #ifndef WITH_ALIGNMENT
171 #define WITH_ALIGNMENT 0
173 extern int current_alignment
;
174 #define CURRENT_ALIGNMENT (WITH_ALIGNMENT \
179 /* Floating point support:
181 Still under development. */
/* Constants naming the two floating point options. */
183 #define SOFT_FLOATING_POINT 1
184 #define HARD_FLOATING_POINT 2
/* Default to HARD_FLOATING_POINT unless WITH_FLOATING_POINT is already
   defined. */
186 #ifndef WITH_FLOATING_POINT
187 #define WITH_FLOATING_POINT HARD_FLOATING_POINT
/* Run-time floating point setting; only declared (extern) in this header. */
189 extern int current_floating_point
;
/* Yields WITH_FLOATING_POINT when it is non-zero, otherwise the value of
   the run-time variable current_floating_point. */
190 #define CURRENT_FLOATING_POINT (WITH_FLOATING_POINT \
191 ? WITH_FLOATING_POINT \
192 : current_floating_point)
197 Control the inclusion of debugging code. */
199 /* Include the tracing code. Disabling this eliminates all tracing
206 /* include code that checks assertions scattered through out the
210 #define WITH_ASSERT 1
213 /* include profiling code that doesn't yet exist */
216 #define WITH_PROFILE 1
220 /* INSTRUCTION TABLE CODE GENERATION:
222 The program gen takes the files ppc.instructions and spr.table and
223 creates from them code that provides:
225 o instruction decode and issue
228 The program gen does this according to the configuration
229 information that follows. */
232 /* Line numbering of generated code:
234 When generating the semantic and idecode files, gen can also output
235 line number information (w.r.t. ppc.instructions). It may be
236 useful to disable this if you suspect that gen.c is incorrectly
237 generating intermediate code files. */
239 #ifndef WITH_LINE_NUMBERS
240 #define WITH_LINE_NUMBERS 1
244 /* Instruction cache:
246 Instead of the idecode routine calling the semantic function
247 directly, idecode can instead return a descriptor of the
248 instruction (cache entry).
250 With level one caching, idecode just returns the address of the
251 semantic function. With level two caching, in addition to this,
252 the idecode routine decodes key fields within the instruction and
253 also enters them into the cache. The table WITH_IDECODE_CACHE_RULES
254 controls what goes into the cache. */
256 #ifndef WITH_IDECODE_CACHE
257 #define WITH_IDECODE_CACHE 0
259 #ifndef IDECODE_CACHE_SIZE
260 #define IDECODE_CACHE_SIZE 1024
264 /* Semantic code expansion:
266 For a given instruction there is the potential to improve
267 performance by creating copies of the instruction's code for one or
268 more of its possible variations. Eg branch being relative. This
269 macro determines if semantic functions should be expanded. How
270 well they are expanded is determined by the table
271 WITH_IDECODE_OPCODE_RULES. */
273 #ifndef WITH_IDECODE_EXPAND_SEMANTICS
274 #define WITH_IDECODE_EXPAND_SEMANTICS 0
280 The attributes of the SPR's are kept in a `lookup table'. This
281 table can be implemented as either a true table or a switch
284 A switch statement may be a performance advantage if the SPR's are
285 known at compile time. The compiler is then able to eliminate the
288 #ifndef WITH_SPREG_LOOKUP_TABLE
289 #define WITH_SPREG_LOOKUP_TABLE 1
293 /* Instruction decode:
295 The table that follows is used by gen to construct a decision tree
296 that can identify each possible instruction. Gen then outputs this
297 decision tree as (according to config) a table or switch statement
298 as the function idecode.
300 In parallel to this, as mentioned above, WITH_IDECODE_EXPAND_SEMANTICS
301 determines if the semantic functions themselves should be expanded
304 The table contains the following entries:
308 Must be 1 for the entry to be considered. The last entry must be
314 Range of bits (within the instruction) that should be searched for
315 an instruction field. Within such ranges, gen looks for opcodes
316 (constants), registers (strings) and reserved bits (slash) and
317 according to the rules that follows includes or excludes them from
318 a possible instruction field.
323 If an instruction field was found, enlarge the field size so that
324 it is forced to at least include bits starting from <force_first>
325 (<force_last>). To stop this occurring, use <force_first> = <last>
326 + 1 and <force_last> = <first> - 1.
330 Treat `/' fields as a constant instead of variable when looking for
331 an instruction field.
335 Treat any contained register (string) fields as constant when
336 determining the instruction field. For the instruction decode (and
337 controlled by WITH_IDECODE_EXPAND_SEMANTICS) this forces the expansion of
338 what would otherwise be non constant bits of an instruction.
342 Should this table be expanded using a switch statement (val 1) and
343 if so, should it be padded with entries so as to force the compiler
344 to generate a jump table (val 2).
350 Special rule to fine tune how specific (or groups) of instructions
351 are expanded. The applicability of the rule is determined by
353 <special_mask> != 0 && (<instruction> & <special_mask>) == <special_value>
355 Where <instruction> is obtained by looking only at constant fields
356 within an instruction's spec. When determining an expansion, the
357 rule is only considered when a node contains a single instruction.
358 <special_rule> can be any of:
360 0: for this instruction, expand by earlier rules
361 1: expand bits <force_first> .. <force_last> only
362 2: boolean expansion of only zero/non-zero cases
367 #define WITH_IDECODE_OPCODE_RULES { \
368 { 1, 0, 5, 0, 5, 0, 0, 1, 0x00000000, 0x00000000, 0 }, \
369 { 1, 21, 31, 32, -1, 0, 0, 1, 0x00000000, 0x00000000, 0 }, \
374 /* Instruction unpacking:
376 Once the instruction has been decoded, the register (and other)
377 fields within the instruction need to be extracted.
379 The table that follows determines how each field should be treated.
380 Importantly it considers the case where the extracted field is to
381 be used immediately or stored in an instruction cache.
385 Zero marks the end of the table. More importantly 1. indicates
386 that the entry is valid and can be cached. 2. indicates that
387 the entry is valid but can not be cached.
391 The field name as given in the instruction spec.
395 A name for <old_name> once it has been extracted from the
396 instruction (and possibly stored in the instruction cache).
400 String specifying the storage type for <new_name> (the extracted
405 Specifies how to get <new_name> from <old_name>. If null, old and
406 new name had better be the same. */
408 #define WITH_IDECODE_CACHE_RULES { \
409 { 1, "RA", "RA", 0, 0 }, \
410 { 1, "RA", "rA", "signed_word *", \
411 "(cpu_registers(processor)->gpr + RA)" }, \
412 { 1, "RT", "RT", 0, 0 }, \
413 { 1, "RT", "rT", "signed_word *", \
414 "(cpu_registers(processor)->gpr + RT)" }, \
415 { 2, "RS", "RS", 0, 0 }, \
416 { 1, "RS", "rS", "signed_word *", \
417 "(cpu_registers(processor)->gpr + RS)" }, \
418 { 2, "RB", "RB", 0, 0 }, \
419 { 1, "RB", "rB", "signed_word *", \
420 "(cpu_registers(processor)->gpr + RB)" }, \
421 { 2, "FRA", "FRA", 0, 0 }, \
422 { 1, "FRA", "frA", "unsigned64 *", \
423 "(cpu_registers(processor)->fpr + FRA)" }, \
424 { 2, "FRB", "FRB", 0, 0 }, \
425 { 1, "FRB", "frB", "unsigned64 *", \
426 "(cpu_registers(processor)->fpr + FRB)" }, \
427 { 2, "FRC", "FRC", 0, 0 }, \
428 { 1, "FRC", "frC", "unsigned64 *", \
429 "(cpu_registers(processor)->fpr + FRC)" }, \
430 { 2, "FRS", "FRS", 0, 0 }, \
431 { 1, "FRS", "frS", "unsigned64 *", \
432 "(cpu_registers(processor)->fpr + FRS)" }, \
433 { 2, "FRT", "FRT", 0, 0 }, \
434 { 1, "FRT", "frT", "unsigned64 *", \
435 "(cpu_registers(processor)->fpr + FRT)" }, \
436 { 1, "SI", "EXTS_SI", "unsigned_word", \
437 "((signed_word)(signed16)instruction)" }, \
438 { 2, "BI", "BI", 0, 0 }, \
439 { 1, "BI", "BIT32_BI", 0, \
441 { 2, "BA", "BA", 0, 0 }, \
442 { 1, "BA", "BIT32_BA", 0, \
444 { 2, "BB", "BB", 0, 0 }, \
445 { 1, "BB", "BIT32_BB", 0, \
447 { 1, "BD", "EXTS_BD_0b00", "unsigned_word", \
448 "(((signed_word)(signed16)instruction) & ~3)" }, \
449 /*{ 1, "BD", "CIA_plus_EXTS_BD_0b00", "unsigned_word", */ \
450 /* "CIA + EXTS(BD_0b00)" }, */ \
451 { 1, "LI", "EXTS_LI_0b00", "unsigned_word", \
452 "((((signed_word)(signed32)(instruction << 6)) >> 6) & ~0x3)" }, \
453 { 1, "D", "EXTS_D", "unsigned_word", \
454 "((signed_word)(signed16)(instruction))" }, \
455 { 1, "DS", "EXTS_DS_0b00", "unsigned_word", \
456 "(((signed_word)(signed16)instruction) & ~0x3)" }, \
462 /* INLINE CODE SELECTION:
464 GCC -O3 attempts to inline any function or procedure in scope. The
465 options below facilitate fine grained control over what is and what
466 isn't made inline. For instance it can control things down to a
467 specific module's static routines. This control is implemented in
468 two parts. Doing this allows the compiler to both eliminate the
469 overhead of function calls and (as a consequence) also eliminate
472 Experimenting with CISC (x86) I've found that I can achieve an
473 order of magnitude speed improvement (x3-x5). In the case of RISC
474 (sparc) while the performance gain isn't as great it is still
477 Part One - Static functions: It is possible to control how static
478 functions within each module are to be compiled. On a per module
479 or global basis, it is possible to specify that a module's static
480 functions should be compiled inline. This is controlled by the
481 macros STATIC_INLINE and INLINE_STATIC_<module>.
483 Part Two - External functions: Again it is possible to allow the
484 inlining of calls to external functions. This is far more
485 complicated and much heavier on the compiler. In this case, it is
486 controlled by the <module>_INLINE macros. Where each can have a
489 0 ppc.c should call external module
491 1 ppc.c should have local copy (and hence possibly facilitate
492 the inlining of that module's external calls)
494 2 ppc.c should inline this module
496 Finally, this is not for the faint hearted. I've seen GCC get up to
497 200mb trying to compile what this can create */
499 /* Your compilers inline reserved word */
502 #if defined(__GNUC__) && defined(__OPTIMIZE__)
503 #define INLINE __inline__
505 #define INLINE /*inline*/
509 /* Default prefix for static functions */
511 #ifndef STATIC_INLINE
512 #define STATIC_INLINE static INLINE
515 /* Default macro to control several of the inlines */
517 #ifndef DEFAULT_INLINE
518 #define DEFAULT_INLINE 0
521 /* Code that does byte swapping used on any memory access */
523 #ifndef ENDIAN_INLINE
524 #define ENDIAN_INLINE DEFAULT_INLINE
527 /* Instruction cache if in use */
530 #ifndef ICACHE_INLINE
531 #define ICACHE_INLINE 0
535 /* Given a translated address, core maps it onto either simulator data
536 or a function call, this is performed once for each
537 data/instruction access */
541 #define CORE_INLINE DEFAULT_INLINE
545 /* The cpu object. Many things call upon this module to manipulate
546 each cpu object for instance register updates (from semantics) or
547 instruction execution from psim */
550 #define VM_INLINE DEFAULT_INLINE
553 /* Physical memory is implemented using the memory map module */
556 #define CPU_INLINE DEFAULT_INLINE
559 /* handle the queue of events to happen in the future */
561 #ifndef EVENTS_INLINE
562 #define EVENTS_INLINE DEFAULT_INLINE
565 /* not so important register manipulation code. Most important
566 register operations are performed directly on the register file */
568 #ifndef REGISTERS_INLINE
569 #define REGISTERS_INLINE DEFAULT_INLINE
572 /* interrupt handling code */
574 #ifndef INTERRUPTS_INLINE
575 #define INTERRUPTS_INLINE DEFAULT_INLINE
578 /* device code. While possibly important, this isn't as critical as
581 There seems to be some problem with making either device_tree or
582 devices inline. It reports the message:
583 device_tree_find_node() not a leaf */
585 #ifndef DEVICE_TREE_INLINE
586 #define DEVICE_TREE_INLINE 0
589 #ifndef DEVICES_INLINE
590 #define DEVICES_INLINE 0
593 /* Special Purpose Register tables. Provide information on the
594 attributes of given SPR's. */
597 #define SPREG_INLINE DEFAULT_INLINE
600 /* Functions modeling the semantics of each instruction. Two cases to
601 consider, firstly if idecode is implemented with a switch then this
602 allows the idecode function to inline each semantic function
603 (avoiding a call). The second case is when idecode is using a
604 table, even then while the semantic functions can't be inlined,
605 setting it to one still enables each semantic function to inline
606 anything they call (if that code is marked for being inlined).
608 WARNING: you need lots of swap (like 200mb). Setting this
609 to 1 is useful when using a table as it enables the semantic code to
610 inline all of their called functions */
612 #ifndef SEMANTICS_INLINE
613 #define SEMANTICS_INLINE 0
616 /* Functions that decode an instruction. Called by the cpu module.
617 Part of the performance critical fetch - decode - issue sequence */
619 #ifndef IDECODE_INLINE
620 #define IDECODE_INLINE DEFAULT_INLINE
625 /* If you're confused by the above, check out some of the generic
626 configurations below. */
630 /* Allow the expansion of the semantic functions. That is, if the
631 branch instruction is called with AA=0 and AA=1, generate separate
632 functions for each case */
634 #undef WITH_IDECODE_EXPAND_SEMANTICS
635 #define WITH_IDECODE_EXPAND_SEMANTICS 1
637 #undef WITH_IDECODE_OPCODE_RULES
638 #define WITH_IDECODE_OPCODE_RULES { \
639 { 1, 0, 5, 0, 5, 0, 0, 0, 0x00000000, 0x00000000, 0 }, \
640 { 1, 21, 31, 32, -1, 0, "OE,LR,AA,Rc,LK", 0, 0x00000000, 0x00000000, 0 }, \
641 { 1, 6, 9, 6, 9, 0, "BO", 0, 0xfc000000, 0x40000000, 1 }, \
642 { 1, 11, 15, 11, 15, 0, "RA", 0, 0xfc000000, 0x38000000, 2 }, \
643 { 1, 11, 15, 11, 15, 0, "RA", 0, 0xfc000000, 0x3c000000, 2 }, \
650 /* eliminate any debugging noise */
656 #define WITH_ASSERT 0
662 /* A reasonable set of inline macros that give the compiler a
663 fighting chance at eliminating much of the function call overhead.
665 Typically, with the below the -O3 option (to get inline of all
666 functions) isn't of any great benefit. */
669 #define INLINE inline
672 #define STATIC_INLINE static INLINE
675 #define ENDIAN_INLINE 2
679 #define ICACHE_INLINE 0
683 #define CORE_INLINE 2
692 #define EVENTS_INLINE 2
694 #undef REGISTERS_INLINE
695 #define REGISTERS_INLINE 2
697 #undef INTERRUPTS_INLINE
698 #define INTERRUPTS_INLINE 2
700 #undef DEVICE_TREE_INLINE
701 #define DEVICE_TREE_INLINE 0
703 #undef DEVICES_INLINE
704 #define DEVICES_INLINE 0
707 #define SPREG_INLINE 2
709 #undef SEMANTICS_INLINE
710 #define SEMANTICS_INLINE 1 /* not 2! as it blows away the compiler */
712 #undef IDECODE_INLINE
713 #define IDECODE_INLINE 2
719 /* Enable the full cracking cache. The cracked instruction cache
720 appears to give best performance if most functions have been lined
723 #undef WITH_IDECODE_CACHE
724 #define WITH_IDECODE_CACHE 2
731 /* With the VEA model, can eliminate some things. Not least of which
732 is support for the OEA model */
734 #undef WITH_ENVIRONMENT
735 #define WITH_ENVIRONMENT VIRTUAL_ENVIRONMENT
738 #define WITH_EVENTS 0
743 #undef WITH_TARGET_BYTE_ORDER
744 #define WITH_TARGET_BYTE_ORDER WITH_HOST_BYTE_ORDER
752 /* Finally, the expansion rules below are extremely aggressive. Only
753 consider them if your build machine is VERY VERY VERY VERY VERY
756 #undef WITH_IDECODE_EXPAND_SEMANTICS
757 #define WITH_IDECODE_EXPAND_SEMANTICS 1
759 #undef WITH_IDECODE_OPCODE_RULES
760 #define WITH_IDECODE_OPCODE_RULES { \
761 { 1, 0, 5, 0, 5, 0, 0, 0, 0x00000000, 0x00000000, 0 }, \
762 { 1, 21, 31, 32, -1, 0, "OE,LR,AA,Rc,LK", 0, 0x00000000, 0x00000000, 0 }, \
763 { 1, 6, 15, 6, 15, 0, "BO,BI", 0, 0xfc000000, 0x40000000, 0 }, \
764 { 1, 11, 15, 11, 15, 0, "RA", 0, 0xfc000000, 0x38000000, 0 }, \
765 { 1, 11, 15, 11, 15, 0, "RA", 0, 0xfc000000, 0x3c000000, 0 }, \
766 { 1, 11, 20, 11, 20, 0, "spr", 0, 0xfc000000, 0x7c000000, 0 }, \
772 #endif /* _CONFIG_H */