]>
Commit | Line | Data |
---|---|---|
9b66ebb1 PB |
1 | ;; ARM 926EJ-S Pipeline Description |
2 | ;; Copyright (C) 2003 Free Software Foundation, Inc. | |
3 | ;; Written by CodeSourcery, LLC. | |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
9 | ;; the Free Software Foundation; either version 2, or (at your option) | |
10 | ;; any later version. | |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
18 | ;; along with GCC; see the file COPYING. If not, write to the Free | |
19 | ;; Software Foundation, 59 Temple Place - Suite 330, Boston, MA | |
20 | ;; 02111-1307, USA. | |
21 | ||
22 | ;; These descriptions are based on the information contained in the | |
23 | ;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM | |
24 | ;; Limited. | |
25 | ;; | |
26 | ||
27 | ;; This automaton provides a pipeline description for the ARM | |
28 | ;; 926EJ-S core. | |
29 | ;; | |
30 | ;; The model given here assumes that the condition for all conditional | |
31 | ;; instructions is "true", i.e., that all of the instructions are | |
32 | ;; actually executed. | |
33 | ||
34 | (define_automaton "arm926ejs") ; automaton to which the CPU units declared in this file are assigned | |
35 | ||
36 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
37 | ;; Pipelines | |
38 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
39 | ||
40 | ;; There is a single pipeline | |
41 | ;; | |
42 | ;; The ALU pipeline has fetch, decode, execute, memory, and | |
43 | ;; write stages. We only need to model the execute, memory and write | |
44 | ;; stages. | |
45 | ||
46 | (define_cpu_unit "e,m,w" "arm926ejs") ; e = execute, m = memory, w = write stage; all three belong to the "arm926ejs" automaton | |
47 | ||
48 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
49 | ;; ALU Instructions | |
50 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
51 | ||
52 | ;; ALU instructions require three cycles to execute, and use the ALU | |
53 | ;; pipeline in each of the three stages. The results are available | |
54 | ;; after the execute stage has finished. | |
55 | ;; | |
56 | ;; If the destination register is the PC, the pipelines are stalled | |
57 | ;; for several cycles. That case is not modeled here. | |
58 | ||
59 | ;; ALU operations with no shift-by-register operand (types "alu" and "alu_shift") |
60 | (define_insn_reservation "9_alu_op" 1 ; latency 1: result is available once the execute stage has finished |
61 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
62 | (eq_attr "type" "alu,alu_shift")) ; insns whose "type" attribute is alu or alu_shift |
63 | "e,m,w") ; one cycle in each of the execute, memory and write stages |
64 | ||
65 | ;; ALU operations with a shift-by-register operand | |
66 | ;; These really stall in the decoder, in order to read | |
67 | ;; the shift value in a second cycle. Pretend we take two cycles in | |
68 | ;; the execute stage. | |
69 | (define_insn_reservation "9_alu_shift_reg_op" 2 ; latency 2: one more than a plain ALU op |
70 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
71 | (eq_attr "type" "alu_shift_reg")) ; insns whose "type" attribute is alu_shift_reg |
72 | "e*2,m,w") ; the decoder stall is modeled as two cycles in execute, then memory, then write |
73 | ||
74 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
75 | ;; Multiplication Instructions | |
76 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
77 | ||
78 | ;; Multiplication instructions loop in the execute stage until the | |
79 | ;; instruction has been passed through the multiplier array enough | |
80 | ;; times. Multiply operations occur in both the execute and memory | |
81 | ;; stages of the pipeline | |
82 | ||
83 | (define_insn_reservation "9_mult1" 3 ; latency 3 |
84 | (and (eq_attr "tune" "arm926ejs") | |
85 | (eq_attr "insn" "mul,mla")) ; smlalxy is not listed here: "9_mult6" covers it with the same latency and reservation | |
86 | "e*2,m,w") ; two cycles in execute, then memory, then write | |
87 | ||
88 | (define_insn_reservation "9_mult2" 4 ; latency 4 | |
89 | (and (eq_attr "tune" "arm926ejs") | |
90 | (eq_attr "insn" "muls,mlas")) ; insns whose "insn" attribute is muls or mlas | |
91 | "e*3,m,w") ; three cycles looping in execute, then memory, then write | |
92 | ||
93 | (define_insn_reservation "9_mult3" 4 ; latency 4 | |
94 | (and (eq_attr "tune" "arm926ejs") | |
95 | (eq_attr "insn" "umull,umlal,smull,smlal")) ; insns whose "insn" attribute is one of these four values | |
96 | "e*3,m,w") ; three cycles looping in execute, then memory, then write | |
97 | ||
98 | (define_insn_reservation "9_mult4" 5 ; latency 5 | |
99 | (and (eq_attr "tune" "arm926ejs") | |
100 | (eq_attr "insn" "umulls,umlals,smulls,smlals")) ; insns whose "insn" attribute is one of these four values | |
101 | "e*4,m,w") ; four cycles looping in execute, then memory, then write | |
102 | ||
103 | (define_insn_reservation "9_mult5" 2 ; latency 2 | |
104 | (and (eq_attr "tune" "arm926ejs") | |
105 | (eq_attr "insn" "smulxy,smlaxy,smlawx")) ; insns whose "insn" attribute is smulxy, smlaxy or smlawx | |
106 | "e,m,w") ; a single cycle in execute, then memory, then write | |
107 | ||
108 | (define_insn_reservation "9_mult6" 3 ; latency 3 | |
109 | (and (eq_attr "tune" "arm926ejs") | |
110 | (eq_attr "insn" "smlalxy")) ; insns whose "insn" attribute is smlalxy | |
111 | "e*2,m,w") ; two cycles in execute, then memory, then write | |
112 | ||
113 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
114 | ;; Load/Store Instructions | |
115 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
116 | ||
117 | ;; The models for load/store instructions do not accurately describe | |
118 | ;; the difference between operations with and without a base register | |
119 | ;; writeback (such as "ldm!"). These models assume that all memory | |
120 | ;; references hit in dcache. | |
121 | ||
122 | ;; Loads with a shifted offset take 3 cycles. All "load1" and "load_byte" insns are modeled that way because |
123 | ;; (a) such loads are probably the most common, and (b) the pessimistic assumption will lead to fewer stalls. |
124 | (define_insn_reservation "9_load1_op" 3 ; latency 3: the pessimistic shifted-offset figure |
125 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
126 | (eq_attr "type" "load1,load_byte")) ; insns whose "type" attribute is load1 or load_byte |
127 | "e*2,m,w") ; two cycles in execute, then memory, then write |
128 | ||
129 | (define_insn_reservation "9_store1_op" 0 ; latency 0 |
130 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
131 | (eq_attr "type" "store1")) ; insns whose "type" attribute is store1 |
132 | "e,m,w") ; one cycle in each of the execute, memory and write stages |
133 | ||
134 | ;; multiple word loads and stores | |
135 | (define_insn_reservation "9_load2_op" 3 ; latency 3 |
136 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
137 | (eq_attr "type" "load2")) ; insns whose "type" attribute is load2 |
138 | "e,m*2,w") ; execute, then two cycles in the memory stage, then write |
139 | ||
140 | (define_insn_reservation "9_load3_op" 4 ; latency 4 |
141 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
142 | (eq_attr "type" "load3")) ; insns whose "type" attribute is load3 |
143 | "e,m*3,w") ; execute, then three cycles in the memory stage, then write |
144 | ||
145 | (define_insn_reservation "9_load4_op" 5 ; latency 5 |
146 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
147 | (eq_attr "type" "load4")) ; insns whose "type" attribute is load4 |
148 | "e,m*4,w") ; execute, then four cycles in the memory stage, then write |
149 | ||
150 | (define_insn_reservation "9_store2_op" 0 ; latency 0 |
151 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
152 | (eq_attr "type" "store2")) ; insns whose "type" attribute is store2 |
153 | "e,m*2,w") ; execute, then two cycles in the memory stage, then write |
154 | ||
155 | (define_insn_reservation "9_store3_op" 0 ; latency 0 |
156 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
157 | (eq_attr "type" "store3")) ; insns whose "type" attribute is store3 |
158 | "e,m*3,w") ; execute, then three cycles in the memory stage, then write |
159 | ||
160 | (define_insn_reservation "9_store4_op" 0 ; latency 0 |
161 | (and (eq_attr "tune" "arm926ejs") ; applies only when tuning for arm926ejs |
162 | (eq_attr "type" "store4")) ; insns whose "type" attribute is store4 |
163 | "e,m*4,w") ; execute, then four cycles in the memory stage, then write |
164 | ||
165 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
166 | ;; Branch and Call Instructions | |
167 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
168 | ||
169 | ;; Branch instructions are difficult to model accurately. The ARM | |
170 | ;; core can predict most branches. If the branch is predicted | |
171 | ;; correctly, and predicted early enough, the branch can be completely | |
172 | ;; eliminated from the instruction stream. Some branches can | |
173 | ;; therefore appear to require zero cycles to execute. We assume that | |
174 | ;; all branches are predicted correctly, and that the latency is | |
175 | ;; therefore the minimum value. | |
176 | ||
177 | (define_insn_reservation "9_branch_op" 0 ; latency 0: the minimum, assuming every branch is predicted correctly | |
178 | (and (eq_attr "tune" "arm926ejs") | |
179 | (eq_attr "type" "branch")) ; insns whose "type" attribute is branch | |
180 | "nothing") ; the reservation names none of the e/m/w units | |
181 | ||
182 | ;; The latency for a call is not predictable. Therefore, we use 32 as | |
183 | ;; roughly equivalent to positive infinity. | |
184 | ||
185 | (define_insn_reservation "9_call_op" 32 ; latency 32 stands in for "unpredictable / effectively infinite" | |
186 | (and (eq_attr "tune" "arm926ejs") | |
187 | (eq_attr "type" "call")) ; insns whose "type" attribute is call | |
188 | "nothing") ; the reservation names none of the e/m/w units |