]>
Commit | Line | Data |
---|---|---|
a2cd141b | 1 | ;; ARM 926EJ-S Pipeline Description |
f1717362 | 2 | ;; Copyright (C) 2003-2016 Free Software Foundation, Inc. |
a2cd141b | 3 | ;; Written by CodeSourcery, LLC. |
4 | ;; | |
5 | ;; This file is part of GCC. | |
6 | ;; | |
7 | ;; GCC is free software; you can redistribute it and/or modify it | |
8 | ;; under the terms of the GNU General Public License as published by | |
038d1e19 | 9 | ;; the Free Software Foundation; either version 3, or (at your option) |
a2cd141b | 10 | ;; any later version. |
11 | ;; | |
12 | ;; GCC is distributed in the hope that it will be useful, but | |
13 | ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | ;; General Public License for more details. | |
16 | ;; | |
17 | ;; You should have received a copy of the GNU General Public License | |
038d1e19 | 18 | ;; along with GCC; see the file COPYING3. If not see |
19 | ;; <http://www.gnu.org/licenses/>. | |
a2cd141b | 20 | |
21 | ;; These descriptions are based on the information contained in the | |
22 | ;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM | |
23 | ;; Limited. | |
24 | ;; | |
25 | ||
26 | ;; This automaton provides a pipeline description for the ARM | |
27 | ;; 926EJ-S core. | |
28 | ;; | |
29 | ;; The model given here assumes that the condition for all conditional | |
30 | ;; instructions is "true", i.e., that all of the instructions are | |
31 | ;; actually executed. | |
32 | ||
33 | (define_automaton "arm926ejs") | |
34 | ||
35 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
36 | ;; Pipelines | |
37 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
38 | ||
39 | ;; There is a single pipeline. | |
40 | ;; | |
41 | ;; The ALU pipeline has fetch, decode, execute, memory, and | |
42 | ;; write stages. We only need to model the execute, memory and write | |
43 | ;; stages. | |
44 | ||
45 | (define_cpu_unit "e,m,w" "arm926ejs") | |
46 | ||
47 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
48 | ;; ALU Instructions | |
49 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
50 | ||
51 | ;; ALU instructions require three cycles to execute, and use the ALU | |
52 | ;; pipeline in each of the three stages. The results are available | |
47ae02b7 | 53 | ;; after the execute stage has finished. |
a2cd141b | 54 | ;; |
55 | ;; If the destination register is the PC, the pipelines are stalled | |
56 | ;; for several cycles. That case is not modeled here. | |
57 | ||
58 | ;; ALU operations, including those with a shift-by-immediate operand | |
59 | (define_insn_reservation "9_alu_op" 1 | |
60 | (and (eq_attr "tune" "arm926ejs") | |
d82e788e | 61 | (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ |
112eda6f | 62 | alu_sreg,alus_sreg,logic_reg,logics_reg,\ |
d82e788e | 63 | adc_imm,adcs_imm,adc_reg,adcs_reg,\ |
64 | adr,bfm,rev,\ | |
65 | alu_shift_imm,alus_shift_imm,\ | |
66 | logic_shift_imm,logics_shift_imm,\ | |
67 | shift_imm,shift_reg,extend,\ | |
1aed5204 | 68 | mov_imm,mov_reg,mov_shift,\ |
1b7da4ac | 69 | mvn_imm,mvn_reg,mvn_shift,\ |
70 | multiple,no_insn")) | |
a2cd141b | 71 | "e,m,w") |
72 | ||
73 | ;; ALU operations with a shift-by-register operand. | |
74 | ;; These really stall in the decoder, in order to read | |
75 | ;; the shift value in a second cycle. We model this as two cycles in | |
76 | ;; the execute stage. | |
77 | (define_insn_reservation "9_alu_shift_reg_op" 2 | |
78 | (and (eq_attr "tune" "arm926ejs") | |
d82e788e | 79 | (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ |
80 | logic_shift_reg,logics_shift_reg,\ | |
81 | mov_shift_reg,mvn_shift_reg")) | |
a2cd141b | 82 | "e*2,m,w") |
83 | ||
84 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
85 | ;; Multiplication Instructions | |
86 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
87 | ||
88 | ;; Multiplication instructions loop in the execute stage until the | |
89 | ;; instruction has been passed through the multiplier array enough | |
90 | ;; times. Multiply operations occur in both the execute and memory | |
91 | ;; stages of the pipeline. | |
92 | ||
93 | (define_insn_reservation "9_mult1" 3 | |
94 | (and (eq_attr "tune" "arm926ejs") | |
9da0ec36 | 95 | (eq_attr "type" "smlalxy,mul,mla")) |
a2cd141b | 96 | "e*2,m,w") |
97 | ||
98 | (define_insn_reservation "9_mult2" 4 | |
99 | (and (eq_attr "tune" "arm926ejs") | |
9da0ec36 | 100 | (eq_attr "type" "muls,mlas")) |
a2cd141b | 101 | "e*3,m,w") |
102 | ||
103 | (define_insn_reservation "9_mult3" 4 | |
104 | (and (eq_attr "tune" "arm926ejs") | |
9da0ec36 | 105 | (eq_attr "type" "umull,umlal,smull,smlal")) |
a2cd141b | 106 | "e*3,m,w") |
107 | ||
108 | (define_insn_reservation "9_mult4" 5 | |
109 | (and (eq_attr "tune" "arm926ejs") | |
9da0ec36 | 110 | (eq_attr "type" "umulls,umlals,smulls,smlals")) |
a2cd141b | 111 | "e*4,m,w") |
112 | ||
113 | (define_insn_reservation "9_mult5" 2 | |
114 | (and (eq_attr "tune" "arm926ejs") | |
9da0ec36 | 115 | (eq_attr "type" "smulxy,smlaxy,smlawx")) |
a2cd141b | 116 | "e,m,w") |
117 | ||
118 | (define_insn_reservation "9_mult6" 3 | |
119 | (and (eq_attr "tune" "arm926ejs") | |
9da0ec36 | 120 | (eq_attr "type" "smlalxy")) |
a2cd141b | 121 | "e*2,m,w") |
122 | ||
123 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
124 | ;; Load/Store Instructions | |
125 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
126 | ||
127 | ;; The models for load/store instructions do not accurately describe | |
128 | ;; the difference between operations with and without a base register | |
129 | ;; writeback (such as "ldm!"). These models assume that all memory | |
130 | ;; references hit in the dcache. | |
131 | ||
132 | ;; Loads with a shifted offset take 3 cycles. Assume that for all single loads: | |
133 | ;; it is (a) probably the most common case and (b) pessimistic, so stalls are fewer. | |
134 | (define_insn_reservation "9_load1_op" 3 | |
135 | (and (eq_attr "tune" "arm926ejs") | |
136 | (eq_attr "type" "load1,load_byte")) | |
137 | "e*2,m,w") | |
138 | ||
139 | (define_insn_reservation "9_store1_op" 0 | |
140 | (and (eq_attr "tune" "arm926ejs") | |
141 | (eq_attr "type" "store1")) | |
142 | "e,m,w") | |
143 | ||
144 | ;; multiple word loads and stores | |
145 | (define_insn_reservation "9_load2_op" 3 | |
146 | (and (eq_attr "tune" "arm926ejs") | |
147 | (eq_attr "type" "load2")) | |
148 | "e,m*2,w") | |
149 | ||
150 | (define_insn_reservation "9_load3_op" 4 | |
151 | (and (eq_attr "tune" "arm926ejs") | |
152 | (eq_attr "type" "load3")) | |
153 | "e,m*3,w") | |
154 | ||
155 | (define_insn_reservation "9_load4_op" 5 | |
156 | (and (eq_attr "tune" "arm926ejs") | |
157 | (eq_attr "type" "load4")) | |
158 | "e,m*4,w") | |
159 | ||
160 | (define_insn_reservation "9_store2_op" 0 | |
161 | (and (eq_attr "tune" "arm926ejs") | |
162 | (eq_attr "type" "store2")) | |
163 | "e,m*2,w") | |
164 | ||
165 | (define_insn_reservation "9_store3_op" 0 | |
166 | (and (eq_attr "tune" "arm926ejs") | |
167 | (eq_attr "type" "store3")) | |
168 | "e,m*3,w") | |
169 | ||
170 | (define_insn_reservation "9_store4_op" 0 | |
171 | (and (eq_attr "tune" "arm926ejs") | |
172 | (eq_attr "type" "store4")) | |
173 | "e,m*4,w") | |
174 | ||
175 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
176 | ;; Branch and Call Instructions | |
177 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
178 | ||
179 | ;; Branch instructions are difficult to model accurately. The ARM | |
180 | ;; core can predict most branches. If the branch is predicted | |
181 | ;; correctly, and predicted early enough, the branch can be completely | |
182 | ;; eliminated from the instruction stream. Some branches can | |
183 | ;; therefore appear to require zero cycles to execute. We assume that | |
184 | ;; all branches are predicted correctly, and that the latency is | |
185 | ;; therefore the minimum value. | |
186 | ||
187 | (define_insn_reservation "9_branch_op" 0 | |
188 | (and (eq_attr "tune" "arm926ejs") | |
189 | (eq_attr "type" "branch")) | |
190 | "nothing") | |
191 | ||
192 | ;; The latency for a call is not predictable. Therefore, we use 32 as | |
58552ed0 | 193 | ;; roughly equivalent to positive infinity. |
a2cd141b | 194 | |
195 | (define_insn_reservation "9_call_op" 32 | |
196 | (and (eq_attr "tune" "arm926ejs") | |
197 | (eq_attr "type" "call")) | |
198 | "nothing") |