]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/arm/cortex-r4.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / arm / cortex-r4.md
CommitLineData
bd4dc3cd 1;; ARM Cortex-R4 scheduling description.
7adcbafe 2;; Copyright (C) 2007-2022 Free Software Foundation, Inc.
bd4dc3cd
PB
3;; Contributed by CodeSourcery.
4
5;; This file is part of GCC.
6
874101ec
PB
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published
9;; by the Free Software Foundation; either version 3, or (at your
10;; option) any later version.
11
bd4dc3cd
PB
12;; GCC is distributed in the hope that it will be useful, but WITHOUT
13;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15;; License for more details.
16
17;; You should have received a copy of the GNU General Public License
874101ec
PB
18;; along with GCC; see the file COPYING3. If not see
19;; <http://www.gnu.org/licenses/>.
bd4dc3cd
PB
20
;; The automaton to which the Cortex-R4 issue units below are assigned.
(define_automaton "cortex_r4")
22
;; We approximate the dual-issue constraints of this core using four
;; "issue units" and a reservation matrix as follows.  The numbers indicate
;; the instruction groups' preferences in order.  Multiple entries for
;; the same numbered preference indicate units that must be reserved
;; together.
;;
;; Issue unit:          A       B       C       ALU
;;
;; ALU w/o reg shift    1st     2nd             1st and 2nd
;; ALU w/ reg shift     1st     2nd     2nd     1st and 2nd
;; Moves                1st     2nd             2nd
;; Multiplication       1st                     1st
;; Division             1st                     1st
;; Load/store single    1st             1st
;; Other load/store     1st     1st
;; Branches                     1st
39
;; The four issue units used by the reservations in this file.  Each one
;; is placed in the "cortex_r4" automaton.
(define_cpu_unit "cortex_r4_issue_a"   "cortex_r4")
(define_cpu_unit "cortex_r4_issue_b"   "cortex_r4")
(define_cpu_unit "cortex_r4_issue_c"   "cortex_r4")
(define_cpu_unit "cortex_r4_issue_alu" "cortex_r4")
44
;; ALU operation: unit A together with the ALU unit, or alternatively
;; unit B together with the ALU unit.
(define_reservation "cortex_r4_alu"
  "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
   (cortex_r4_issue_b+cortex_r4_issue_alu)")

;; ALU operation with a register-specified shift: unit A together with
;; the ALU unit, or alternatively units B and C together with the ALU unit.
(define_reservation "cortex_r4_alu_shift_reg"
  "(cortex_r4_issue_a+cortex_r4_issue_alu)|\
   (cortex_r4_issue_b+cortex_r4_issue_c+\
   cortex_r4_issue_alu)")

;; Move: unit A on its own, or alternatively unit B together with the
;; ALU unit.
(define_reservation "cortex_r4_mov"
  "cortex_r4_issue_a|(cortex_r4_issue_b+\
   cortex_r4_issue_alu)")
;; Multiplication holding unit A and the ALU unit for a single cycle.
(define_reservation "cortex_r4_mul"
  "cortex_r4_issue_a+cortex_r4_issue_alu")

;; Multiplication holding unit A and the ALU unit for two consecutive
;; cycles.
(define_reservation "cortex_r4_mul_2"
  "(cortex_r4_issue_a+cortex_r4_issue_alu)*2")
;; Divisions run out-of-order relative to the rest of the pipeline, so
;; unit A and the ALU unit are only reserved on the first and on the last
;; cycle; the cycles in between reserve nothing.

;; Nine-cycle division: one reserved cycle, seven idle, one reserved.
(define_reservation "cortex_r4_div_9"
  "cortex_r4_issue_a+cortex_r4_issue_alu,\
   nothing*7,\
   cortex_r4_issue_a+cortex_r4_issue_alu")

;; Ten-cycle division: one reserved cycle, eight idle, one reserved.
(define_reservation "cortex_r4_div_10"
  "cortex_r4_issue_a+cortex_r4_issue_alu,\
   nothing*8,\
   cortex_r4_issue_a+cortex_r4_issue_alu")
;; Load/store reserving units A and C together for one cycle.
(define_reservation "cortex_r4_load_store"
  "cortex_r4_issue_a+cortex_r4_issue_c")

;; Load/store reserving units A and B together for two consecutive cycles.
(define_reservation "cortex_r4_load_store_2"
  "(cortex_r4_issue_a+cortex_r4_issue_b)*2")

;; Branches reserve unit B only.
(define_reservation "cortex_r4_branch"
  "cortex_r4_issue_b")
74
75;; We assume that all instructions are unconditional.
76
77;; Data processing instructions. Moves without shifts are kept separate
78;; for the purposes of the dual-issue constraints above.
;; Instructions of the listed types, when tuning for Cortex-R4, have a
;; default latency of 2 and use the "cortex_r4_alu" reservation.
(define_insn_reservation "cortex_r4_alu" 2
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                        alu_sreg,alus_sreg,logic_reg,logics_reg,\
                        adc_imm,adcs_imm,adc_reg,adcs_reg,\
                        adr,bfm,clz,rbit,rev,\
                        shift_imm,shift_reg,mvn_imm,mvn_reg"))
  "cortex_r4_alu")
87
;; Immediate and register moves: default latency 2, using the
;; "cortex_r4_mov" reservation.
(define_insn_reservation "cortex_r4_mov" 2
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "mov_imm,mov_reg"))
  "cortex_r4_mov")
92
;; Instructions of the listed shift-by-immediate and extend types: default
;; latency 2.  They share the "cortex_r4_alu" reservation with the plain
;; ALU instructions.
(define_insn_reservation "cortex_r4_alu_shift" 2
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "alu_shift_imm_lsl_1to4,alu_shift_imm_other,alus_shift_imm,\
                        logic_shift_imm,logics_shift_imm,\
                        extend,mov_shift,mvn_shift"))
  "cortex_r4_alu")
99
;; Instructions of the listed shift-by-register types, plus the "mrs" and
;; "multiple" types: default latency 2, using the
;; "cortex_r4_alu_shift_reg" reservation.
(define_insn_reservation "cortex_r4_alu_shift_reg" 2
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "alu_shift_reg,alus_shift_reg,\
                        logic_shift_reg,logics_shift_reg,\
                        mov_shift_reg,mvn_shift_reg,\
                        mrs,multiple"))
  "cortex_r4_alu_shift_reg")
107
;; Data-processing producer feeding a data-processing consumer: latency
;; drops to one cycle when the consumer has no early dependency.  For the
;; shifting consumers this is decided by the named guard function; the
;; plain ALU consumer takes the bypass unconditionally.
(define_bypass 1
  "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
   cortex_r4_mov"
  "cortex_r4_alu")

(define_bypass 1
  "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
   cortex_r4_mov"
  "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")

(define_bypass 1
  "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
   cortex_r4_mov"
  "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")
120
121;; In terms of availabilities, a consumer mov could theoretically be
122;; issued together with a producer ALU instruction, without stalls.
123;; In practice this cannot happen because mov;add (in that order) is not
124;; eligible for dual issue and furthermore dual issue is not permitted
125;; when a dependency is involved. We therefore note it as latency one.
126;; A mov followed by another of the same is also latency one.
;; Any data-processing producer (including another mov) feeding a mov:
;; latency one, with no guard function.
(define_bypass 1
  "cortex_r4_alu,cortex_r4_alu_shift,cortex_r4_alu_shift_reg,\
   cortex_r4_mov"
  "cortex_r4_mov")
130
131;; qadd, qdadd, qsub and qdsub are not currently emitted, and neither are
132;; media data processing instructions nor sad instructions.
133
134;; Multiplication instructions.
135
;; Multiplication types with a default latency of 4 use the two-cycle
;; "cortex_r4_mul_2" reservation; those with a default latency of 3 use
;; the one-cycle "cortex_r4_mul" reservation.

(define_insn_reservation "cortex_r4_mul_4" 4
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "mul,smmul"))
  "cortex_r4_mul_2")

(define_insn_reservation "cortex_r4_mul_3" 3
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "smulxy,smulwy,smuad,smusd"))
  "cortex_r4_mul")

(define_insn_reservation "cortex_r4_mla_4" 4
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "mla,smmla"))
  "cortex_r4_mul_2")

(define_insn_reservation "cortex_r4_mla_3" 3
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "smlaxy,smlawy,smlad,smlsd"))
  "cortex_r4_mul")

(define_insn_reservation "cortex_r4_smlald" 3
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "smlald,smlsld"))
  "cortex_r4_mul")

(define_insn_reservation "cortex_r4_mull" 4
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "smull,umull,umlal,umaal"))
  "cortex_r4_mul_2")
165
;; Result forwarding into an MLA accumulator: when a multiply or MLA with
;; a single-register result feeds the accumulator operand of a following
;; MLA (checked by arm_mac_accumulator_is_mul_result), the latency is one
;; cycle below the producer's default: 2 for the latency-3 producers and
;; 3 for the latency-4 producers.
(define_bypass 2
  "cortex_r4_mul_3,cortex_r4_mla_3"
  "cortex_r4_mla_3,cortex_r4_mla_4"
  "arm_mac_accumulator_is_mul_result")

(define_bypass 3
  "cortex_r4_mul_4,cortex_r4_mla_4"
  "cortex_r4_mla_3,cortex_r4_mla_4"
  "arm_mac_accumulator_is_mul_result")
175
;; Multiply feeding a data-processing instruction: if the consumer needs
;; the product only at the ALU stage rather than at the Shift stage, the
;; latency is one cycle below the producer's default.  The shifting
;; consumers are checked by the named guard functions.

;; Producers with default latency 3.
(define_bypass 2
  "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
  "cortex_r4_alu")
(define_bypass 2
  "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
  "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 2
  "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
  "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")

;; Producers with default latency 4.
(define_bypass 3
  "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
  "cortex_r4_alu")
(define_bypass 3
  "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
  "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 3
  "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
  "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")
194
;; Multiply feeding a mov: two cycles below the producer's default
;; latency (1 for the latency-3 producers, 2 for the latency-4 producers).
(define_bypass 1
  "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
  "cortex_r4_mov")
(define_bypass 2
  "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
  "cortex_r4_mov")
200
;; The latency of sdiv and udiv depends on the operands.  As an estimate
;; we take the average division A/B to have ten more leading zeros in B
;; than in A, which yields a latency of nine for udiv and ten for sdiv.
(define_insn_reservation "cortex_r4_udiv" 9
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "udiv"))
  "cortex_r4_div_9")

(define_insn_reservation "cortex_r4_sdiv" 10
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "sdiv"))
  "cortex_r4_div_10")
213
;; Branches.  Prediction is assumed to be correct, so the latency is zero.

(define_insn_reservation "cortex_r4_branch" 0
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "branch"))
  "cortex_r4_branch")
220
;; The latency of a call cannot be predicted.  A very large,
;; semi-arbitrary value stands in for "positive infinity" so that
;; everything should have completed by the time the call returns.  No
;; issue unit is reserved.
(define_insn_reservation "cortex_r4_call" 32
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "call"))
  "nothing")
228
229;; Status register access instructions are not currently emitted.
230
231;; Load instructions.
232;; We do not model the "addr_md_3cycle" cases and assume that
233;; accesses following are correctly aligned.
234
;; Load types load_4 and load_8: default latency 3, one-cycle load/store
;; reservation.
(define_insn_reservation "cortex_r4_load_1_2" 3
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "load_4,load_8"))
  "cortex_r4_load_store")

;; Load types load_12 and load_16: default latency 4, two-cycle load/store
;; reservation.
(define_insn_reservation "cortex_r4_load_3_4" 4
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "load_12,load_16"))
  "cortex_r4_load_store_2")
244
;; A load whose result is consumed only as a Normal Reg has one cycle
;; less latency than the load's default.  The shifting consumers are
;; checked by the named guard functions.

;; From "cortex_r4_load_1_2" (default latency 3).
(define_bypass 2
  "cortex_r4_load_1_2"
  "cortex_r4_alu")
(define_bypass 2
  "cortex_r4_load_1_2"
  "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 2
  "cortex_r4_load_1_2"
  "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")

;; From "cortex_r4_load_3_4" (default latency 4).
(define_bypass 3
  "cortex_r4_load_3_4"
  "cortex_r4_alu")
(define_bypass 3
  "cortex_r4_load_3_4"
  "cortex_r4_alu_shift"
  "arm_no_early_alu_shift_dep")
(define_bypass 3
  "cortex_r4_load_3_4"
  "cortex_r4_alu_shift_reg"
  "arm_no_early_alu_shift_value_dep")
265
;; A load whose result is consumed only as a Late Reg has two cycles less
;; latency than the load's default.  The consumers of this kind are moves
;; and stores.

(define_bypass 1
  "cortex_r4_load_1_2"
  "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
(define_bypass 2
  "cortex_r4_load_3_4"
  "cortex_r4_mov,cortex_r4_store_1_2,cortex_r4_store_3_4")
274
;; A result needed as the base or offset of a load costs one cycle more
;; than the producer's default latency.  No guard function is given, so
;; these latencies apply to every dependence from the listed producers to
;; a load.

;; Data-processing producers (default latency 2).
(define_bypass 3
  "cortex_r4_alu,cortex_r4_mov,cortex_r4_alu_shift,\
   cortex_r4_alu_shift_reg"
  "cortex_r4_load_1_2,cortex_r4_load_3_4")

;; Multiply producers with default latency 3.
(define_bypass 4
  "cortex_r4_mul_3,cortex_r4_mla_3,cortex_r4_smlald"
  "cortex_r4_load_1_2,cortex_r4_load_3_4")

;; Multiply producers with default latency 4.
(define_bypass 5
  "cortex_r4_mul_4,cortex_r4_mla_4,cortex_r4_mull"
  "cortex_r4_load_1_2,cortex_r4_load_3_4")
287
288;; Store instructions.
289
;; Store types store_4 and store_8: latency 0, one-cycle load/store
;; reservation.
(define_insn_reservation "cortex_r4_store_1_2" 0
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "store_4,store_8"))
  "cortex_r4_load_store")

;; Store types store_12 and store_16: latency 0, two-cycle load/store
;; reservation.
(define_insn_reservation "cortex_r4_store_3_4" 0
  (and (eq_attr "tune_cortexr4" "yes")
       (eq_attr "type" "store_12,store_16"))
  "cortex_r4_load_store_2")
299