]>
Commit | Line | Data |
---|---|---|
78d310c2 | 1 | ;; DFA scheduling description for ST40-300. |
a5544970 | 2 | ;; Copyright (C) 2004-2019 Free Software Foundation, Inc. |
78d310c2 R |
3 | |
4 | ;; This file is part of GCC. | |
5 | ||
6 | ;; GCC is free software; you can redistribute it and/or modify | |
7 | ;; it under the terms of the GNU General Public License as published by | |
2f83c7d6 | 8 | ;; the Free Software Foundation; either version 3, or (at your option) |
78d310c2 R |
9 | ;; any later version. |
10 | ||
11 | ;; GCC is distributed in the hope that it will be useful, | |
12 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | ;; GNU General Public License for more details. | |
15 | ||
16 | ;; You should have received a copy of the GNU General Public License | |
2f83c7d6 NC |
17 | ;; along with GCC; see the file COPYING3. If not see |
18 | ;; <http://www.gnu.org/licenses/>. | |
78d310c2 R |
19 | |
20 | ;; Load and store instructions save a cycle if they are aligned on a | |
21 | ;; four byte boundary. Using a function unit for stores encourages | |
22 | ;; gcc to separate load and store instructions by one instruction, | |
23 | ;; which makes it more likely that the linker will be able to word | |
24 | ;; align them when relaxing. | |
25 | ||
26 | ;; The following description models the ST40-300 pipeline using the DFA based | |
27 | ;; scheduler. | |
28 | ||
29 | ;; Two automata are defined to reduce the number of states | |
30 | ;; that a single large automaton would have (factoring). | |
31 | ||
32 | (define_automaton "sh4_300_inst_pipeline,sh4_300_fpu_pipe") | |
33 | ||
34 | ;; This unit is basically the decode unit of the processor. | |
35 | ;; Since SH4 is a dual-issue machine, it is as if there are two | |
36 | ;; units, so that any insn can be processed by either one | |
37 | ;; of the decoding units. | |
78d310c2 R |
38 | (define_cpu_unit "sh4_300_pipe_01,sh4_300_pipe_02" "sh4_300_inst_pipeline") |
39 | ||
40 | ;; The floating point units. | |
78d310c2 R |
41 | (define_cpu_unit "sh4_300_fpt,sh4_300_fpu,sh4_300_fds" "sh4_300_fpu_pipe") |
42 | ||
43 | ;; integer multiplier unit | |
78d310c2 R |
44 | (define_cpu_unit "sh4_300_mul" "sh4_300_inst_pipeline") |
45 | ||
46 | ;; LS unit | |
78d310c2 R |
47 | (define_cpu_unit "sh4_300_ls" "sh4_300_inst_pipeline") |
48 | ||
49 | ;; The address calculator used for branch instructions. | |
50 | ;; This will be reserved after "issue" of branch instructions | |
51 | ;; and this is to make sure that no two branch instructions | |
52 | ;; can be issued in parallel. | |
78d310c2 R |
53 | (define_cpu_unit "sh4_300_br" "sh4_300_inst_pipeline") |
54 | ||
55 | ;; ---------------------------------------------------- | |
56 | ;; This reservation is to simplify the dual issue description. | |
57 | ||
58 | (define_reservation "sh4_300_issue" "sh4_300_pipe_01|sh4_300_pipe_02") | |
59 | ||
60 | (define_reservation "all" "sh4_300_pipe_01+sh4_300_pipe_02") | |
61 | ||
62 | ;;(define_insn_reservation "nil" 0 (eq_attr "type" "nil") "nothing") | |
63 | ||
64 | ;; MOV RM,RN / MOV #imm8,RN / STS PR,RN: latency 0, issued on either pipe. | |
65 | (define_insn_reservation "sh4_300_mov" 0 | |
66 | (and (eq_attr "pipe_model" "sh4_300") | |
67 | (eq_attr "type" "move,movi8,prget")) | |
68 | "sh4_300_issue") | |
69 | ||
70 | ;; Fixed STS from MACL / MACH | |
71 | (define_insn_reservation "sh4_300_mac_gp" 0 | |
72 | (and (eq_attr "pipe_model" "sh4_300") | |
73 | (eq_attr "type" "mac_gp")) | |
74 | "sh4_300_issue+sh4_300_mul") | |
75 | ||
76 | ;; Fixed LDS to MACL / MACH | |
77 | (define_insn_reservation "sh4_300_gp_mac" 1 | |
78 | (and (eq_attr "pipe_model" "sh4_300") | |
79 | (eq_attr "type" "gp_mac")) | |
80 | "sh4_300_issue+sh4_300_mul") | |
81 | ||
82 | ;; Instructions without specific resource requirements with latency 1. | |
78d310c2 R |
83 | (define_insn_reservation "sh4_300_simple_arith" 1 |
84 | (and (eq_attr "pipe_model" "sh4_300") | |
85 | (eq_attr "type" "mt_group,arith,dyn_shift,prset")) | |
86 | "sh4_300_issue") | |
87 | ||
88 | ;; Load and store instructions have no alignment peculiarities for the ST40-300, | |
89 | ;; but they use the load-store unit, which they share with the fmove type | |
90 | ;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg). | |
91 | ;; Loads have a latency of three. | |
92 | ||
93 | ;; Load Store instructions. | |
94 | (define_insn_reservation "sh4_300_load" 3 | |
95 | (and (eq_attr "pipe_model" "sh4_300") | |
96 | (eq_attr "type" "load,pcload,load_si,pcload_si,pload")) | |
97 | "sh4_300_issue+sh4_300_ls") | |
98 | ||
99 | (define_insn_reservation "sh4_300_mac_load" 3 | |
100 | (and (eq_attr "pipe_model" "sh4_300") | |
101 | (eq_attr "type" "mem_mac")) | |
102 | "sh4_300_issue+sh4_300_ls+sh4_300_mul") | |
103 | ||
104 | (define_insn_reservation "sh4_300_fload" 4 | |
105 | (and (eq_attr "pipe_model" "sh4_300") | |
106 | (eq_attr "type" "fload,pcfload")) | |
107 | "sh4_300_issue+sh4_300_ls+sh4_300_fpt") | |
108 | ||
109 | ;; sh_adjust_cost describes the reduced latency of the feeding insns of a store. | |
110 | ;; The latency of an auto-increment register is 1; the latency of the memory | |
111 | ;; output is not actually considered here anyway. | |
112 | (define_insn_reservation "sh4_300_store" 1 | |
113 | (and (eq_attr "pipe_model" "sh4_300") | |
114 | (eq_attr "type" "store,pstore")) | |
115 | "sh4_300_issue+sh4_300_ls") | |
116 | ||
117 | (define_insn_reservation "sh4_300_fstore" 1 | |
118 | (and (eq_attr "pipe_model" "sh4_300") | |
119 | (eq_attr "type" "fstore")) | |
120 | "sh4_300_issue+sh4_300_ls+sh4_300_fpt") | |
121 | ||
122 | ;; Fixed STS.L from MACL / MACH | |
123 | (define_insn_reservation "sh4_300_mac_store" 1 | |
124 | (and (eq_attr "pipe_model" "sh4_300") | |
125 | (eq_attr "type" "mac_mem")) | |
126 | "sh4_300_issue+sh4_300_mul+sh4_300_ls") | |
127 | ||
128 | (define_insn_reservation "sh4_300_gp_fpul" 2 | |
129 | (and (eq_attr "pipe_model" "sh4_300") | |
130 | (eq_attr "type" "gp_fpul")) | |
131 | "sh4_300_issue+sh4_300_fpt") | |
132 | ||
133 | (define_insn_reservation "sh4_300_fpul_gp" 1 | |
134 | (and (eq_attr "pipe_model" "sh4_300") | |
135 | (eq_attr "type" "fpul_gp")) | |
136 | "sh4_300_issue+sh4_300_fpt") | |
137 | ||
138 | ;; Branch (BF,BF/S,BT,BT/S,BRA) | |
139 | ;; Branch Far (JMP,RTS,BRAF) | |
140 | ;; Group: BR | |
141 | ;; When displacement is 0 for BF / BT, we have effectively conditional | |
142 | ;; execution of one instruction, without pipeline disruption. | |
143 | ;; Otherwise, the latency depends on prediction success. | |
144 | ;; We can't really do much with the latency, even if we could express it, | |
145 | ;; but the pairing restrictions are useful to take into account. | |
146 | ;; ??? If the branch is likely, and not paired with a preceding insn, | |
147 | ;; or likely and likely not predicted, we might want to fill the delay slot. | |
148 | ;; However, there appears to be no machinery to make the compiler | |
149 | ;; recognize these scenarios. | |
78d310c2 R |
150 | (define_insn_reservation "sh4_300_branch" 1 |
151 | (and (eq_attr "pipe_model" "sh4_300") | |
152 | (eq_attr "type" "cbranch,jump,return,jump_ind")) | |
153 | "sh4_300_issue+sh4_300_br") | |
154 | ||
155 | ;; RTE: holds both issue pipes for 9 cycles; parenthesized because "*" binds tighter than "+". | |
156 | (define_insn_reservation "sh4_300_return_from_exp" 9 | |
157 | (and (eq_attr "pipe_model" "sh4_300") | |
158 | (eq_attr "type" "rte")) | |
159 | "(sh4_300_pipe_01+sh4_300_pipe_02)*9") | |
160 | ||
161 | ;; OCBP, OCBWB | |
162 | ;; Group: CO | |
163 | ;; Latency: 1-5 | |
164 | ;; Issue Rate: 1 | |
50fe8924 OE |
165 | ;; cwb is used for the sequence |
166 | ;; ocbwb @%0 | |
167 | ;; extu.w %0,%2 | |
168 | ;; or %1,%2 | |
169 | ;; mov.l %0,@%2 | |
78d310c2 R |
170 | ;; This description is likely inexact, but this pattern should not actually |
171 | ;; appear when compiling for sh4-300; we should use isbi instead. | |
172 | ;; If a -mtune option is added later, we should use the icache array | |
173 | ;; dispatch method instead. | |
174 | (define_insn_reservation "sh4_300_ocbwb" 3 | |
175 | (and (eq_attr "pipe_model" "sh4_300") | |
176 | (eq_attr "type" "cwb")) | |
177 | "all*3") | |
178 | ||
179 | ;; JSR,BSR,BSRF | |
180 | ;; Calls have a mandatory delay slot, which we'd like to fill with an insn | |
181 | ;; that can be paired with the call itself. | |
182 | ;; Scheduling runs before reorg, so we approximate this by saying that we | |
183 | ;; want the call to be paired with a preceding insn. | |
184 | ;; In most cases, the insn that loads the address of the call should have | |
9f5ed61a | 185 | ;; a nonzero latency (mov rn,rm doesn't make sense since we could use rn |
78d310c2 | 186 | ;; for the address then). Thus, a preceding insn that can be paired with |
2f8e468b | 187 | ;; a call should be eligible for the delay slot. |
78d310c2 R |
188 | ;; |
189 | ;; Calls introduce a longish delay that is likely to flush the pipelines | |
190 | ;; of the caller's instructions. Ordinary functions tend to end with a | |
191 | ;; load to restore a register (in the delay slot of rts), while sfuncs | |
192 | ;; tend to end with an EX or MT insn. But that is not actually relevant, | |
193 | ;; since there are no instructions that contend for memory access early. | |
194 | ;; We could, of course, provide exact scheduling information for specific | |
195 | ;; sfuncs, if that should prove useful. | |
78d310c2 R |
196 | (define_insn_reservation "sh4_300_call" 16 |
197 | (and (eq_attr "pipe_model" "sh4_300") | |
198 | (eq_attr "type" "call,sfunc")) | |
199 | "sh4_300_issue+sh4_300_br,all*15") | |
200 | ||
201 | ;; FMOV.S / FMOV.D | |
202 | (define_insn_reservation "sh4_300_fmov" 1 | |
203 | (and (eq_attr "pipe_model" "sh4_300") | |
204 | (eq_attr "type" "fmove")) | |
205 | "sh4_300_issue+sh4_300_fpt") | |
206 | ||
207 | ;; LDS to FPSCR | |
208 | (define_insn_reservation "sh4_300_fpscr_load" 8 | |
209 | (and (eq_attr "pipe_model" "sh4_300") | |
210 | (eq_attr "type" "gp_fpscr")) | |
211 | "sh4_300_issue+sh4_300_fpu+sh4_300_fpt") | |
212 | ||
213 | ;; LDS.L to FPSCR | |
214 | (define_insn_reservation "sh4_300_fpscr_load_mem" 8 | |
215 | (and (eq_attr "pipe_model" "sh4_300") | |
216 | (eq_attr "type" "mem_fpscr")) | |
217 | "sh4_300_issue+sh4_300_fpu+sh4_300_fpt+sh4_300_ls") | |
218 | ||
219 | \f | |
220 | ;; Fixed point multiplication (DMULS.L DMULU.L MUL.L MULS.W,MULU.W) | |
221 | (define_insn_reservation "multi" 2 | |
222 | (and (eq_attr "pipe_model" "sh4_300") | |
223 | (eq_attr "type" "smpy,dmpy")) | |
224 | "sh4_300_issue+sh4_300_mul") | |
225 | ||
226 | ;; FPCHG, FRCHG, FSCHG | |
227 | (define_insn_reservation "fpscr_toggle" 1 | |
228 | (and (eq_attr "pipe_model" "sh4_300") | |
229 | (eq_attr "type" "fpscr_toggle")) | |
230 | "sh4_300_issue+sh4_300_fpu+sh4_300_fpt") | |
231 | ||
232 | ;; FCMP/EQ, FCMP/GT | |
233 | (define_insn_reservation "fp_cmp" 3 | |
234 | (and (eq_attr "pipe_model" "sh4_300") | |
235 | (eq_attr "type" "fp_cmp,dfp_cmp")) | |
236 | "sh4_300_issue+sh4_300_fpu") | |
237 | ||
238 | ;; Single precision floating point (FADD,FLOAT,FMAC,FMUL,FSUB,FTRC) | |
239 | ;; Double-precision floating-point (FADD,FCNVDS,FCNVSD,FLOAT,FSUB,FTRC) | |
240 | (define_insn_reservation "fp_arith" 6 | |
241 | (and (eq_attr "pipe_model" "sh4_300") | |
242 | (eq_attr "type" "fp,ftrc_s,dfp_arith,dfp_conv")) | |
243 | "sh4_300_issue+sh4_300_fpu") | |
244 | ||
245 | ;; Single Precision FDIV/SQRT: latency 19; sh4_300_fds is held for 1+15 cycles. | |
246 | (define_insn_reservation "fp_div" 19 | |
247 | (and (eq_attr "pipe_model" "sh4_300") | |
248 | (eq_attr "type" "fdiv")) | |
249 | "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*15") | |
250 | ||
251 | ;; Double-precision floating-point FMUL: latency 9; sh4_300_fpu is held for 1+3 cycles. | |
252 | (define_insn_reservation "dfp_mul" 9 | |
253 | (and (eq_attr "pipe_model" "sh4_300") | |
254 | (eq_attr "type" "dfp_mul")) | |
255 | "sh4_300_issue+sh4_300_fpu,sh4_300_fpu*3") | |
256 | ||
257 | ;; Double precision FDIV/SQRT: latency 35; sh4_300_fds is held for 1+31 cycles. | |
258 | (define_insn_reservation "dp_div" 35 | |
259 | (and (eq_attr "pipe_model" "sh4_300") | |
260 | (eq_attr "type" "dfdiv")) | |
261 | "sh4_300_issue+sh4_300_fpu+sh4_300_fds,sh4_300_fds*31") | |
262 | ||
78d310c2 R |
263 | ;; ??? We don't really want these for sh4-300. |
264 | ;; This pattern itself is likely to finish in 3 cycles, but also | |
265 | ;; to disrupt branch prediction for taken branches for the following | |
266 | ;; condbranch. | |
267 | (define_insn_reservation "sh4_300_arith3" 5 | |
268 | (and (eq_attr "pipe_model" "sh4_300") | |
269 | (eq_attr "type" "arith3")) | |
270 | "sh4_300_issue,all*4") | |
271 | ||
272 | ;; arith3b insns without branch redirection make use of the 0-offset 0-latency | |
273 | ;; branch feature, and thus schedule the same no matter if the branch is taken | |
274 | ;; or not. If the branch is redirected, the taken branch might take longer, | |
275 | ;; but then, we don't have to take the next branch. | |
276 | ;; ??? Should we suppress branch redirection for sh4-300 to improve branch | |
277 | ;; target hit rates? | |
278 | (define_insn_reservation "arith3b" 2 | |
279 | (and (eq_attr "pipe_model" "sh4_300") | |
280 | (eq_attr "type" "arith3b")) | |
281 | "sh4_300_issue,all")