]>
Commit | Line | Data |
---|---|---|
ad41bd84 JM |
1 | ;; Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc. |
2 | ;; | |
3 | ;; This file is part of GCC. | |
4 | ;; | |
5 | ;; GCC is free software; you can redistribute it and/or modify | |
6 | ;; it under the terms of the GNU General Public License as published by | |
7 | ;; the Free Software Foundation; either version 3, or (at your option) | |
8 | ;; any later version. | |
9 | ;; | |
10 | ;; GCC is distributed in the hope that it will be useful, | |
11 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | ;; GNU General Public License for more details. | |
14 | ;; | |
15 | ;; You should have received a copy of the GNU General Public License | |
16 | ;; along with GCC; see the file COPYING3. If not see | |
17 | ;; <http://www.gnu.org/licenses/>. | |
18 | ;; | |
5ce6f47b EC |
19 | ;; ......................... |
20 | ;; | |
21 | ;; DFA-based pipeline description for Sandcraft SR3 (MIPS64 based) | |
22 | ;; | |
2a43945f | 23 | ;; The SR3 is described as: |
5ce6f47b | 24 | ;; - nine-stage pipeline, insn buffering with out-of-order issue to |
43aa4e05 | 25 | ;; multiple function units, with an average dispatch rate of 2 |
5ce6f47b EC |
26 | ;; insn.s per cycle (max 6 insns: 2 fpu, 4 cpu). |
27 | ;; | |
28 | ;; The details on this are scant except for a diagram in | |
29 | ;; Chap. 6 of Rev. 1.0 SR3 Spec. | |
30 | ;; | |
31 | ;; The model employed below is designed to closely approximate the | |
32 | ;; published latencies. Emulation of out-of-order issue and the insn | |
33 | ;; buffering is done via a VLIW dispatch style (with a packing of 6 insns); | |
34 | ;; the function unit reservation restrictions (define_*_set) are | |
35 | ;; contrived to support published timings. | |
36 | ;; | |
37 | ;; Reference: | |
1ae58c30 | 38 | ;; "SR3 Microprocessor Specification, System development information," |
5ce6f47b EC |
39 | ;; Revision 1.0, 13 December 2000. |
40 | ;; | |
41 | ;; | |
42 | ;; Reservation model is based on: | |
2a43945f KH |
43 | ;; 1) Figure 6-1, from the 1.0 specification. |
44 | ;; 2) Chapter 19, from the 1.0 specification. | |
5ce6f47b EC |
45 | ;; 3) following questions(Red Hat)/answers(Sandcraft): |
46 | ;; RH> From Section 19.1 | |
47 | ;; RH> 1) In terms of figure 6-1, are all the instructions in | |
48 | ;; RH> table 19-1 restricted | |
49 | ;; RH> to ALUx? When ALUx is not in use for an instruction in table 19-1 is | |
50 | ;; RH> it fully compatible with all insns that issue to ALUy? | |
51 | ;; | |
52 | ;; Yes, all the instructions in Table 19-1 only go to ALUX, and all the | |
53 | ;; instructions that can be issued to ALUY can also be issued to ALUX. | |
54 | ;; | |
55 | ;; | |
56 | ;; RH> From Section 19.2 | |
57 | ;; RH> 2) Explain conditional moves execution path (in terms of | |
58 | ;; RH> figure 6-1) | |
59 | ;; | |
60 | ;; Conditional move of integer registers (based on floating point condition | |
61 | ;; codes or integer register value) go to ALUX or ALUY. | |
62 | ;; | |
63 | ;; RH> 3) Explain floating point store execution path (in terms of | |
64 | ;; RH> figure 6-1) | |
65 | ;; | |
66 | ;; Floating point stores go to Ld/St and go to MOV in the floating point | |
67 | ;; pipeline. | |
68 | ;; | |
69 | ;; Floating point loads go to Ld/St and go to LOAD in the floating point | |
70 | ;; pipeline. | |
71 | ;; | |
72 | ;; RH> 4) Explain branch on floating condition (in terms of figure 6-1) | |
73 | ;; Branch on floating condition go to BRU. | |
74 | ;; | |
75 | ;; RH> 5) Is the column for single RECIP instruction latency correct? | |
76 | ;; RH> What about for RSQRT single and double? | |
77 | ;; | |
78 | ;; The latency/repeat for RECIP and RSQRT are correct. | |
79 | ;; | |
80 | ||
81 | ;; | |
82 | ;; Use six automata to isolate long latency operations, and to | |
83 | ;; reduce the complexity of cpu+fpu, reducing space. The two iteration | |
84 | ;; units (fpu_iter, ipu_macc_iter) each get an automaton of their own. | |
85 | (define_automaton "sr71_cpu, sr71_cpu1, sr71_cp1, sr71_cp2, sr71_fextra, sr71_imacc") | |
86 | ||
87 | ;; Issue slots: sr_iss0-3 feed the CPU function units (see cpu_iss), sr_iss4-5 feed the fpu, i.e. the CP1 interface (see cp1_iss) | |
88 | (define_cpu_unit "sr_iss0,sr_iss1,sr_iss2,sr_iss3,sr_iss4,sr_iss5" "sr71_cpu") | |
89 | ||
90 | ;; CPU function units (ipu_macc_iter is a pseudo-unit held by imul/idiv) | |
91 | (define_cpu_unit "ipu_bru" "sr71_cpu1") | |
92 | (define_cpu_unit "ipu_alux" "sr71_cpu1") | |
93 | (define_cpu_unit "ipu_aluy" "sr71_cpu1") | |
94 | (define_cpu_unit "ipu_ldst" "sr71_cpu1") | |
95 | (define_cpu_unit "ipu_macc_iter" "sr71_imacc") | |
96 | ||
97 | ;; Floating-point unit (Co-processor interface 1). fpu_mov and fpu_load | |
98 | ;; share automaton sr71_cp1; fpu_fpu is placed in sr71_cp2. | |
99 | (define_cpu_unit "fpu_mov" "sr71_cp1") | |
100 | (define_cpu_unit "fpu_load" "sr71_cp1") | |
101 | (define_cpu_unit "fpu_fpu" "sr71_cp2") | |
102 | ||
103 | ;; fictitious unit to track long float insns with separate automaton | |
104 | (define_cpu_unit "fpu_iter" "sr71_fextra") | |
105 | ||
106 | ||
107 | ;; | |
108 | ;; Define common execution path (reservation) combinations | |
109 | ;; | |
110 | ||
111 | ;; Any one of the four CPU issue slots (sr_iss0..sr_iss3), for one cycle. | |
112 | (define_reservation "cpu_iss" "sr_iss0|sr_iss1|sr_iss2|sr_iss3") | |
113 | ||
114 | ;; Two cycles are used for each instruction using the fpu, as it runs | |
115 | ;; at half the clock speed of the cpu. By adding an extra cycle | |
116 | ;; to the issue units, the default/minimum "repeat" dispatch delay is | |
117 | ;; accounted for in all insns. | |
118 | (define_reservation "cp1_iss" "(sr_iss4*2)|(sr_iss5*2)") | |
119 | ;; Claim all six issue slots in the same cycle (used by "unknown" and "multi"). | |
120 | (define_reservation "serial_dispatch" "sr_iss0+sr_iss1+sr_iss2+sr_iss3+sr_iss4+sr_iss5") | |
121 | ||
122 | ;; Simulate a 6 insn VLIW dispatch: 1 cycle in a CPU issue slot followed by | |
123 | ;; reservation of the function unit (either ALU in the case of ri_insns). | |
124 | (define_reservation "ri_insns" "cpu_iss,(ipu_alux|ipu_aluy)") | |
125 | (define_reservation "ri_mem" "cpu_iss,ipu_ldst") | |
126 | (define_reservation "ri_alux" "cpu_iss,ipu_alux") | |
127 | (define_reservation "ri_branch" "cpu_iss,ipu_bru") | |
128 | ;; FP paths: a CP1 issue slot (held two cycles), then the FP function unit. | |
129 | (define_reservation "rf_insn" "cp1_iss,fpu_fpu") | |
130 | (define_reservation "rf_ldmem" "cp1_iss,fpu_load") | |
131 | ||
132 | ;; Simultaneous reservation of the fpu_iter pseudo-unit keeps cp1 fpu tied | |
133 | ;; up until the long cycle insn is finished... | |
134 | (define_reservation "rf_multi1" "rf_insn+fpu_iter") | |
135 | ||
136 | ;; | |
137 | ;; The instruction-execution-path/resource-usage descriptions | |
138 | ;; (also known as reservation RTL) are roughly ordered | |
139 | ;; based on the define attribute RTL for the "type" classification. | |
140 | ;; When modifying, remember that the first test that matches is the | |
141 | ;; reservation used! | |
142 | ;; | |
143 | ||
144 | ;; Insns of unknown type reserve all six issue slots at once. | |
a8cdbec0 RS |
145 | (define_insn_reservation "ir_sr70_unknown" 1 |
146 | (and (eq_attr "cpu" "sr71000") | |
147 | (eq_attr "type" "unknown")) | |
148 | "serial_dispatch") | |
5ce6f47b EC |
149 | |
150 | ||
151 | ;; Assume prediction fails: every branch, jump and call is given latency 6. |
a8cdbec0 RS |
152 | (define_insn_reservation "ir_sr70_branch" 6 |
153 | (and (eq_attr "cpu" "sr71000") | |
154 | (eq_attr "type" "branch,jump,call")) | |
155 | "ri_branch") | |
5ce6f47b | 156 | ;; Loads of type "load": CPU issue slot, then the load/store unit; latency 2. |
a8cdbec0 RS |
157 | (define_insn_reservation "ir_sr70_load" 2 |
158 | (and (eq_attr "cpu" "sr71000") | |
159 | (eq_attr "type" "load")) | |
160 | "ri_mem") | |
5ce6f47b | 161 | ;; Stores use the same ri_mem path as loads, with latency 1. |
a8cdbec0 RS |
162 | (define_insn_reservation "ir_sr70_store" 1 |
163 | (and (eq_attr "cpu" "sr71000") | |
164 | (eq_attr "type" "store")) | |
165 | "ri_mem") | |
5ce6f47b EC |
166 | |
167 | ||
168 | ;; Float loads/stores flow through both cpu and cp1... | |
169 | ;; NOTE(review): ri_mem and rf_ldmem each start with their own issue-slot | |
170 | ;; step, so issue slots are reserved a second time here -- confirm intent. | |
a8cdbec0 RS |
171 | (define_insn_reservation "ir_sr70_fload" 9 |
172 | (and (eq_attr "cpu" "sr71000") | |
173 | (eq_attr "type" "fpload,fpidxload")) | |
174 | "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)") | |
5ce6f47b | 175 | ;; FP stores pair the load/store path with fpu_mov (MOV in the FP pipeline). |
a8cdbec0 RS |
176 | (define_insn_reservation "ir_sr70_fstore" 1 |
177 | (and (eq_attr "cpu" "sr71000") | |
178 | (eq_attr "type" "fpstore,fpidxstore")) | |
179 | "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)") | |
5ce6f47b EC |
180 | |
181 | ||
182 | ;; This reservation is for conditional moves based on an integer | |
5a2515e6 | 183 | ;; register value or a floating point CC; they go to ALUX or ALUY. |
a8cdbec0 RS |
184 | (define_insn_reservation "ir_sr70_condmove" 4 |
185 | (and (eq_attr "cpu" "sr71000") | |
186 | (eq_attr "type" "condmove")) | |
187 | "ri_insns") | |
5ce6f47b EC |
188 | |
189 | ;; Discriminate move-from-cp1 (mfc, latency 6) versus move-to-cp1 | |
190 | ;; (mtc, latency 9) as latencies are different. Like float load/store, | |
191 | ;; these insns use multiple resources simultaneously. | |
a8cdbec0 RS |
192 | (define_insn_reservation "ir_sr70_xfer_from" 6 |
193 | (and (eq_attr "cpu" "sr71000") | |
00f9e1ca | 194 | (eq_attr "type" "mfc")) |
a8cdbec0 RS |
195 | "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)") |
196 | ;; Move-to-cp1 uses the same reservation string and latency as float loads. | |
197 | (define_insn_reservation "ir_sr70_xfer_to" 9 | |
198 | (and (eq_attr "cpu" "sr71000") | |
00f9e1ca | 199 | (eq_attr "type" "mtc")) |
a8cdbec0 RS |
200 | "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)") |
201 | ;; HI/LO moves are modelled like simple ALU insns (either ALU, latency 1). | |
202 | (define_insn_reservation "ir_sr70_hilo" 1 | |
203 | (and (eq_attr "cpu" "sr71000") | |
cb00489c | 204 | (eq_attr "type" "mthi,mtlo,mfhi,mflo")) |
a8cdbec0 RS |
205 | "ri_insns") |
206 | ;; Simple integer insns: CPU issue slot, then either ALU; latency 1. | |
207 | (define_insn_reservation "ir_sr70_arith" 1 | |
208 | (and (eq_attr "cpu" "sr71000") | |
b1a14b78 | 209 | (eq_attr "type" "arith,shift,signext,slt,clz,const,logical,move,trap")) |
a8cdbec0 | 210 | "ri_insns") |
5ce6f47b EC |
211 | |
212 | ;; Emulate repeat (dispatch stall) by spending extra cycle(s) in | |
213 | ;; the ipu_macc_iter unit after two cycles on ALUX. | |
a8cdbec0 RS |
214 | (define_insn_reservation "ir_sr70_imul_si" 4 |
215 | (and (eq_attr "cpu" "sr71000") | |
216 | (and (eq_attr "type" "imul,imul3,imadd") | |
217 | (eq_attr "mode" "SI"))) | |
218 | "ri_alux,ipu_alux,ipu_macc_iter") | |
219 | ;; DImode multiply: same ALUX path, three cycles in the iter unit, latency 6. | |
220 | (define_insn_reservation "ir_sr70_imul_di" 6 | |
221 | (and (eq_attr "cpu" "sr71000") | |
222 | (and (eq_attr "type" "imul,imul3,imadd") | |
223 | (eq_attr "mode" "DI"))) | |
224 | "ri_alux,ipu_alux,(ipu_macc_iter*3)") | |
5ce6f47b EC |
225 | |
226 | ;; Divide algorithm is early out with best latency of 7 pcycles. | |
227 | ;; Use worst case (41 for SI, 73 for DI) for scheduling purposes. | |
a8cdbec0 RS |
228 | (define_insn_reservation "ir_sr70_idiv_si" 41 |
229 | (and (eq_attr "cpu" "sr71000") | |
230 | (and (eq_attr "type" "idiv") | |
231 | (eq_attr "mode" "SI"))) | |
232 | "ri_alux,ipu_alux,(ipu_macc_iter*38)") | |
233 | ;; DImode divide: latency 73, with 70 cycles spent in the iter unit. | |
234 | (define_insn_reservation "ir_sr70_idiv_di" 73 | |
235 | (and (eq_attr "cpu" "sr71000") | |
236 | (and (eq_attr "type" "idiv") | |
237 | (eq_attr "mode" "DI"))) | |
238 | "ri_alux,ipu_alux,(ipu_macc_iter*70)") | |
5ce6f47b | 239 | |
5ce6f47b | 240 | ;; Extra reservations of fpu_fpu (beyond the one in rf_insn) are for repeat latency. |
a8cdbec0 RS |
241 | (define_insn_reservation "ir_sr70_fadd_sf" 8 |
242 | (and (eq_attr "cpu" "sr71000") | |
243 | (and (eq_attr "type" "fadd") | |
244 | (eq_attr "mode" "SF"))) | |
245 | "rf_insn,fpu_fpu") | |
246 | ;; DFmode add: same reservation as SFmode, latency 10 instead of 8. | |
247 | (define_insn_reservation "ir_sr70_fadd_df" 10 | |
248 | (and (eq_attr "cpu" "sr71000") | |
249 | (and (eq_attr "type" "fadd") | |
250 | (eq_attr "mode" "DF"))) | |
251 | "rf_insn,fpu_fpu") | |
5ce6f47b EC |
252 | |
253 | ;; Latencies for MADD, MSUB, NMADD, NMSUB assume the multiply is fused | |
254 | ;; with the sub or add, so "fmadd" shares the "fmul" reservations. | |
a8cdbec0 RS |
255 | (define_insn_reservation "ir_sr70_fmul_sf" 8 |
256 | (and (eq_attr "cpu" "sr71000") | |
257 | (and (eq_attr "type" "fmul,fmadd") | |
258 | (eq_attr "mode" "SF"))) | |
259 | "rf_insn,fpu_fpu") | |
5ce6f47b EC |
260 | |
261 | ;; Tie up the fpu unit (fpu_fpu*6) to emulate the balance for the | |
262 | ;; "repeat rate" of 8 (2 are spent in the iss unit). | |
a8cdbec0 RS |
263 | (define_insn_reservation "ir_sr70_fmul_df" 16 |
264 | (and (eq_attr "cpu" "sr71000") | |
265 | (and (eq_attr "type" "fmul,fmadd") | |
266 | (eq_attr "mode" "DF"))) | |
267 | "rf_insn,fpu_fpu*6") | |
5ce6f47b EC |
268 | |
269 | ||
270 | ;; RECIP insn uses same type attr as div, and for SR3, has same | |
271 | ;; timings for double. However, single RECIP has a latency of 28 -- | |
272 | ;; only way to fix this is to introduce new insn attrs. NOTE(review): | |
273 | ;; "frdiv" is matched as its own type below; if that is RECIP, confirm. | |
274 | ;; Iter-unit cycles cover the "repeat" latency left after rf_multi1. | |
a8cdbec0 RS |
275 | (define_insn_reservation "ir_sr70_fdiv_sf" 60 |
276 | (and (eq_attr "cpu" "sr71000") | |
277 | (and (eq_attr "type" "fdiv,frdiv") | |
278 | (eq_attr "mode" "SF"))) | |
279 | "rf_multi1+(fpu_iter*51)") | |
280 | ;; DFmode divide: latency 120, fpu_iter reserved for 109 cycles. | |
281 | (define_insn_reservation "ir_sr70_fdiv_df" 120 | |
282 | (and (eq_attr "cpu" "sr71000") | |
283 | (and (eq_attr "type" "fdiv,frdiv") | |
284 | (eq_attr "mode" "DF"))) | |
285 | "rf_multi1+(fpu_iter*109)") | |
286 | ;; FP abs, negate and move share one reservation, latency 4. | |
287 | (define_insn_reservation "ir_sr70_fabs" 4 | |
288 | (and (eq_attr "cpu" "sr71000") | |
289 | (eq_attr "type" "fabs,fneg,fmove")) | |
290 | "rf_insn,fpu_fpu") | |
291 | ;; FP compare: same reservation string as fabs, latency 10. | |
292 | (define_insn_reservation "ir_sr70_fcmp" 10 | |
293 | (and (eq_attr "cpu" "sr71000") | |
294 | (eq_attr "type" "fcmp")) | |
295 | "rf_insn,fpu_fpu") | |
5ce6f47b EC |
296 | |
297 | ;; The "fcvt" type attribute covers a number of different insns; most | |
298 | ;; have the same latency descriptions, a few vary. We use the | |
299 | ;; most common timing (which is also worst case). | |
a8cdbec0 RS |
300 | (define_insn_reservation "ir_sr70_fcvt" 12 |
301 | (and (eq_attr "cpu" "sr71000") | |
302 | (eq_attr "type" "fcvt")) | |
303 | "rf_insn,fpu_fpu*4") | |
304 | ;; Square root: long-latency insn tracked with fpu_iter via rf_multi1. | |
305 | (define_insn_reservation "ir_sr70_fsqrt_sf" 62 | |
306 | (and (eq_attr "cpu" "sr71000") | |
307 | (and (eq_attr "type" "fsqrt") | |
308 | (eq_attr "mode" "SF"))) | |
309 | "rf_multi1+(fpu_iter*53)") | |
310 | ;; DFmode square root: latency 122, fpu_iter reserved for 111 cycles. | |
311 | (define_insn_reservation "ir_sr70_fsqrt_df" 122 | |
312 | (and (eq_attr "cpu" "sr71000") | |
313 | (and (eq_attr "type" "fsqrt") | |
314 | (eq_attr "mode" "DF"))) | |
315 | "rf_multi1+(fpu_iter*111)") | |
316 | ;; SFmode "frsqrt" (the RSQRT insns): latency 48, fpu_iter for 39 cycles. | |
317 | (define_insn_reservation "ir_sr70_frsqrt_sf" 48 | |
318 | (and (eq_attr "cpu" "sr71000") | |
319 | (and (eq_attr "type" "frsqrt") | |
320 | (eq_attr "mode" "SF"))) | |
321 | "rf_multi1+(fpu_iter*39)") | |
322 | ;; DFmode "frsqrt": latency 240, fpu_iter reserved for 229 cycles. | |
323 | (define_insn_reservation "ir_sr70_frsqrt_df" 240 | |
324 | (and (eq_attr "cpu" "sr71000") | |
325 | (and (eq_attr "type" "frsqrt") | |
326 | (eq_attr "mode" "DF"))) | |
327 | "rf_multi1+(fpu_iter*229)") | |
328 | ;; "multi" insns reserve every issue slot, like "unknown". | |
329 | (define_insn_reservation "ir_sr70_multi" 1 | |
330 | (and (eq_attr "cpu" "sr71000") | |
331 | (eq_attr "type" "multi")) | |
332 | "serial_dispatch") | |
333 | ;; Nops use the same reservation as simple ALU insns. | |
334 | (define_insn_reservation "ir_sr70_nop" 1 | |
335 | (and (eq_attr "cpu" "sr71000") | |
336 | (eq_attr "type" "nop")) | |
337 | "ri_insns") |