]>
Commit | Line | Data |
---|---|---|
7a3446ec | 1 | ;; DFA-based pipeline description for the VR1x000. |
7adcbafe | 2 | ;; Copyright (C) 2005-2022 Free Software Foundation, Inc. |
7a3446ec JK |
3 | ;; |
4 | ;; This file is part of GCC. | |
5 | ||
6 | ;; GCC is free software; you can redistribute it and/or modify it | |
7 | ;; under the terms of the GNU General Public License as published | |
8 | ;; by the Free Software Foundation; either version 3, or (at your | |
9 | ;; option) any later version. | |
10 | ||
11 | ;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 | ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
13 | ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
14 | ;; License for more details. | |
15 | ||
16 | ;; You should have received a copy of the GNU General Public License | |
17 | ;; along with GCC; see the file COPYING3. If not see | |
18 | ;; <http://www.gnu.org/licenses/>. | |
19 | ||
20 | ||
21 | ;; R12K/R14K/R16K are derivatives of R10K, so they reuse its description | |
22 | ;; until specific tuning for each is added. | |
23 | ||
24 | ;; R10000 has an int queue, an fp queue, and an address queue. | |
25 | ;; The int queue feeds ALU1 and ALU2. | |
26 | ;; The fp queue feeds the fp-adder and fp-multiplier. | |
27 | ;; The addr queue feeds the Load/Store unit. | |
28 | ;; | |
29 | ;; However, we define the fp-adder and fp-multiplier as | |
30 | ;; separate automata, and further split the fp-multiplier | |
31 | ;; into fp-multiplier, fp-division, and | |
32 | ;; fp-squareroot automata, because these units share the same | |
33 | ;; issue and completion logic, yet can operate in | |
34 | ;; parallel. | |
35 | ;; | |
36 | ;; This is based on the model described in the R10K Manual, | |
37 | ;; and it helps to reduce the size of the automata. | |
38 | (define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr, | |
39 | r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt") | |
40 | ||
41 | (define_cpu_unit "r10k_alu1" "r10k_a_int") | |
42 | (define_cpu_unit "r10k_alu2" "r10k_a_int") | |
43 | (define_cpu_unit "r10k_fpadd" "r10k_a_fpadder") | |
44 | (define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy") | |
45 | (define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv") | |
46 | (define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt") | |
47 | (define_cpu_unit "r10k_loadstore" "r10k_a_addr") | |
48 | ||
49 | ||
50 | ;; R10k loads, prefetches, and stores all reserve the load/store unit (latency: int load/prefetch 2, fp load 3, store 0). | |
51 | (define_insn_reservation "r10k_load" 2 | |
52 | (and (eq_attr "cpu" "r10000") | |
53 | (eq_attr "type" "load,prefetch,prefetchx")) | |
54 | "r10k_loadstore") | |
55 | ||
56 | (define_insn_reservation "r10k_store" 0 | |
57 | (and (eq_attr "cpu" "r10000") | |
58 | (eq_attr "type" "store,fpstore,fpidxstore")) | |
59 | "r10k_loadstore") | |
60 | ||
61 | (define_insn_reservation "r10k_fpload" 3 | |
62 | (and (eq_attr "cpu" "r10000") | |
63 | (eq_attr "type" "fpload,fpidxload")) | |
64 | "r10k_loadstore") | |
65 | ||
66 | ||
67 | ;; Integer add/sub, logic ops, slt, clz, constants, nops, traps, and mthi/mtlo can be done by alu1 or alu2. | |
68 | ;; Miscellaneous arith goes here too (this is a guess). | |
69 | (define_insn_reservation "r10k_arith" 1 | |
70 | (and (eq_attr "cpu" "r10000") | |
cb00489c | 71 | (eq_attr "type" "arith,mthi,mtlo,slt,clz,const,nop,trap,logical")) |
7a3446ec JK |
72 | "r10k_alu1 | r10k_alu2") |
73 | ||
74 | ;; mfhi and mflo get separate reservations, because bypasses need to know | |
75 | ;; whether HI or LO is being read (see the idiv bypasses to r10k_mfhi). | |
76 | (define_insn_reservation "r10k_mfhi" 1 | |
77 | (and (eq_attr "cpu" "r10000") | |
cb00489c | 78 | (eq_attr "type" "mfhi")) |
7a3446ec JK |
79 | "r10k_alu1 | r10k_alu2") |
80 | ||
81 | (define_insn_reservation "r10k_mflo" 1 | |
82 | (and (eq_attr "cpu" "r10000") | |
cb00489c | 83 | (eq_attr "type" "mflo")) |
7a3446ec JK |
84 | "r10k_alu1 | r10k_alu2") |
85 | ||
86 | ||
87 | ;; Only ALU1 handles shifts, branch evaluation, and integer (SI/DI) condmove. | |
88 | ;; | |
89 | ;; The brancher is a separate part of ALU1 and can only | |
90 | ;; do one branch per cycle (is this even implementable?). | |
91 | ;; | |
92 | ;; Unsure if the brancher handles jumps and calls as well, but since | |
93 | ;; they're related, we'll add them here for now. | |
94 | (define_insn_reservation "r10k_brancher" 1 | |
95 | (and (eq_attr "cpu" "r10000") | |
96 | (eq_attr "type" "shift,branch,jump,call")) | |
97 | "r10k_alu1") | |
98 | ||
99 | (define_insn_reservation "r10k_int_cmove" 1 | |
100 | (and (eq_attr "cpu" "r10000") | |
101 | (and (eq_attr "type" "condmove") | |
102 | (eq_attr "mode" "SI,DI"))) | |
103 | "r10k_alu1") | |
104 | ||
105 | ||
106 | ;; Coprocessor Moves. | |
107 | ;; mtc1/dmtc1 (type mtc) are handled by ALU1, with latency 3. | |
108 | ;; mfc1/dmfc1 (type mfc) are handled by the fp-multiplier, with latency 2. | |
109 | (define_insn_reservation "r10k_mt_xfer" 3 | |
110 | (and (eq_attr "cpu" "r10000") | |
111 | (eq_attr "type" "mtc")) | |
112 | "r10k_alu1") | |
113 | ||
114 | (define_insn_reservation "r10k_mf_xfer" 2 | |
115 | (and (eq_attr "cpu" "r10000") | |
116 | (eq_attr "type" "mfc")) | |
117 | "r10k_fpmpy") | |
118 | ||
119 | ||
120 | ;; Only ALU2 does int multiplications and divisions. | |
121 | ;; | |
122 | ;; According to the Vr10000 series user manual, | |
123 | ;; the result of integer mult and div insns is available one | |
124 | ;; cycle earlier in register Lo than in Hi. For divides we model | |
125 | ;; this by using the Lo value by default, as it | |
126 | ;; is the more common value, and use a bypass | |
127 | ;; to r10k_mfhi for the Hi value when needed. | |
128 | ;; | |
129 | ;; Also of note, there are different latencies | |
130 | ;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7). | |
131 | ;; However, gcc does not have separate types | |
132 | ;; for these insns. Thus to strike a balance, | |
133 | ;; we use the Hi latency value for imul | |
134 | ;; operations until the imul type can be split. | |
135 | (define_insn_reservation "r10k_imul_single" 6 | |
136 | (and (eq_attr "cpu" "r10000") | |
137 | (and (eq_attr "type" "imul,imul3") | |
138 | (eq_attr "mode" "SI"))) | |
139 | "r10k_alu2 * 6") | |
140 | ||
141 | (define_insn_reservation "r10k_imul_double" 10 | |
142 | (and (eq_attr "cpu" "r10000") | |
143 | (and (eq_attr "type" "imul,imul3") | |
144 | (eq_attr "mode" "DI"))) | |
145 | "r10k_alu2 * 10") | |
146 | ||
147 | ;; Divides keep ALU2 busy for the full Hi latency (35/67 cycles), one cycle longer than the default Lo latency. | |
148 | (define_insn_reservation "r10k_idiv_single" 34 | |
149 | (and (eq_attr "cpu" "r10000") | |
150 | (and (eq_attr "type" "idiv") | |
151 | (eq_attr "mode" "SI"))) | |
152 | "r10k_alu2 * 35") | |
153 | ||
154 | (define_insn_reservation "r10k_idiv_double" 66 | |
155 | (and (eq_attr "cpu" "r10000") | |
156 | (and (eq_attr "type" "idiv") | |
157 | (eq_attr "mode" "DI"))) | |
158 | "r10k_alu2 * 67") | |
159 | ||
160 | (define_bypass 35 "r10k_idiv_single" "r10k_mfhi") | |
161 | (define_bypass 67 "r10k_idiv_double" "r10k_mfhi") | |
162 | ||
163 | ||
164 | ;; Floating point add/sub, abs value, neg, and compare use the fp-adder; mul, moves, and SF/DF condmove use the fp-multiplier. | |
165 | (define_insn_reservation "r10k_fp_miscadd" 2 | |
166 | (and (eq_attr "cpu" "r10000") | |
167 | (eq_attr "type" "fadd,fabs,fneg,fcmp")) | |
168 | "r10k_fpadd") | |
169 | ||
170 | (define_insn_reservation "r10k_fp_miscmul" 2 | |
171 | (and (eq_attr "cpu" "r10000") | |
172 | (eq_attr "type" "fmul,fmove")) | |
173 | "r10k_fpmpy") | |
174 | ||
175 | (define_insn_reservation "r10k_fp_cmove" 2 | |
176 | (and (eq_attr "cpu" "r10000") | |
177 | (and (eq_attr "type" "condmove") | |
178 | (eq_attr "mode" "SF,DF"))) | |
179 | "r10k_fpmpy") | |
180 | ||
181 | ||
182 | ;; The fcvt.s.[wl] insn (cnv_mode I2S) has latency 4, repeat 2. | |
183 | ;; All other fcvt insns have latency 2, repeat 1. Both use the fp-adder. | |
184 | (define_insn_reservation "r10k_fcvt_single" 4 | |
185 | (and (eq_attr "cpu" "r10000") | |
186 | (and (eq_attr "type" "fcvt") | |
187 | (eq_attr "cnv_mode" "I2S"))) | |
188 | "r10k_fpadd * 2") | |
189 | ||
190 | (define_insn_reservation "r10k_fcvt_other" 2 | |
191 | (and (eq_attr "cpu" "r10000") | |
192 | (and (eq_attr "type" "fcvt") | |
193 | (eq_attr "cnv_mode" "!I2S"))) | |
194 | "r10k_fpadd") | |
195 | ||
196 | ||
197 | ;; Run the fmadd insn through fp-adder first, then fp-multiplier. | |
198 | ;; | |
199 | ;; The latency for fmadd is 4 cycles, or 2 cycles if the result is used | |
200 | ;; by another fmadd instruction (see the bypass below). | |
201 | (define_insn_reservation "r10k_fmadd" 4 | |
202 | (and (eq_attr "cpu" "r10000") | |
203 | (eq_attr "type" "fmadd")) | |
204 | "r10k_fpadd, r10k_fpmpy") | |
205 | ||
206 | (define_bypass 2 "r10k_fmadd" "r10k_fmadd") | |
207 | ||
208 | ||
209 | ;; Floating point divisions and square roots, including the reciprocal forms (frdiv, frsqrt). | |
210 | (define_insn_reservation "r10k_fdiv_single" 12 | |
211 | (and (eq_attr "cpu" "r10000") | |
212 | (and (eq_attr "type" "fdiv,frdiv") | |
213 | (eq_attr "mode" "SF"))) | |
214 | "r10k_fpdiv * 14") | |
215 | ||
216 | (define_insn_reservation "r10k_fdiv_double" 19 | |
217 | (and (eq_attr "cpu" "r10000") | |
218 | (and (eq_attr "type" "fdiv,frdiv") | |
219 | (eq_attr "mode" "DF"))) | |
220 | "r10k_fpdiv * 21") | |
221 | ||
222 | (define_insn_reservation "r10k_fsqrt_single" 18 | |
223 | (and (eq_attr "cpu" "r10000") | |
224 | (and (eq_attr "type" "fsqrt") | |
225 | (eq_attr "mode" "SF"))) | |
226 | "r10k_fpsqrt * 20") | |
227 | ||
228 | (define_insn_reservation "r10k_fsqrt_double" 33 | |
229 | (and (eq_attr "cpu" "r10000") | |
230 | (and (eq_attr "type" "fsqrt") | |
231 | (eq_attr "mode" "DF"))) | |
232 | "r10k_fpsqrt * 35") | |
233 | ||
234 | (define_insn_reservation "r10k_frsqrt_single" 30 | |
235 | (and (eq_attr "cpu" "r10000") | |
236 | (and (eq_attr "type" "frsqrt") | |
237 | (eq_attr "mode" "SF"))) | |
238 | "r10k_fpsqrt * 20") | |
239 | ||
240 | (define_insn_reservation "r10k_frsqrt_double" 52 | |
241 | (and (eq_attr "cpu" "r10000") | |
242 | (and (eq_attr "type" "frsqrt") | |
243 | (eq_attr "mode" "DF"))) | |
244 | "r10k_fpsqrt * 35") | |
245 | ||
246 | ||
247 | ;; Handle unknown, multi, atomic, and syncloop insns here by reserving both ALUs (this is a guess). | |
248 | (define_insn_reservation "r10k_unknown" 1 | |
249 | (and (eq_attr "cpu" "r10000") | |
3088716e | 250 | (eq_attr "type" "unknown,multi,atomic,syncloop")) |
7a3446ec | 251 | "r10k_alu1 + r10k_alu2") |