]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/i386/bdver1.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / bdver1.md
1 ;; Copyright (C) 2010-2023 Free Software Foundation, Inc.
2 ;;
3 ;; This file is part of GCC.
4 ;;
5 ;; GCC is free software; you can redistribute it and/or modify
6 ;; it under the terms of the GNU General Public License as published by
7 ;; the Free Software Foundation; either version 3, or (at your option)
8 ;; any later version.
9 ;;
10 ;; GCC is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ;; GNU General Public License for more details.
14 ;;
15 ;; You should have received a copy of the GNU General Public License
16 ;; along with GCC; see the file COPYING3. If not see
17 ;; <http://www.gnu.org/licenses/>.
18 ;;
19 ;; AMD bdver1 Scheduling
20 ;;
21 ;; The bdver1 contains four pipelined FP units, two integer units and
22 ;; two address generation units.
23 ;;
24 ;; The predecode logic is determining boundaries of instructions in the 64
25 ;; byte cache line. So the cache line straddling problem of K6 might be an issue
26 ;; here as well, but it is not noted in the documentation.
27 ;;
28 ;; Three DirectPath instruction decoders and only one VectorPath decoder
29 ;; are available. They can decode three DirectPath instructions or one
30 ;; VectorPath instruction per cycle.
31 ;;
32 ;; The load/store queue unit is not attached to the schedulers but
33 ;; communicates with all the execution units separately instead.
34
35
;; Decode path of an insn: one of direct, vector or double.  The default
;; given here is the constant "direct".
36 (define_attr "bdver1_decode" "direct,vector,double"
37 (const_string "direct"))
38
;; Five automata are declared; the decoder units below belong to "bdver1".
39 (define_automaton "bdver1,bdver1_ieu,bdver1_load,bdver1_fp,bdver1_agu")
40
;; decode0..decode2 are used by the direct/double reservations, decodev by
;; the vector reservation.
41 (define_cpu_unit "bdver1-decode0" "bdver1")
42 (define_cpu_unit "bdver1-decode1" "bdver1")
43 (define_cpu_unit "bdver1-decode2" "bdver1")
44 (define_cpu_unit "bdver1-decodev" "bdver1")
45
46 ;; Model the fact that a double decoded instruction may take 2 cycles
47 ;; to decode when decoder2 and decoder0 in the next cycle
48 ;; are used (this is needed to allow throughput of 1.5 double decoded
49 ;; instructions per cycle).
50 ;;
51 ;; In order to avoid dependence between reservation of decoder
52 ;; and other units, we model the decoder as a two stage fully pipelined unit
53 ;; and only a double decoded instruction may occupy a unit in the first cycle.
54 ;; With this scheme however two double instructions can be issued in cycle0.
55 ;;
56 ;; Avoid this by using a presence set requiring decoder0 to be allocated
57 ;; too. Vector decoded instructions then can't be issued when modeled
58 ;; as consuming decoder0+decoder1+decoder2.
59 ;; We solve that by a specialized vector decoder unit and an exclusion set.
;; In short: decode2 may only be reserved when decode0 is reserved as well,
;; and decodev is mutually exclusive with all three direct decoders.
60 (presence_set "bdver1-decode2" "bdver1-decode0")
61 (exclusion_set "bdver1-decodev" "bdver1-decode0,bdver1-decode1,bdver1-decode2")
62
;; Decoder reservations.  "nothing," leaves the first cycle empty so the
;; decoder unit itself is taken in the second cycle.
;; bdver1-vector takes decodev; bdver1-direct1 is pinned to decode1;
;; bdver1-direct takes any one of decode0/1/2.
63 (define_reservation "bdver1-vector" "nothing,bdver1-decodev")
64 (define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
65 (define_reservation "bdver1-direct" "nothing,
66 (bdver1-decode0 | bdver1-decode1
67 | bdver1-decode2)")
68 ;; Double instructions behave like two direct instructions.
;; Alternatives: decode2 in the first cycle followed by decode0 in the next,
;; or one of the pairs decode0+decode1 / decode1+decode2 in the second cycle.
69 (define_reservation "bdver1-double" "((bdver1-decode2,bdver1-decode0)
70 | (nothing,(bdver1-decode0 + bdver1-decode1))
71 | (nothing,(bdver1-decode1 + bdver1-decode2)))")
72
73
;; Two integer execution units; "bdver1-ieu" takes either one.
74 (define_cpu_unit "bdver1-ieu0" "bdver1_ieu")
75 (define_cpu_unit "bdver1-ieu1" "bdver1_ieu")
76 (define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")
77
;; Two address generation units; "bdver1-agu" takes either one.
78 (define_cpu_unit "bdver1-agu0" "bdver1_agu")
79 (define_cpu_unit "bdver1-agu1" "bdver1_agu")
80 (define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")
81
;; Two load units.  A load takes an AGU in its first cycle, one load unit in
;; the next cycle, and then one cycle with no unit reserved.
82 (define_cpu_unit "bdver1-load0" "bdver1_load")
83 (define_cpu_unit "bdver1-load1" "bdver1_load")
84 (define_reservation "bdver1-load" "bdver1-agu,
85 (bdver1-load0 | bdver1-load1),nothing")
86 ;; 128bit SSE instructions issue two loads at once.
87 (define_reservation "bdver1-load2" "bdver1-agu,
88 (bdver1-load0 + bdver1-load1),nothing")
89
;; Stores are modeled on the same load0/load1 units; no separate store unit
;; is declared, and the store reservations do not include an AGU themselves.
90 (define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
91 ;; 128bit SSE instructions issue two stores at once.
92 (define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")
93
94 ;; VectorPath (microcoded) instructions are single issue instructions.
95 ;; So, they occupy all the integer units: both IEUs, both AGUs and both
;; load units are reserved in the same cycle.
96 (define_reservation "bdver1-ivector" "bdver1-ieu0+bdver1-ieu1+
97 bdver1-agu0+bdver1-agu1+
98 bdver1-load0+bdver1-load1")
99
100 ;; The FP operations start to execute at stage 12 in the pipeline, while
101 ;; integer operations start to execute at stage 9 for athlon and 11 for K8.
102 ;; Compensate the difference for athlon because it results in significantly
103 ;; smaller automata.
104 ;; NOTE: the above information was just copied from athlon.md, and was not
105 ;; actually verified for bdver1.
;; bdver1-fpsched is three cycles in which no unit is reserved.
106 (define_reservation "bdver1-fpsched" "nothing,nothing,nothing")
107 ;; The floating point loads: the load reservation is started in the same
;; cycle as bdver1-fpsched ("+"), so the two run in parallel.
108 (define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)")
109 (define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)")
110
111 ;; Four FP units.
112 (define_cpu_unit "bdver1-ffma0" "bdver1_fp")
113 (define_cpu_unit "bdver1-ffma1" "bdver1_fp")
114 (define_cpu_unit "bdver1-fmal0" "bdver1_fp")
115 (define_cpu_unit "bdver1-fmal1" "bdver1_fp")
116
;; Named views of the four units: ffma/fmal take either unit of the pair,
;; fcvt and fmma are both ffma0, fxbar is ffma1, fsto is fmal1.
117 (define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)")
118 (define_reservation "bdver1-fcvt" "bdver1-ffma0")
119 (define_reservation "bdver1-fmma" "bdver1-ffma0")
120 (define_reservation "bdver1-fxbar" "bdver1-ffma1")
121 (define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)")
122 (define_reservation "bdver1-fsto" "bdver1-fmal1")
123
124 ;; Vector operations usually consume many pipes; this reserves all four
;; FP units in the same cycle.
125 (define_reservation "bdver1-fvector" "(bdver1-ffma0 + bdver1-ffma1
126 + bdver1-fmal0 + bdver1-fmal1)")
127
128 ;; Jump instructions are executed in the branch unit completely transparent to us.
129 (define_insn_reservation "bdver1_call" 0
130 (and (eq_attr "cpu" "bdver1,bdver2")
131 (eq_attr "type" "call,callv"))
132 "bdver1-double,bdver1-agu")
133 ;; PUSH mem is double path.
;; NOTE(review): the reservation below uses bdver1-direct, not bdver1-double,
;; and applies to every insn of type "push" -- confirm this is intended.
134 (define_insn_reservation "bdver1_push" 1
135 (and (eq_attr "cpu" "bdver1,bdver2")
136 (eq_attr "type" "push"))
137 "bdver1-direct,bdver1-agu,bdver1-store")
138 ;; POP r16/mem are double path.
;; NOTE(review): decoded as bdver1-direct here as well, then all integer
;; units are taken via bdver1-ivector -- confirm against the comment above.
139 (define_insn_reservation "bdver1_pop" 1
140 (and (eq_attr "cpu" "bdver1,bdver2")
141 (eq_attr "type" "pop"))
142 "bdver1-direct,bdver1-ivector")
143 ;; LEAVE: no latency info so far, assume same as amdfam10.
144 (define_insn_reservation "bdver1_leave" 3
145 (and (eq_attr "cpu" "bdver1,bdver2")
146 (eq_attr "type" "leave"))
147 "bdver1-vector,bdver1-ivector")
148 ;; LEA executes in AGU unit with 1 cycle latency on BDVER1.
149 (define_insn_reservation "bdver1_lea" 1
150 (and (eq_attr "cpu" "bdver1,bdver2")
151 (eq_attr "type" "lea"))
152 "bdver1-direct,bdver1-agu")
153
154 ;; MUL executes in special multiplier unit attached to IEU1.
;; All four entries decode on decode1 only (bdver1-direct1) and execute on
;; ieu1.  The DI-mode entries come before the mode-independent ones, whose
;; conditions also match DI mode.
;; NOTE(review): presumably the earlier entry is the one meant to apply to
;; DI-mode insns -- confirm how genautomata resolves overlapping conditions.
155 (define_insn_reservation "bdver1_imul_DI" 6
156 (and (eq_attr "cpu" "bdver1,bdver2")
157 (and (eq_attr "type" "imul")
158 (and (eq_attr "mode" "DI")
159 (eq_attr "memory" "none,unknown"))))
160 "bdver1-direct1,bdver1-ieu1")
161 (define_insn_reservation "bdver1_imul" 4
162 (and (eq_attr "cpu" "bdver1,bdver2")
163 (and (eq_attr "type" "imul")
164 (eq_attr "memory" "none,unknown")))
165 "bdver1-direct1,bdver1-ieu1")
166 (define_insn_reservation "bdver1_imul_mem_DI" 10
167 (and (eq_attr "cpu" "bdver1,bdver2")
168 (and (eq_attr "type" "imul")
169 (and (eq_attr "mode" "DI")
170 (eq_attr "memory" "load,both"))))
171 "bdver1-direct1,bdver1-load,bdver1-ieu1")
172 (define_insn_reservation "bdver1_imul_mem" 8
173 (and (eq_attr "cpu" "bdver1,bdver2")
174 (and (eq_attr "type" "imul")
175 (eq_attr "memory" "load,both")))
176 "bdver1-direct1,bdver1-load,bdver1-ieu1")
177
178 ;; IDIV cannot execute in parallel with other instructions. Dealing with it
179 ;; as with a short latency vector instruction is a good approximation that
180 ;; keeps the scheduler from trying too hard to hide its latency by overlap with
181 ;; other instructions.
182 ;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
183 ;; of the other code.
;; Both entries hold ieu0 for six cycles and, in parallel, take all four FP
;; units (bdver1-fvector) after the bdver1-fpsched delay.
184 (define_insn_reservation "bdver1_idiv" 6
185 (and (eq_attr "cpu" "bdver1,bdver2")
186 (and (eq_attr "type" "idiv")
187 (eq_attr "memory" "none,unknown")))
188 "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")
189
190 (define_insn_reservation "bdver1_idiv_mem" 10
191 (and (eq_attr "cpu" "bdver1,bdver2")
192 (and (eq_attr "type" "idiv")
193 (eq_attr "memory" "load,both")))
194 "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")
195
196 ;; The parallelism of string instructions is not documented. Model it the same
197 ;; way as IDIV to create smaller automata. This probably does not matter much.
198 ;; Using the same heuristics for bdver1 as amdfam10 and K8 with IDIV.
;; Only string insns whose memory attribute is load, both or store match;
;; unlike IDIV no FP units are reserved here.
199 (define_insn_reservation "bdver1_str" 6
200 (and (eq_attr "cpu" "bdver1,bdver2")
201 (and (eq_attr "type" "str")
202 (eq_attr "memory" "load,both,store")))
203 "bdver1-vector,bdver1-load,bdver1-ieu0*6")
204
205 ;; Integer instructions.
;; Entries are keyed on bdver1_decode (direct or vector), unit
;; (integer,unknown) and the memory attribute; the two imov entries
;; (bdver1_idirect_loadmov, bdver1_idirect_movstore) are keyed on type instead.
;; There is no entry here for bdver1_decode "double".
;; NOTE(review): the imov entries overlap the generic direct load/store entries
;; that follow them; presumably the earlier entry is meant to win -- confirm.
206 (define_insn_reservation "bdver1_idirect" 1
207 (and (eq_attr "cpu" "bdver1,bdver2")
208 (and (eq_attr "bdver1_decode" "direct")
209 (and (eq_attr "unit" "integer,unknown")
210 (eq_attr "memory" "none,unknown"))))
211 "bdver1-direct,bdver1-ieu")
212 (define_insn_reservation "bdver1_ivector" 2
213 (and (eq_attr "cpu" "bdver1,bdver2")
214 (and (eq_attr "bdver1_decode" "vector")
215 (and (eq_attr "unit" "integer,unknown")
216 (eq_attr "memory" "none,unknown"))))
217 "bdver1-vector,bdver1-ieu,bdver1-ieu")
218 (define_insn_reservation "bdver1_idirect_loadmov" 4
219 (and (eq_attr "cpu" "bdver1,bdver2")
220 (and (eq_attr "type" "imov")
221 (eq_attr "memory" "load")))
222 "bdver1-direct,bdver1-load")
223 (define_insn_reservation "bdver1_idirect_load" 5
224 (and (eq_attr "cpu" "bdver1,bdver2")
225 (and (eq_attr "bdver1_decode" "direct")
226 (and (eq_attr "unit" "integer,unknown")
227 (eq_attr "memory" "load"))))
228 "bdver1-direct,bdver1-load,bdver1-ieu")
229 (define_insn_reservation "bdver1_ivector_load" 6
230 (and (eq_attr "cpu" "bdver1,bdver2")
231 (and (eq_attr "bdver1_decode" "vector")
232 (and (eq_attr "unit" "integer,unknown")
233 (eq_attr "memory" "load"))))
234 "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")
235 (define_insn_reservation "bdver1_idirect_movstore" 4
236 (and (eq_attr "cpu" "bdver1,bdver2")
237 (and (eq_attr "type" "imov")
238 (eq_attr "memory" "store")))
239 "bdver1-direct,bdver1-agu,bdver1-store")
240 (define_insn_reservation "bdver1_idirect_both" 4
241 (and (eq_attr "cpu" "bdver1,bdver2")
242 (and (eq_attr "bdver1_decode" "direct")
243 (and (eq_attr "unit" "integer,unknown")
244 (eq_attr "memory" "both"))))
245 "bdver1-direct,bdver1-load,
246 bdver1-ieu,bdver1-store,
247 bdver1-store")
248 (define_insn_reservation "bdver1_ivector_both" 5
249 (and (eq_attr "cpu" "bdver1,bdver2")
250 (and (eq_attr "bdver1_decode" "vector")
251 (and (eq_attr "unit" "integer,unknown")
252 (eq_attr "memory" "both"))))
253 "bdver1-vector,bdver1-load,
254 bdver1-ieu,
255 bdver1-ieu,
256 bdver1-store")
257 (define_insn_reservation "bdver1_idirect_store" 4
258 (and (eq_attr "cpu" "bdver1,bdver2")
259 (and (eq_attr "bdver1_decode" "direct")
260 (and (eq_attr "unit" "integer,unknown")
261 (eq_attr "memory" "store"))))
262 "bdver1-direct,(bdver1-ieu+bdver1-agu),
263 bdver1-store")
264 (define_insn_reservation "bdver1_ivector_store" 5
265 (and (eq_attr "cpu" "bdver1,bdver2")
266 (and (eq_attr "bdver1_decode" "vector")
267 (and (eq_attr "unit" "integer,unknown")
268 (eq_attr "memory" "store"))))
269 "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,
270 bdver1-store")
271
272 ;; BDVER1 floating point units.
;; Layout used throughout this section: a more specific entry (XF mode,
;; memory load/store) is listed first, followed by a fallback with no memory
;; test whose condition also matches the earlier cases.
;; NOTE(review): presumably this relies on the first matching entry being
;; used -- confirm against the genautomata documentation.
273 (define_insn_reservation "bdver1_fldxf" 13
274 (and (eq_attr "cpu" "bdver1,bdver2")
275 (and (eq_attr "type" "fmov")
276 (and (eq_attr "memory" "load")
277 (eq_attr "mode" "XF"))))
278 "bdver1-vector,bdver1-fpload2,bdver1-fvector*9")
279 (define_insn_reservation "bdver1_fld" 5
280 (and (eq_attr "cpu" "bdver1,bdver2")
281 (and (eq_attr "type" "fmov")
282 (eq_attr "memory" "load")))
283 "bdver1-direct,bdver1-fpload,bdver1-ffma")
284 (define_insn_reservation "bdver1_fstxf" 8
285 (and (eq_attr "cpu" "bdver1,bdver2")
286 (and (eq_attr "type" "fmov")
287 (and (eq_attr "memory" "store,both")
288 (eq_attr "mode" "XF"))))
289 "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")
290 (define_insn_reservation "bdver1_fst" 2
291 (and (eq_attr "cpu" "bdver1,bdver2")
292 (and (eq_attr "type" "fmov")
293 (eq_attr "memory" "store,both")))
294 "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
295 (define_insn_reservation "bdver1_fist" 2
296 (and (eq_attr "cpu" "bdver1,bdver2")
297 (eq_attr "type" "fistp,fisttp"))
298 "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
299 (define_insn_reservation "bdver1_fmov_bdver1" 2
300 (and (eq_attr "cpu" "bdver1,bdver2")
301 (eq_attr "type" "fmov"))
302 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
303 (define_insn_reservation "bdver1_fadd_load" 10
304 (and (eq_attr "cpu" "bdver1,bdver2")
305 (and (eq_attr "type" "fop")
306 (eq_attr "memory" "load")))
307 "bdver1-direct,bdver1-fpload,bdver1-ffma")
308 (define_insn_reservation "bdver1_fadd" 6
309 (and (eq_attr "cpu" "bdver1,bdver2")
310 (eq_attr "type" "fop"))
311 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
312 (define_insn_reservation "bdver1_fmul_load" 10
313 (and (eq_attr "cpu" "bdver1,bdver2")
314 (and (eq_attr "type" "fmul")
315 (eq_attr "memory" "load")))
316 "bdver1-double,bdver1-fpload,bdver1-ffma")
317 (define_insn_reservation "bdver1_fmul" 6
318 (and (eq_attr "cpu" "bdver1,bdver2")
319 (eq_attr "type" "fmul"))
320 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
321 (define_insn_reservation "bdver1_fsgn" 2
322 (and (eq_attr "cpu" "bdver1,bdver2")
323 (eq_attr "type" "fsgn"))
324 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
325 (define_insn_reservation "bdver1_fdiv_load" 46
326 (and (eq_attr "cpu" "bdver1,bdver2")
327 (and (eq_attr "type" "fdiv")
328 (eq_attr "memory" "load")))
329 "bdver1-direct,bdver1-fpload,bdver1-ffma")
330 (define_insn_reservation "bdver1_fdiv" 42
331 (and (eq_attr "cpu" "bdver1,bdver2")
332 (eq_attr "type" "fdiv"))
333 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
334 (define_insn_reservation "bdver1_fpspc_load" 103
335 (and (eq_attr "cpu" "bdver1,bdver2")
336 (and (eq_attr "type" "fpspc")
337 (eq_attr "memory" "load")))
338 "bdver1-vector,bdver1-fpload,bdver1-fvector")
339 (define_insn_reservation "bdver1_fpspc" 100
340 (and (eq_attr "cpu" "bdver1,bdver2")
341 ;; Register form (fallback after bdver1_fpspc_load): no memory test, no load.
342 (eq_attr "type" "fpspc"))
343 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; fcmov, fcmp and fxch.  For fcmp the entries requiring bdver1_decode
;; "double" (bdver1_fcomi*) are listed before the generic bdver1_fcom* ones,
;; and each "_load" entry precedes its fallback without a memory test.
344 (define_insn_reservation "bdver1_fcmov_load" 17
345 (and (eq_attr "cpu" "bdver1,bdver2")
346 (and (eq_attr "type" "fcmov")
347 (eq_attr "memory" "load")))
348 "bdver1-vector,bdver1-fpload,bdver1-fvector")
349 (define_insn_reservation "bdver1_fcmov" 15
350 (and (eq_attr "cpu" "bdver1,bdver2")
351 (eq_attr "type" "fcmov"))
352 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
353 (define_insn_reservation "bdver1_fcomi_load" 6
354 (and (eq_attr "cpu" "bdver1,bdver2")
355 (and (eq_attr "type" "fcmp")
356 (and (eq_attr "bdver1_decode" "double")
357 (eq_attr "memory" "load"))))
358 "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
359 (define_insn_reservation "bdver1_fcomi" 2
360 (and (eq_attr "cpu" "bdver1,bdver2")
361 (and (eq_attr "bdver1_decode" "double")
362 (eq_attr "type" "fcmp")))
363 "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
364 (define_insn_reservation "bdver1_fcom_load" 6
365 (and (eq_attr "cpu" "bdver1,bdver2")
366 (and (eq_attr "type" "fcmp")
367 (eq_attr "memory" "load")))
368 "bdver1-direct,bdver1-fpload,bdver1-ffma")
369 (define_insn_reservation "bdver1_fcom" 2
370 (and (eq_attr "cpu" "bdver1,bdver2")
371 (eq_attr "type" "fcmp"))
372 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
373 (define_insn_reservation "bdver1_fxch" 2
374 (and (eq_attr "cpu" "bdver1,bdver2")
375 (eq_attr "type" "fxch"))
376 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
377
378 ;; SSE loads.
;; All entries match memory "load" and go from the most specific condition
;; (prefix, movu, mode) to the most general; the final bdver1_mmxsse_load
;; matches any mmxmov/ssemov load.
;; NOTE(review): the conditions overlap, so the order of the entries
;; presumably matters -- do not reorder without checking.
379 (define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
380 (and (eq_attr "cpu" "bdver1,bdver2")
381 (and (eq_attr "type" "ssemov")
382 (and (eq_attr "prefix" "vex")
383 (and (eq_attr "movu" "1")
384 (and (eq_attr "mode" "V4SF,V2DF")
385 (eq_attr "memory" "load"))))))
386 "bdver1-direct,bdver1-fpload")
387 (define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
388 (and (eq_attr "cpu" "bdver1,bdver2")
389 (and (eq_attr "type" "ssemov")
390 (and (eq_attr "movu" "1")
391 (and (eq_attr "mode" "V8SF,V4DF")
392 (eq_attr "memory" "load")))))
393 "bdver1-double,bdver1-fpload")
394 (define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
395 (and (eq_attr "cpu" "bdver1,bdver2")
396 (and (eq_attr "type" "ssemov")
397 (and (eq_attr "movu" "1")
398 (and (eq_attr "mode" "V4SF,V2DF")
399 (eq_attr "memory" "load")))))
400 "bdver1-direct,bdver1-fpload,bdver1-fmal")
401 (define_insn_reservation "bdver1_ssevector_avx128_load" 4
402 (and (eq_attr "cpu" "bdver1,bdver2")
403 (and (eq_attr "type" "ssemov")
404 (and (eq_attr "prefix" "vex")
405 (and (eq_attr "mode" "V4SF,V2DF,TI")
406 (eq_attr "memory" "load")))))
407 "bdver1-direct,bdver1-fpload,bdver1-fmal")
408 (define_insn_reservation "bdver1_ssevector_avx256_load" 5
409 (and (eq_attr "cpu" "bdver1,bdver2")
410 (and (eq_attr "type" "ssemov")
411 (and (eq_attr "mode" "V8SF,V4DF,OI")
412 (eq_attr "memory" "load"))))
413 "bdver1-double,bdver1-fpload,bdver1-fmal")
414 (define_insn_reservation "bdver1_ssevector_sse128_load" 4
415 (and (eq_attr "cpu" "bdver1,bdver2")
416 (and (eq_attr "type" "ssemov")
417 (and (eq_attr "mode" "V4SF,V2DF,TI")
418 (eq_attr "memory" "load"))))
419 "bdver1-direct,bdver1-fpload")
420 (define_insn_reservation "bdver1_ssescalar_movq_load" 4
421 (and (eq_attr "cpu" "bdver1,bdver2")
422 (and (eq_attr "type" "ssemov")
423 (and (eq_attr "mode" "DI")
424 (eq_attr "memory" "load"))))
425 "bdver1-direct,bdver1-fpload,bdver1-fmal")
426 (define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
427 (and (eq_attr "cpu" "bdver1,bdver2")
428 (and (eq_attr "type" "ssemov")
429 (and (eq_attr "prefix" "vex")
430 (and (eq_attr "mode" "SF")
431 (eq_attr "memory" "load")))))
432 "bdver1-direct,bdver1-fpload")
433 (define_insn_reservation "bdver1_ssescalar_sse128_load" 4
434 (and (eq_attr "cpu" "bdver1,bdver2")
435 (and (eq_attr "type" "ssemov")
436 (and (eq_attr "mode" "SF,DF")
437 (eq_attr "memory" "load"))))
438 "bdver1-direct,bdver1-fpload, bdver1-ffma")
439 (define_insn_reservation "bdver1_mmxsse_load" 4
440 (and (eq_attr "cpu" "bdver1,bdver2")
441 (and (eq_attr "type" "mmxmov,ssemov")
442 (eq_attr "memory" "load")))
443 "bdver1-direct,bdver1-fpload, bdver1-fmal")
444
445 ;; SSE stores.
;; All three match memory "store,both".  The 256-bit and 128-bit entries hold
;; fsto plus a store unit for two cycles ("*2"); the final generic entry for
;; any mmxmov/ssemov store holds them for one.
446 (define_insn_reservation "bdver1_sse_store_avx256" 5
447 (and (eq_attr "cpu" "bdver1,bdver2")
448 (and (eq_attr "type" "ssemov")
449 (and (eq_attr "mode" "V8SF,V4DF,OI")
450 (eq_attr "memory" "store,both"))))
451 "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
452 (define_insn_reservation "bdver1_sse_store" 4
453 (and (eq_attr "cpu" "bdver1,bdver2")
454 (and (eq_attr "type" "ssemov")
455 (and (eq_attr "mode" "V4SF,V2DF,TI")
456 (eq_attr "memory" "store,both"))))
457 "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
458 (define_insn_reservation "bdver1_mmxsse_store_short" 4
459 (and (eq_attr "cpu" "bdver1,bdver2")
460 (and (eq_attr "type" "mmxmov,ssemov")
461 (eq_attr "memory" "store,both")))
462 "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
463
464 ;; Register moves.
;; All three match memory "none": 256-bit modes decode as double and use fmal,
;; scalar SF/DF moves use ffma, and any other mmxmov/ssemov uses fmal.
465 (define_insn_reservation "bdver1_ssevector_avx256" 3
466 (and (eq_attr "cpu" "bdver1,bdver2")
467 (and (eq_attr "type" "ssemov")
468 (and (eq_attr "mode" "V8SF,V4DF,OI")
469 (eq_attr "memory" "none"))))
470 "bdver1-double,bdver1-fpsched,bdver1-fmal")
471 (define_insn_reservation "bdver1_movss_movsd" 2
472 (and (eq_attr "cpu" "bdver1,bdver2")
473 (and (eq_attr "type" "ssemov")
474 (and (eq_attr "mode" "SF,DF")
475 (eq_attr "memory" "none"))))
476 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
477 (define_insn_reservation "bdver1_mmxssemov" 2
478 (and (eq_attr "cpu" "bdver1,bdver2")
479 (and (eq_attr "type" "mmxmov,ssemov")
480 (eq_attr "memory" "none")))
481 "bdver1-direct,bdver1-fpsched,bdver1-fmal")
482 ;; SSE logs.
;; Covers types sselog, sselog1, sseshuf and sseshuf1.  The two "_256" entries
;; match mode V8SF only and use fmal; every other mode falls through to the
;; generic entries, which use fxbar.
483 (define_insn_reservation "bdver1_sselog_load_256" 7
484 (and (eq_attr "cpu" "bdver1,bdver2")
485 (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
486 (and (eq_attr "mode" "V8SF")
487 (eq_attr "memory" "load"))))
488 "bdver1-double,bdver1-fpload,bdver1-fmal")
489 (define_insn_reservation "bdver1_sselog_256" 3
490 (and (eq_attr "cpu" "bdver1,bdver2")
491 (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
492 (eq_attr "mode" "V8SF")))
493 "bdver1-double,bdver1-fpsched,bdver1-fmal")
494 (define_insn_reservation "bdver1_sselog_load" 6
495 (and (eq_attr "cpu" "bdver1,bdver2")
496 (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
497 (eq_attr "memory" "load")))
498 "bdver1-direct,bdver1-fpload,bdver1-fxbar")
499 (define_insn_reservation "bdver1_sselog" 2
500 (and (eq_attr "cpu" "bdver1,bdver2")
501 (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
502 "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
503
504 ;; PCMP actually executes in FMAL.
;; NOTE(review): the ssecmp reservations below use bdver1-ffma, not
;; bdver1-fmal -- confirm whether the comment above or the reservation is right.
505 (define_insn_reservation "bdver1_ssecmp_load" 6
506 (and (eq_attr "cpu" "bdver1,bdver2")
507 (and (eq_attr "type" "ssecmp")
508 (eq_attr "memory" "load")))
509 "bdver1-direct,bdver1-fpload,bdver1-ffma")
510 (define_insn_reservation "bdver1_ssecmp" 2
511 (and (eq_attr "cpu" "bdver1,bdver2")
512 (eq_attr "type" "ssecmp"))
513 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; ssecomi decodes as double and uses either an ffma unit or fsto.
514 (define_insn_reservation "bdver1_ssecomi_load" 6
515 (and (eq_attr "cpu" "bdver1,bdver2")
516 (and (eq_attr "type" "ssecomi")
517 (eq_attr "memory" "load")))
518 "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
519 (define_insn_reservation "bdver1_ssecomi" 2
520 (and (eq_attr "cpu" "bdver1,bdver2")
521 (eq_attr "type" "ssecomi"))
522 "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
523
524 ;; Conversions behave very irregularly and the scheduling is critical here.
525 ;; Take each instruction separately.
;; Entries come in pairs: a memory "load" form using bdver1-fpload and a
;; memory "none" form using bdver1-fpsched.  Packed conversions are told apart
;; by the modes of operand 0 (destination register) and operand 1 (source).
526
527 ;; 256 bit conversion.
528 (define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
529 (and (eq_attr "cpu" "bdver1,bdver2")
530 (and (eq_attr "type" "ssecvt")
531 (and (eq_attr "memory" "load")
532 (ior (ior (match_operand:V4DF 0 "register_operand")
533 (ior (match_operand:V8SF 0 "register_operand")
534 (match_operand:V8SI 0 "register_operand")))
535 (ior (match_operand:V4DF 1 "nonimmediate_operand")
536 (ior (match_operand:V8SF 1 "nonimmediate_operand")
537 (match_operand:V8SI 1 "nonimmediate_operand")))))))
538 "bdver1-vector,bdver1-fpload,bdver1-fvector")
539 (define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
540 (and (eq_attr "cpu" "bdver1,bdver2")
541 (and (eq_attr "type" "ssecvt")
542 (and (eq_attr "memory" "none")
543 (ior (ior (match_operand:V4DF 0 "register_operand")
544 (ior (match_operand:V8SF 0 "register_operand")
545 (match_operand:V8SI 0 "register_operand")))
546 (ior (match_operand:V4DF 1 "nonimmediate_operand")
547 (ior (match_operand:V8SF 1 "nonimmediate_operand")
548 (match_operand:V8SI 1 "nonimmediate_operand")))))))
549 "bdver1-vector,bdver1-fpsched,bdver1-fvector")
550 ;; CVTSS2SD, CVTSD2SS.
551 (define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
552 (and (eq_attr "cpu" "bdver1,bdver2")
553 (and (eq_attr "type" "ssecvt")
554 (and (eq_attr "mode" "SF,DF")
555 (eq_attr "memory" "load"))))
556 "bdver1-direct,bdver1-fpload,bdver1-fcvt")
557 (define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
558 (and (eq_attr "cpu" "bdver1,bdver2")
559 (and (eq_attr "type" "ssecvt")
560 (and (eq_attr "mode" "SF,DF")
561 (eq_attr "memory" "none"))))
562 "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
563 ;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
564 (define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
565 (and (eq_attr "cpu" "bdver1,bdver2")
566 (and (eq_attr "type" "sseicvt")
567 (and (eq_attr "mode" "SF,DF")
568 (eq_attr "memory" "load"))))
569 "bdver1-direct,bdver1-fpload,bdver1-fcvt")
570 (define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
571 (and (eq_attr "cpu" "bdver1,bdver2")
572 (and (eq_attr "type" "sseicvt")
573 (and (eq_attr "mode" "SF,DF")
574 (eq_attr "memory" "none"))))
575 "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
576 ;; CVTPD2PS.
577 (define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
578 (and (eq_attr "cpu" "bdver1,bdver2")
579 (and (eq_attr "type" "ssecvt")
580 (and (eq_attr "memory" "load")
581 (and (match_operand:V4SF 0 "register_operand")
582 (match_operand:V2DF 1 "nonimmediate_operand")))))
583 "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
584 (define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
585 (and (eq_attr "cpu" "bdver1,bdver2")
586 (and (eq_attr "type" "ssecvt")
587 (and (eq_attr "memory" "none")
588 (and (match_operand:V4SF 0 "register_operand")
589 (match_operand:V2DF 1 "nonimmediate_operand")))))
590 "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
591 ;; CVTPI2PS, CVTDQ2PS.
592 (define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
593 (and (eq_attr "cpu" "bdver1,bdver2")
594 (and (eq_attr "type" "ssecvt")
595 (and (eq_attr "memory" "load")
596 (and (match_operand:V4SF 0 "register_operand")
597 (ior (match_operand:V2SI 1 "nonimmediate_operand")
598 (match_operand:V4SI 1 "nonimmediate_operand"))))))
599 "bdver1-direct,bdver1-fpload,bdver1-fcvt")
600 (define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
601 (and (eq_attr "cpu" "bdver1,bdver2")
602 (and (eq_attr "type" "ssecvt")
603 (and (eq_attr "memory" "none")
604 (and (match_operand:V4SF 0 "register_operand")
605 (ior (match_operand:V2SI 1 "nonimmediate_operand")
606 (match_operand:V4SI 1 "nonimmediate_operand"))))))
607 "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
608 ;; CVTDQ2PD.
609 (define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
610 (and (eq_attr "cpu" "bdver1,bdver2")
611 (and (eq_attr "type" "ssecvt")
612 (and (eq_attr "memory" "load")
613 (and (match_operand:V2DF 0 "register_operand")
614 (match_operand:V4SI 1 "nonimmediate_operand")))))
615 "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
616 (define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
617 (and (eq_attr "cpu" "bdver1,bdver2")
618 (and (eq_attr "type" "ssecvt")
619 (and (eq_attr "memory" "none")
620 (and (match_operand:V2DF 0 "register_operand")
621 (match_operand:V4SI 1 "nonimmediate_operand")))))
622 "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
623 ;; CVTPS2PD, CVTPI2PD.
624 (define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
625 (and (eq_attr "cpu" "bdver1,bdver2")
626 (and (eq_attr "type" "ssecvt")
627 (and (eq_attr "memory" "load")
628 (and (match_operand:V2DF 0 "register_operand")
629 (ior (match_operand:V2SI 1 "nonimmediate_operand")
630 (match_operand:V4SF 1 "nonimmediate_operand"))))))
631 "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
632 (define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
633 (and (eq_attr "cpu" "bdver1,bdver2")
634 (and (eq_attr "type" "ssecvt")
635 (and (eq_attr "memory" "none") ; register form; loads use bdver1_ssecvt_cvtps2pd_load
636 (and (match_operand:V2DF 0 "register_operand")
637 (ior (match_operand:V2SI 1 "nonimmediate_operand")
638 (match_operand:V4SF 1 "nonimmediate_operand"))))))
639 "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
640 ;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
;; These two are named "ssecvt" but match type "sseicvt" with mode SI,DI.
641 (define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
642 (and (eq_attr "cpu" "bdver1,bdver2")
643 (and (eq_attr "type" "sseicvt")
644 (and (eq_attr "mode" "SI,DI")
645 (eq_attr "memory" "load"))))
646 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
647 (define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
648 (and (eq_attr "cpu" "bdver1,bdver2")
649 (and (eq_attr "type" "sseicvt")
650 (and (eq_attr "mode" "SI,DI")
651 (eq_attr "memory" "none"))))
652 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")
653 ;; CVTPD2PI, CVTTPD2PI.
654 (define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
655 (and (eq_attr "cpu" "bdver1,bdver2")
656 (and (eq_attr "type" "ssecvt")
657 (and (eq_attr "memory" "load")
658 (and (match_operand:V2DF 1 "nonimmediate_operand")
659 (match_operand:V2SI 0 "register_operand")))))
660 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
661 (define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
662 (and (eq_attr "cpu" "bdver1,bdver2")
663 (and (eq_attr "type" "ssecvt")
664 (and (eq_attr "memory" "none")
665 (and (match_operand:V2DF 1 "nonimmediate_operand")
666 (match_operand:V2SI 0 "register_operand")))))
667 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
668 ;; CVTPD2DQ, CVTTPD2DQ.
669 (define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
670 (and (eq_attr "cpu" "bdver1,bdver2")
671 (and (eq_attr "type" "ssecvt")
672 (and (eq_attr "memory" "load")
673 (and (match_operand:V2DF 1 "nonimmediate_operand")
674 (match_operand:V4SI 0 "register_operand")))))
675 "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
676 (define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
677 (and (eq_attr "cpu" "bdver1,bdver2")
678 (and (eq_attr "type" "ssecvt")
679 (and (eq_attr "memory" "none")
680 (and (match_operand:V2DF 1 "nonimmediate_operand")
681 (match_operand:V4SI 0 "register_operand")))))
682 "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
683 ;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
684 (define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
685 (and (eq_attr "cpu" "bdver1,bdver2")
686 (and (eq_attr "type" "ssecvt")
687 (and (eq_attr "memory" "load") ; memory-source form of the V4SF -> V2SI/V4SI conversions
688 (and (match_operand:V4SF 1 "nonimmediate_operand")
689 (ior (match_operand:V2SI 0 "register_operand")
690 (match_operand:V4SI 0 "register_operand"))))))
691 "bdver1-direct,bdver1-fpload,bdver1-fcvt")
692 (define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
693 (and (eq_attr "cpu" "bdver1,bdver2")
694 (and (eq_attr "type" "ssecvt")
695 (and (eq_attr "memory" "none") ; register-source form of the V4SF -> V2SI/V4SI conversions
696 (and (match_operand:V4SF 1 "nonimmediate_operand")
697 (ior (match_operand:V2SI 0 "register_operand")
698 (match_operand:V4SI 0 "register_operand"))))))
699 "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
700
701 ;; SSE MUL, ADD, and MULADD.
;; The 256-bit entries (modes V8SF,V4DF) decode as double and are listed before
;; the mode-independent entries, whose conditions also match those modes.
;; FP mul/add use an ffma unit, integer multiply uses fmma, integer add fmal.
702 (define_insn_reservation "bdver1_ssemuladd_load_256" 11
703 (and (eq_attr "cpu" "bdver1,bdver2")
704 (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
705 (and (eq_attr "mode" "V8SF,V4DF")
706 (eq_attr "memory" "load"))))
707 "bdver1-double,bdver1-fpload,bdver1-ffma")
708 (define_insn_reservation "bdver1_ssemuladd_256" 7
709 (and (eq_attr "cpu" "bdver1,bdver2")
710 (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
711 (and (eq_attr "mode" "V8SF,V4DF")
712 (eq_attr "memory" "none"))))
713 "bdver1-double,bdver1-fpsched,bdver1-ffma")
714 (define_insn_reservation "bdver1_ssemuladd_load" 10
715 (and (eq_attr "cpu" "bdver1,bdver2")
716 (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
717 (eq_attr "memory" "load")))
718 "bdver1-direct,bdver1-fpload,bdver1-ffma")
719 (define_insn_reservation "bdver1_ssemuladd" 6
720 (and (eq_attr "cpu" "bdver1,bdver2")
721 (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
722 (eq_attr "memory" "none")))
723 "bdver1-direct,bdver1-fpsched,bdver1-ffma")
724 (define_insn_reservation "bdver1_sseimul_load" 8
725 (and (eq_attr "cpu" "bdver1,bdver2")
726 (and (eq_attr "type" "sseimul")
727 (eq_attr "memory" "load")))
728 "bdver1-direct,bdver1-fpload,bdver1-fmma")
729 (define_insn_reservation "bdver1_sseimul" 4
730 (and (eq_attr "cpu" "bdver1,bdver2")
731 (and (eq_attr "type" "sseimul")
732 (eq_attr "memory" "none")))
733 "bdver1-direct,bdver1-fpsched,bdver1-fmma")
734 (define_insn_reservation "bdver1_sseiadd_load" 6
735 (and (eq_attr "cpu" "bdver1,bdver2")
736 (and (eq_attr "type" "sseiadd")
737 (eq_attr "memory" "load")))
738 "bdver1-direct,bdver1-fpload,bdver1-fmal")
739 (define_insn_reservation "bdver1_sseiadd" 2
740 (and (eq_attr "cpu" "bdver1,bdver2")
741 (and (eq_attr "type" "sseiadd")
742 (eq_attr "memory" "none")))
743 "bdver1-direct,bdver1-fpsched,bdver1-fmal")
744
745 ;; SSE DIV: no throughput information (assume same as amdfam10).
;; Every entry holds one ffma unit (ffma0 or ffma1) for 17 cycles.  Entries
;; differ in latency and decode path: the 256-bit modes decode as double, the
;; others as direct; each "_load" latency is its register latency plus 4.
746 (define_insn_reservation "bdver1_ssediv_double_load_256" 31
747 (and (eq_attr "cpu" "bdver1,bdver2")
748 (and (eq_attr "type" "ssediv")
749 (and (eq_attr "mode" "V4DF")
750 (eq_attr "memory" "load"))))
751 "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
752 (define_insn_reservation "bdver1_ssediv_double_256" 27
753 (and (eq_attr "cpu" "bdver1,bdver2")
754 (and (eq_attr "type" "ssediv")
755 (and (eq_attr "mode" "V4DF")
756 (eq_attr "memory" "none"))))
757 "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
758 (define_insn_reservation "bdver1_ssediv_single_load_256" 28
759 (and (eq_attr "cpu" "bdver1,bdver2")
760 (and (eq_attr "type" "ssediv")
761 (and (eq_attr "mode" "V8SF")
762 (eq_attr "memory" "load"))))
763 "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
764 (define_insn_reservation "bdver1_ssediv_single_256" 24
765 (and (eq_attr "cpu" "bdver1,bdver2")
766 (and (eq_attr "type" "ssediv")
767 (and (eq_attr "mode" "V8SF")
768 (eq_attr "memory" "none"))))
769 "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
770 (define_insn_reservation "bdver1_ssediv_double_load" 31
771 (and (eq_attr "cpu" "bdver1,bdver2")
772 (and (eq_attr "type" "ssediv")
773 (and (eq_attr "mode" "DF,V2DF")
774 (eq_attr "memory" "load"))))
775 "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
776 (define_insn_reservation "bdver1_ssediv_double" 27
777 (and (eq_attr "cpu" "bdver1,bdver2")
778 (and (eq_attr "type" "ssediv")
779 (and (eq_attr "mode" "DF,V2DF")
780 (eq_attr "memory" "none"))))
781 "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
782 (define_insn_reservation "bdver1_ssediv_single_load" 28
783 (and (eq_attr "cpu" "bdver1,bdver2")
784 (and (eq_attr "type" "ssediv")
785 (and (eq_attr "mode" "SF,V4SF")
786 (eq_attr "memory" "load"))))
787 "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
788 (define_insn_reservation "bdver1_ssediv_single" 24
789 (and (eq_attr "cpu" "bdver1,bdver2")
790 (and (eq_attr "type" "ssediv")
791 (and (eq_attr "mode" "SF,V4SF")
792 (eq_attr "memory" "none"))))
793 "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
794
;; sseins in TI mode: direct decode, executes on fxbar.  There is no memory
;; test, so the same entry applies whatever the memory attribute is.
795 (define_insn_reservation "bdver1_sseins" 3
796 (and (eq_attr "cpu" "bdver1,bdver2")
797 (and (eq_attr "type" "sseins")
798 (eq_attr "mode" "TI")))
799 "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
800