]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/core2.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / core2.md
CommitLineData
660b994f 1;; Scheduling for Core 2 and derived processors.
a945c346 2;; Copyright (C) 2004-2024 Free Software Foundation, Inc.
660b994f
BS
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3. If not see
18;; <http://www.gnu.org/licenses/>. */
19
20;; The scheduling description in this file is based on the one in ppro.md,
21;; with additional information obtained from
22;;
23;; "How to optimize for the Pentium family of microprocessors",
24;; by Agner Fog, PhD.
25;;
26;; The major difference from the P6 pipeline is one extra decoder, and
27;; one extra execute unit. Due to micro-op fusion, many insns no longer
28;; need to be decoded in decoder 0, but can be handled by all of them.
29
;; Automata used by this pipeline description.  The core2_idiv,
;; core2_fdiv and core2_ssediv automata exist to model the issue
;; latencies of idiv, fdiv and ssediv type insns.
(define_automaton
  "core2_decoder,core2_core,core2_idiv,core2_fdiv,core2_ssediv,core2_load,core2_store")
33
;; The CPU domain of an insn, used for Core i7 bypass latencies:
;;  - x87 insn types are "float";
;;  - SSE insn types are "float" in FP modes, "int" in SImode and
;;    "simd" otherwise;
;;  - MMX insn types are "simd";
;;  - everything else is "int".
(define_attr "i7_domain" "int,float,simd"
  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
           (const_string "float")
         (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
                          sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
                          ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
           (cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF")
                    (const_string "float")
                  (eq_attr "mode" "SI")
                    (const_string "int")]
                 (const_string "simd"))
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
           (const_string "simd")]
        (const_string "int")))
49
;; Decoding works much as on the Pentium Pro:
;;  - an instruction with 1 uop can be decoded by any of the four
;;    decoders in one cycle.
;;  - an instruction with 1 to 4 uops can be decoded only by decoder 0
;;    but still in only one cycle.
;;  - a complex (microcode) instruction can also only be decoded by
;;    decoder 0, and this takes an unspecified number of cycles.
;;
;; The goal is to schedule such that each cycle starts with one
;; multi-uop insn followed by single-uop insns, to decode as many
;; instructions per cycle as possible.
(define_cpu_unit "c2_decoder0,c2_decoder1,c2_decoder2,c2_decoder3"
                 "core2_decoder")
64
;; We first wish to find an instruction for c2_decoder0, so c2_decoder1,
;; c2_decoder2 and c2_decoder3 may only be reserved once c2_decoder0 is
;; reserved.
(presence_set "c2_decoder1,c2_decoder2,c2_decoder3" "c2_decoder0")
71
;; Most instructions can be decoded on any of the four decoders.
(define_reservation "c2_decodern"
  "(c2_decoder0|c2_decoder1|c2_decoder2|c2_decoder3)")
74
;; The out-of-order core has six pipelines.  These are similar to the
;; Pentium Pro's five pipelines.  Port 2 is responsible for memory loads,
;; port 3 for store address calculations, port 4 for memory stores, and
;; ports 0, 1 and 5 for everything else.  The three divider units each
;; live in their own automaton.

(define_cpu_unit "c2_p0,c2_p1,c2_p5" "core2_core")
(define_cpu_unit "c2_p2" "core2_load")
(define_cpu_unit "c2_p3,c2_p4" "core2_store")
(define_cpu_unit "c2_idiv" "core2_idiv")
(define_cpu_unit "c2_fdiv" "core2_fdiv")
(define_cpu_unit "c2_ssediv" "core2_ssediv")
86
87;; Only the irregular instructions have to be modeled here. A load
88;; increases the latency by 2 or 3, or by nothing if the manual gives
89;; a latency already. Store latencies are not accounted for.
90;;
91;; The simple instructions follow a very regular pattern of 1 uop per
92;; reg-reg operation, 1 uop per load on port 2, and 2 uops per store
93;; on port 4 and port 3. These instructions are modelled at the bottom
94;; of this file.
95;;
;; For microcoded instructions we don't know how many uops are produced.
;; These instructions are the "complex" ones in the Intel manuals.  All
;; we _do_ know is that they typically produce four or more uops, so
;; they can only be decoded on c2_decoder0.  Modelling their latencies
;; doesn't make sense because we don't know how these instructions are
;; executed in the core.  So the reservation only occupies decoder 0 and
;; uses a latency of 6 so that the result is not available immediately.
(define_insn_reservation "c2_complex_insn" 6
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "other,multi,str"))
  "c2_decoder0")
108
;; Calls: decoded on decoder 0 only; no execution port is reserved.
(define_insn_reservation "c2_call" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "call,callv"))
  "c2_decoder0")
113
;; imov with memory operands does not use the integer units.
;; imovx always decodes to one uop, and also doesn't use the integer
;; units if it has memory operands.
;; Register form: any decoder, then any one of ports 0, 1 or 5.
(define_insn_reservation "c2_imov" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "imov,imovx")))
  "c2_decodern,(c2_p0|c2_p1|c2_p5)")
122
;; Integer move from memory: any decoder, then the load port only.
(define_insn_reservation "c2_imov_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "imov,imovx")))
  "c2_decodern,c2_p2")
128
;; Integer move to memory: any decoder, then both store ports together.
(define_insn_reservation "c2_imov_store" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "imov")))
  "c2_decodern,c2_p4+c2_p3")
134
;; Integer conditional move, register form: decoder 0 only, then two
;; cycles on any of ports 0, 1 or 5.
(define_insn_reservation "c2_icmov" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "icmov")))
  "c2_decoder0,(c2_p0|c2_p1|c2_p5)*2")
140
;; Integer conditional move with a load: decoder 0, the load port, then
;; two cycles on any of ports 0, 1 or 5.
(define_insn_reservation "c2_icmov_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "icmov")))
  "c2_decoder0,c2_p2,(c2_p0|c2_p1|c2_p5)*2")
146
;; Push whose only memory access is the store: any decoder, then both
;; store ports.
(define_insn_reservation "c2_push_reg" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "push")))
  "c2_decodern,c2_p4+c2_p3")
152
;; Push that both loads and stores: decoder 0, the load port, then both
;; store ports.
(define_insn_reservation "c2_push_mem" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "push")))
  "c2_decoder0,c2_p2,c2_p4+c2_p3")
158
;; lea executes on port 0 with latency one and throughput 1.
(define_insn_reservation "c2_lea" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "lea")))
  "c2_decodern,c2_p0")
165
;; Shifts and rotates go to port 0 or port 5.  The load and store units
;; need to be reserved when memory operands are involved; this is the
;; register-only form.
(define_insn_reservation "c2_shift_rotate" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "c2_decodern,(c2_p0|c2_p5)")
174
;; Shift/rotate with any memory operand: decoder 0, the load port, port
;; 0 or 5 for the operation itself, then both store ports.
(define_insn_reservation "c2_shift_rotate_mem" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "c2_decoder0,c2_p2,(c2_p0|c2_p5),c2_p4+c2_p3")
180
;; Direct branches execute on port 5.  See comments in ppro.md for the
;; corresponding reservation.
(define_insn_reservation "c2_branch" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ibr")))
  "c2_decodern,c2_p5")
187
;; Branch through memory: decoder 0, then the load port together with
;; port 5.
;; ??? Indirect branches probably have worse latency than this.
(define_insn_reservation "c2_indirect_branch" 6
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "ibr")))
  "c2_decoder0,c2_p2+c2_p5")
194
;; leave: decoder 0, then the load port together with port 0 or 1,
;; followed by one more cycle on port 0 or 1.
(define_insn_reservation "c2_leave" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "leave"))
  "c2_decoder0,c2_p2+(c2_p0|c2_p1),(c2_p0|c2_p1)")
199
;; mul and imul with two/three operands only execute on port 1 for HImode
;; and SImode, port 0 for DImode.
;; HImode/SImode, register operands.
(define_insn_reservation "c2_imul_hisi" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "HI,SI")
                 (eq_attr "type" "imul"))))
  "c2_decodern,c2_p1")
208
;; HImode/SImode multiply with a memory operand: decoder 0, then the
;; load port together with port 1.
(define_insn_reservation "c2_imul_hisi_mem" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "HI,SI")
                 (eq_attr "type" "imul"))))
  "c2_decoder0,c2_p2+c2_p1")
215
;; DImode multiply, register operands: any decoder, then port 0.
(define_insn_reservation "c2_imul_di" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DI")
                 (eq_attr "type" "imul"))))
  "c2_decodern,c2_p0")
222
;; DImode multiply with a memory operand: decoder 0, then the load port
;; together with port 0.
(define_insn_reservation "c2_imul_di_mem" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "DI")
                 (eq_attr "type" "imul"))))
  "c2_decoder0,c2_p2+c2_p0")
229
;; div and idiv are very similar, so we model them the same.
;; QI, HI, and SI have issue latency 12, 21, and 37, respectively.
;; These issue latencies are modelled by keeping the c2_idiv unit (in
;; the core2_idiv automaton) reserved for that many cycles.
;; QImode, register operand: c2_idiv is held for 2 + 1 + 9 = 12 cycles.
(define_insn_reservation "c2_idiv_QI" 19
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "QI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,(c2_p0+c2_idiv)*2,(c2_p0|c2_p1)+c2_idiv,c2_idiv*9")
239
;; QImode divide with a loaded operand: the first cycle also takes the
;; load port; c2_idiv is held for 1 + 1 + 1 + 9 = 12 cycles.
(define_insn_reservation "c2_idiv_QI_load" 19
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "QI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*9")
246
;; HImode divide, register operand: c2_idiv is held for
;; 3 + 1 + 17 = 21 cycles.
(define_insn_reservation "c2_idiv_HI" 23
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "HI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,(c2_p0+c2_idiv)*3,(c2_p0|c2_p1)+c2_idiv,c2_idiv*17")
253
;; HImode divide with a loaded operand: the first cycle also takes the
;; load port; c2_idiv is held for 1 + 1 + 1 + 18 = 21 cycles.
(define_insn_reservation "c2_idiv_HI_load" 23
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "HI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*18")
260
;; SImode divide, register operand: c2_idiv is held for
;; 3 + 1 + 33 = 37 cycles.
(define_insn_reservation "c2_idiv_SI" 39
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,(c2_p0+c2_idiv)*3,(c2_p0|c2_p1)+c2_idiv,c2_idiv*33")
267
;; SImode divide with a loaded operand: the first cycle also takes the
;; load port; c2_idiv is held for 1 + 1 + 1 + 34 = 37 cycles.
(define_insn_reservation "c2_idiv_SI_load" 39
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "idiv"))))
  "c2_decoder0,c2_p2+c2_p0+c2_idiv,c2_p0+c2_idiv,(c2_p0|c2_p1)+c2_idiv,c2_idiv*34")
274
275;; x87 floating point operations.
276
;; fxch: zero latency; only a decoder slot is reserved.
(define_insn_reservation "c2_fxch" 0
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "fxch"))
  "c2_decodern")
281
;; x87 arithmetic without a known memory access ("none" or "unknown"):
;; any decoder, then port 1.
(define_insn_reservation "c2_fop" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none,unknown")
            (eq_attr "type" "fop")))
  "c2_decodern,c2_p1")
287
;; x87 arithmetic with a loaded operand: decoder 0, the load port
;; together with port 1, then one more cycle on port 1.
(define_insn_reservation "c2_fop_load" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fop")))
  "c2_decoder0,c2_p2+c2_p1,c2_p1")
293
;; x87 arithmetic storing its result: decoder 0, three cycles on port 0,
;; the last of which also takes both store ports.
(define_insn_reservation "c2_fop_store" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "fop")))
  "c2_decoder0,c2_p0,c2_p0,c2_p0+c2_p4+c2_p3")
299
;; x87 arithmetic that loads and stores: decoder 0, load port with port
;; 0, then port 0 with both store ports.
(define_insn_reservation "c2_fop_both" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "fop")))
  "c2_decoder0,c2_p2+c2_p0,c2_p0+c2_p4+c2_p3")
305
;; x87 sign operations (type fsgn): any decoder, then port 0.
(define_insn_reservation "c2_fsgn" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "fsgn"))
  "c2_decodern,c2_p0")
310
;; fistp: decoder 0, two cycles on port 0, then both store ports.
(define_insn_reservation "c2_fistp" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "fistp"))
  "c2_decoder0,c2_p0*2,c2_p4+c2_p3")
315
;; fcmov: decoder 0, then two cycles on port 0.
(define_insn_reservation "c2_fcmov" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (eq_attr "type" "fcmov"))
  "c2_decoder0,c2_p0*2")
320
;; x87 compare, register operands: any decoder, then port 1.
(define_insn_reservation "c2_fcmp" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fcmp")))
  "c2_decodern,c2_p1")
326
;; x87 compare with a loaded operand: decoder 0, then the load port
;; together with port 1.
(define_insn_reservation "c2_fcmp_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fcmp")))
  "c2_decoder0,c2_p2+c2_p1")
332
;; x87 register-to-register move: any decoder, then port 0.
(define_insn_reservation "c2_fmov" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmov")))
  "c2_decodern,c2_p0")
338
;; x87 load in any mode other than XFmode: any decoder, then the load
;; port.
(define_insn_reservation "c2_fmov_load" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "!XF")
                 (eq_attr "type" "fmov"))))
  "c2_decodern,c2_p2")
345
;; XFmode x87 load: decoder 0, then two cycles of load port plus port 0.
(define_insn_reservation "c2_fmov_XF_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fmov"))))
  "c2_decoder0,(c2_p2+c2_p0)*2")
352
;; x87 store in any mode other than XFmode: any decoder, then both store
;; ports.
(define_insn_reservation "c2_fmov_store" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "!XF")
                 (eq_attr "type" "fmov"))))
  "c2_decodern,c2_p3+c2_p4")
359
;; XFmode x87 store: decoder 0, then both store ports for two
;; consecutive cycles.
(define_insn_reservation "c2_fmov_XF_store" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fmov"))))
  "c2_decoder0,(c2_p3+c2_p4),(c2_p3+c2_p4)")
366
;; fmul executes on port 0 with latency 5.  Register form: decoder 0,
;; then port 0 for two cycles.
(define_insn_reservation "c2_fmul" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmul")))
  "c2_decoder0,c2_p0*2")
374
;; fmul with a loaded operand: decoder 0, the load port together with
;; port 0, then one more cycle on port 0.
(define_insn_reservation "c2_fmul_load" 6
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fmul")))
  "c2_decoder0,c2_p2+c2_p0,c2_p0")
380
;; fdiv latencies depend on the mode of the operands.  XFmode gives
;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
;; Division by a power of 2 takes only 9 cycles, but we cannot model
;; that.  Throughput is equal to latency - 1, which we model by keeping
;; the c2_fdiv unit (in the core2_fdiv automaton) reserved that long.
;; SFmode, register operands: c2_fdiv is held for 1 + 16 = 17 cycles.
(define_insn_reservation "c2_fdiv_SF" 18
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*16")
392
;; SFmode fdiv/fpspc with a loaded operand: decoder 0, and the first
;; execution cycle also takes the load port.
(define_insn_reservation "c2_fdiv_SF_load" 19
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*16")
399
;; DFmode fdiv/fpspc, register operands: c2_fdiv is held for
;; 1 + 30 = 31 cycles.
(define_insn_reservation "c2_fdiv_DF" 32
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*30")
406
;; DFmode fdiv/fpspc with a loaded operand: decoder 0, and the first
;; execution cycle also takes the load port.
(define_insn_reservation "c2_fdiv_DF_load" 33
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*30")
413
;; XFmode fdiv/fpspc, register operands: c2_fdiv is held for
;; 1 + 36 = 37 cycles.
(define_insn_reservation "c2_fdiv_XF" 38
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decodern,c2_p0+c2_fdiv,c2_fdiv*36")
420
;; XFmode fdiv/fpspc with a loaded operand: decoder 0, and the first
;; execution cycle also takes the load port.
(define_insn_reservation "c2_fdiv_XF_load" 39
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "c2_decoder0,c2_p2+c2_p0+c2_fdiv,c2_fdiv*36")
427
428;; MMX instructions.
429
;; MMX/SSE integer add, register operands: any decoder, then port 0 or
;; port 5.
(define_insn_reservation "c2_mmx_add" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxadd,sseiadd")))
  "c2_decodern,c2_p0|c2_p5")
435
;; MMX/SSE integer add with a loaded operand: any decoder, then the load
;; port together with port 0 or port 5.  The parentheses are required:
;; in reservation regexps "+" binds tighter than "|", so without them
;; the port 5 alternative would not reserve the load port at all.
(define_insn_reservation "c2_mmx_add_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxadd,sseiadd")))
  "c2_decodern,c2_p2+(c2_p0|c2_p5)")
441
;; MMX shift, register operands: any decoder, then port 0 or port 5.
(define_insn_reservation "c2_mmx_shft" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxshft")))
  "c2_decodern,c2_p0|c2_p5")
447
;; MMX shift with a loaded operand: decoder 0, then the load port
;; together with port 1.
(define_insn_reservation "c2_mmx_shft_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxshft")))
  "c2_decoder0,c2_p2+c2_p1")
453
;; SSE integer shift with an immediate (length_immediate != 0), register
;; operands: any decoder, then port 1.
(define_insn_reservation "c2_mmx_sse_shft" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "!0"))))
  "c2_decodern,c2_p1")
460
;; SSE integer shift with an immediate and a loaded operand.
;; NOTE(review): unlike most load reservations in this file, no load
;; port (c2_p2) is reserved here -- confirm whether that is intended.
(define_insn_reservation "c2_mmx_sse_shft_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "!0"))))
  "c2_decodern,c2_p1")
467
;; SSE integer shift without an immediate (length_immediate == 0),
;; register operands: any decoder, then port 1; latency 2.
(define_insn_reservation "c2_mmx_sse_shft1" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "0"))))
  "c2_decodern,c2_p1")
474
;; SSE integer shift without an immediate, with a loaded operand.
;; NOTE(review): unlike most load reservations in this file, no load
;; port (c2_p2) is reserved here -- confirm whether that is intended.
(define_insn_reservation "c2_mmx_sse_shft1_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "0"))))
  "c2_decodern,c2_p1")
481
;; MMX/SSE integer multiply, register operands: any decoder, then
;; port 1.
(define_insn_reservation "c2_mmx_mul" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxmul,sseimul")))
  "c2_decodern,c2_p1")
487
;; MMX/SSE integer multiply with a loaded operand: decoder 0, then the
;; load port together with port 1.  The condition must test for
;; memory "load"; testing "none" would merely duplicate the condition of
;; the register-only reservation and never match an insn with a load.
(define_insn_reservation "c2_mmx_mul_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxmul,sseimul")))
  "c2_decoder0,c2_p2+c2_p1")
493
;; DImode mmxcvt insns: any decoder, then port 1.
(define_insn_reservation "c2_sse_mmxcvt" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "mode" "DI")
            (eq_attr "type" "mmxcvt")))
  "c2_decodern,c2_p1")
499
500;; FIXME: These are Pentium III only, but we cannot tell here if
501;; we're generating code for PentiumPro/Pentium II or Pentium III
502;; (define_insn_reservation "c2_sse_mmxshft" 2
d3c11974 503;; (and (eq_attr "cpu" "core2,nehalem")
660b994f
BS
504;; (and (eq_attr "mode" "TI")
505;; (eq_attr "type" "mmxshft")))
506;; "c2_decodern,c2_p0")
507
;; The sfence instruction, recognised as an "sse" type insn whose memory
;; attribute is "unknown": decoder 0, then both store ports.
(define_insn_reservation "c2_sse_sfence" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "unknown")
            (eq_attr "type" "sse")))
  "c2_decoder0,c2_p4+c2_p3")
514
;; Scalar SFmode/DFmode "sse" type insns: any decoder, then port 0.
;; FIXME: This reservation is all wrong when we're scheduling sqrtss.
(define_insn_reservation "c2_sse_SFDF" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "mode" "SF,DF")
            (eq_attr "type" "sse")))
  "c2_decodern,c2_p0")
521
;; V4SFmode "sse" type insns: decoder 0, then two cycles on port 1.
(define_insn_reservation "c2_sse_V4SF" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "mode" "V4SF")
            (eq_attr "type" "sse")))
  "c2_decoder0,c2_p1*2")
527
;; SSE FP add and compare, register operands: any decoder, then port 1.
(define_insn_reservation "c2_sse_addcmp" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "sseadd,sseadd1,ssecmp,ssecomi")))
  "c2_decodern,c2_p1")
533
;; SSE FP add and compare with a loaded operand: any decoder, then the
;; load port together with port 1.
(define_insn_reservation "c2_sse_addcmp_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "sseadd,sseadd1,ssecmp,ssecomi")))
  "c2_decodern,c2_p2+c2_p1")
539
;; Single-precision SSE multiply (SF, V4SF), register operands: any
;; decoder, then port 0.
(define_insn_reservation "c2_sse_mul_SF" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "type" "ssemul"))))
  "c2_decodern,c2_p0")
546
;; Single-precision SSE multiply with a loaded operand: any decoder,
;; then the load port together with port 0.
(define_insn_reservation "c2_sse_mul_SF_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "type" "ssemul"))))
  "c2_decodern,c2_p2+c2_p0")
553
;; Double-precision SSE multiply (DF, V2DF), register operands: any
;; decoder, then port 0.
(define_insn_reservation "c2_sse_mul_DF" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "type" "ssemul"))))
  "c2_decodern,c2_p0")
560
;; Double-precision SSE multiply with a loaded operand: any decoder,
;; then the load port together with port 0.
(define_insn_reservation "c2_sse_mul_DF_load" 5
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "type" "ssemul"))))
  "c2_decodern,c2_p2+c2_p0")
567
;; Single-precision SSE divide (SF, V4SF), register operands: any
;; decoder, port 0, then the c2_ssediv unit for 17 cycles.
(define_insn_reservation "c2_sse_div_SF" 18
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "type" "ssediv"))))
  "c2_decodern,c2_p0,c2_ssediv*17")
574
;; Single-precision SSE divide (SF, V4SF) with a loaded operand: any
;; decoder, the load port together with port 0, then the c2_ssediv unit
;; for 17 cycles.  The condition must test for memory "load"; testing
;; "none" would merely duplicate the condition of the register-only
;; reservation and never match an insn with a load.
(define_insn_reservation "c2_sse_div_SF_load" 18
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "type" "ssediv"))))
  "c2_decodern,(c2_p2+c2_p0),c2_ssediv*17")
581
;; Double-precision SSE divide (DF, V2DF), register operands: any
;; decoder, port 0, then the c2_ssediv unit for 31 cycles.
(define_insn_reservation "c2_sse_div_DF" 32
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "type" "ssediv"))))
  "c2_decodern,c2_p0,c2_ssediv*31")
588
;; Double-precision SSE divide (DF, V2DF) with a loaded operand: any
;; decoder, the load port together with port 0, then the c2_ssediv unit
;; for 31 cycles.  The condition must test for memory "load"; testing
;; "none" would merely duplicate the condition of the register-only
;; reservation and never match an insn with a load.
(define_insn_reservation "c2_sse_div_DF_load" 32
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "type" "ssediv"))))
  "c2_decodern,(c2_p2+c2_p0),c2_ssediv*31")
595
;; Integer<->FP conversions (type sseicvt).
;; FIXME: these have limited throughput
;; SFmode, register operands: any decoder, then port 1.
(define_insn_reservation "c2_sse_icvt_SF" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "sseicvt"))))
  "c2_decodern,c2_p1")
603
;; SFmode sseicvt with any memory operand: any decoder, then the load
;; port together with port 1.
(define_insn_reservation "c2_sse_icvt_SF_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "sseicvt"))))
  "c2_decodern,c2_p2+c2_p1")
610
;; DFmode sseicvt, register operands: decoder 0, then ports 0 and 1
;; together.
(define_insn_reservation "c2_sse_icvt_DF" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "sseicvt"))))
  "c2_decoder0,c2_p0+c2_p1")
617
;; DFmode sseicvt with any memory operand: decoder 0, then the load port
;; together with port 1.
(define_insn_reservation "c2_sse_icvt_DF_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "sseicvt"))))
  "c2_decoder0,(c2_p2+c2_p1)")
624
;; SImode sseicvt, register operands: any decoder, then port 1.
(define_insn_reservation "c2_sse_icvt_SI" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "sseicvt"))))
  "c2_decodern,c2_p1")
631
;; SImode sseicvt with any memory operand: any decoder, then the load
;; port together with port 1.
(define_insn_reservation "c2_sse_icvt_SI_load" 3
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "sseicvt"))))
  "c2_decodern,(c2_p2+c2_p1)")
638
;; SSE register-to-register move: any decoder, then any one of ports
;; 0, 1 or 5.
(define_insn_reservation "c2_sse_mov" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssemov")))
  "c2_decodern,(c2_p0|c2_p1|c2_p5)")
644
;; SSE move from memory: any decoder, then the load port.
(define_insn_reservation "c2_sse_mov_load" 2
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssemov")))
  "c2_decodern,c2_p2")
650
;; SSE move to memory: any decoder, then both store ports.
(define_insn_reservation "c2_sse_mov_store" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "ssemov")))
  "c2_decodern,c2_p4+c2_p3")
656
;; All other instructions are modelled as simple instructions.
;; We have already modelled all i387 floating point instructions, so all
;; other instructions execute on either port 0, 1 or 5.  This includes
;; the ALU units, and the MMX units.
;;
;; reg-reg instructions produce 1 uop so they can be decoded on any of
;; the four decoders.  Loads benefit from micro-op fusion and can be
;; treated in the same way.
(define_insn_reservation "c2_insn" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "none,unknown")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "c2_decodern,(c2_p0|c2_p1|c2_p5)")
670
;; Simple instruction with a load (this variant also covers pop): any
;; decoder, the load port, then any one of ports 0, 1 or 5.
(define_insn_reservation "c2_insn_load" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "c2_decodern,c2_p2,(c2_p0|c2_p1|c2_p5)")
676
;; register-memory instructions have three uops, so they have to be
;; decoded on c2_decoder0.  After decoding: any one of ports 0, 1 or 5,
;; then both store ports.
(define_insn_reservation "c2_insn_store" 1
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "c2_decoder0,(c2_p0|c2_p1|c2_p5),c2_p4+c2_p3")
684
;; read-modify-store instructions produce 4 uops so they have to be
;; decoded on c2_decoder0 as well.  After decoding: the load port, any
;; one of ports 0, 1 or 5, then both store ports.
(define_insn_reservation "c2_insn_both" 4
  (and (eq_attr "cpu" "core2,nehalem")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "c2_decoder0,c2_p2,(c2_p0|c2_p1|c2_p5),c2_p4+c2_p3")