]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/x86-tune-costs.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / x86-tune-costs.h
CommitLineData
df41dbaf 1/* Costs of operations of individual x86 CPUs.
7adcbafe 2 Copyright (C) 1988-2022 Free Software Foundation, Inc.
64766e8d 3
df41dbaf
JH
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16Under Section 7 of GPL version 3, you are granted additional
17permissions described in the GCC Runtime Library Exception, version
183.1, as published by the Free Software Foundation.
19
20You should have received a copy of the GNU General Public License and
21a copy of the GCC Runtime Library Exception along with this program;
22see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23<http://www.gnu.org/licenses/>. */
64766e8d
JH
24/* Processor costs (relative to an add) */
25/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   Under that assumption COSTS_N_BYTES (2), the byte cost of one addition,
   evaluates to 4, the same value as COSTS_N_INSNS (1).  */
26#define COSTS_N_BYTES(N) ((N) * 2)
27
/* Placeholder stringop_algs initializer: libcall appears both as the outer
   field and in the single {-1, libcall, false} inner entry.  Several of the
   memcpy/memset tables in this file use it as their second element.  */
28#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
29
/* memcpy strategy table referenced by ix86_size_cost.  Both elements name
   rep_prefix_1_byte in the outer field and in their single {-1, ...} entry.
   NOTE(review): presumably the two elements are the 32-bit and 64-bit
   variants -- confirm against the code that indexes these tables.  */
30static stringop_algs ix86_size_memcpy[2] = {
31 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
32 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
/* memset strategy table referenced by ix86_size_cost.  Both elements name
   rep_prefix_1_byte in the outer field and in their single {-1, ...} entry,
   the same contents as ix86_size_memcpy.  */
33static stringop_algs ix86_size_memset[2] = {
34 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
35 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
36
/* Cost table used when tuning for size.  Instruction costs are written
   with COSTS_N_BYTES instead of COSTS_N_INSNS, the cache and prefetch
   fields are 0, string operations use ix86_size_memcpy and
   ix86_size_memset, and all four alignment entries are NULL.  The
   initializer is positional, so entries must stay in this exact order.  */
37const
38struct processor_costs ix86_size_cost = {/* costs for tuning for size */
72bb85f8 39 {
d321551c
L
40 /* Start of register allocator costs. integer->integer move cost is 2. */
41 2, /* cost for loading QImode using movzbl */
42 {2, 2, 2}, /* cost of loading integer registers
43 in QImode, HImode and SImode.
44 Relative to reg-reg move (2). */
45 {2, 2, 2}, /* cost of storing integer registers */
46 2, /* cost of reg,reg fld/fst */
47 {2, 2, 2}, /* cost of loading fp registers
48 in SFmode, DFmode and XFmode */
49 {2, 2, 2}, /* cost of storing fp registers
50 in SFmode, DFmode and XFmode */
51 3, /* cost of moving MMX register */
52 {3, 3}, /* cost of loading MMX registers
53 in SImode and DImode */
54 {3, 3}, /* cost of storing MMX registers
55 in SImode and DImode */
56 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
57 {3, 3, 3, 3, 3}, /* cost of loading SSE registers
58 in 32,64,128,256 and 512-bit */
59 {3, 3, 3, 3, 3}, /* cost of storing SSE registers
60 in 32,64,128,256 and 512-bit */
ecc3135a 61 3, 3, /* SSE->integer and integer->SSE moves */
62 3, 3, /* mask->integer and integer->mask moves */
00cb3494
L
63 {2, 2, 2}, /* cost of loading mask register
64 in QImode, HImode, SImode. */
65 {2, 2, 2}, /* cost of storing mask register
66 in QImode, HImode, SImode. */
67 2, /* cost of moving mask register. */
d321551c 68 /* End of register allocator costs. */
72bb85f8 69 },
d321551c 70

64766e8d
JH
71 COSTS_N_BYTES (2), /* cost of an add instruction */
72 COSTS_N_BYTES (3), /* cost of a lea instruction */
73 COSTS_N_BYTES (2), /* variable shift costs */
74 COSTS_N_BYTES (3), /* constant shift costs */
75 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 0, /* cost of multiply per each bit set */
81 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
82 COSTS_N_BYTES (3), /* HI */
83 COSTS_N_BYTES (3), /* SI */
84 COSTS_N_BYTES (3), /* DI */
85 COSTS_N_BYTES (5)}, /* other */
86 COSTS_N_BYTES (3), /* cost of movsx */
87 COSTS_N_BYTES (3), /* cost of movzx */
88 0, /* "large" insn */
89 2, /* MOVE_RATIO */
25e22b19 90 2, /* CLEAR_RATIO */
64766e8d
JH
91 {2, 2, 2}, /* cost of loading integer registers
92 in QImode, HImode and SImode.
93 Relative to reg-reg move (2). */
94 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
95 {3, 3, 3, 3, 3}, /* cost of loading SSE register
96 in 32bit, 64bit, 128bit, 256bit and 512bit */
97 {3, 3, 3, 3, 3}, /* cost of storing SSE register
98 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf
JH
99 {3, 3, 3, 3, 3}, /* cost of unaligned SSE load
100 in 32bit, 64bit, 128bit, 256bit and 512bit
 (five entries; widths presumed to match the
 aligned arrays above -- confirm) */
d321551c 101 {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
df41dbaf 102 in 32bit, 64bit, 128bit, 256bit and 512bit
 (five entries; widths presumed to match the
 aligned arrays above -- confirm) */
d321551c
L
103 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
104 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
105 5, 0, /* Gather load static, per_elt. */
106 5, 0, /* Gather store static, per_elt. */
64766e8d
JH
107 0, /* size of l1 cache */
108 0, /* size of l2 cache */
109 0, /* size of prefetch block */
110 0, /* number of parallel prefetches */
111 2, /* Branch cost */
112 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
113 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
114 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
115 COSTS_N_BYTES (2), /* cost of FABS instruction. */
116 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
117 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
6065f444 118
c53c148c 119 COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
6065f444
JH
120 COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
121 COSTS_N_BYTES (2), /* cost of MULSS instruction. */
122 COSTS_N_BYTES (2), /* cost of MULSD instruction. */
c53c148c
JH
123 COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
124 COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
6065f444
JH
125 COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
126 COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
127 COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
128 COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
64766e8d
JH
129 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
130 ix86_size_memcpy,
131 ix86_size_memset,
f6fd8f2b
JH
132 COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
133 COSTS_N_BYTES (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
134 NULL, /* Loop alignment. */
135 NULL, /* Jump alignment. */
136 NULL, /* Label alignment. */
137 NULL, /* Func alignment. */
64766e8d
JH
138};
139
140/* Processor costs (relative to an add) */
/* memcpy strategy table referenced by i386_cost.  The first element names
   rep_prefix_1_byte in the outer field and in its single {-1, ...} entry;
   the second element is the DUMMY_STRINGOP_ALGS placeholder.  */
141static stringop_algs i386_memcpy[2] = {
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
143 DUMMY_STRINGOP_ALGS};
/* memset strategy table referenced by i386_cost.  Same contents as
   i386_memcpy: rep_prefix_1_byte in the first element and the
   DUMMY_STRINGOP_ALGS placeholder in the second.  */
144static stringop_algs i386_memset[2] = {
145 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
146 DUMMY_STRINGOP_ALGS};
147
/* Cost table for the 386.  Instruction costs are written with
   COSTS_N_INSNS; load/store entries are relative to a reg-reg move cost
   of 2.  Cache and prefetch fields are 0, string operations use
   i386_memcpy and i386_memset, and loop, jump and function alignment are
   "4" while label alignment is NULL.  The initializer is positional, so
   entries must stay in this exact order.  */
148static const
149struct processor_costs i386_cost = { /* 386 specific costs */
72bb85f8 150 {
d321551c
L
151 /* Start of register allocator costs. integer->integer move cost is 2. */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
168 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
169 in 32,64,128,256 and 512-bit */
170 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
171 in 32,64,128,256 and 512-bit */
ecc3135a 172 3, 3, /* SSE->integer and integer->SSE moves */
173 3, 3, /* mask->integer and integer->mask moves */
00cb3494
L
174 {2, 4, 2}, /* cost of loading mask register
175 in QImode, HImode, SImode. */
176 {2, 4, 2}, /* cost of storing mask register
177 in QImode, HImode, SImode. */
178 2, /* cost of moving mask register. */
d321551c 179 /* End of register allocator costs. */
72bb85f8 180 },
d321551c 181

64766e8d
JH
182 COSTS_N_INSNS (1), /* cost of an add instruction */
183 COSTS_N_INSNS (1), /* cost of a lea instruction */
184 COSTS_N_INSNS (3), /* variable shift costs */
185 COSTS_N_INSNS (2), /* constant shift costs */
186 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
187 COSTS_N_INSNS (6), /* HI */
188 COSTS_N_INSNS (6), /* SI */
189 COSTS_N_INSNS (6), /* DI */
190 COSTS_N_INSNS (6)}, /* other */
191 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
192 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
193 COSTS_N_INSNS (23), /* HI */
194 COSTS_N_INSNS (23), /* SI */
195 COSTS_N_INSNS (23), /* DI */
196 COSTS_N_INSNS (23)}, /* other */
197 COSTS_N_INSNS (3), /* cost of movsx */
198 COSTS_N_INSNS (2), /* cost of movzx */
199 15, /* "large" insn */
200 3, /* MOVE_RATIO */
25e22b19 201 3, /* CLEAR_RATIO */
64766e8d
JH
202 {2, 4, 2}, /* cost of loading integer registers
203 in QImode, HImode and SImode.
204 Relative to reg-reg move (2). */
205 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
206 {4, 8, 16, 32, 64}, /* cost of loading SSE register
207 in 32bit, 64bit, 128bit, 256bit and 512bit */
208 {4, 8, 16, 32, 64}, /* cost of storing SSE register
209 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 210 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 211 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
212 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
213 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
214 4, 4, /* Gather load static, per_elt. */
215 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
216 0, /* size of l1 cache */
217 0, /* size of l2 cache */
218 0, /* size of prefetch block */
219 0, /* number of parallel prefetches */
220 1, /* Branch cost */
221 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
222 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
223 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
224 COSTS_N_INSNS (22), /* cost of FABS instruction. */
225 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
226 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
6065f444 227
c53c148c 228 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
229 COSTS_N_INSNS (23), /* cost of ADDSS/SD SUBSS/SD insns. */
230 COSTS_N_INSNS (27), /* cost of MULSS instruction. */
231 COSTS_N_INSNS (27), /* cost of MULSD instruction. */
c53c148c
JH
232 COSTS_N_INSNS (27), /* cost of FMA SS instruction. */
233 COSTS_N_INSNS (27), /* cost of FMA SD instruction. */
6065f444
JH
234 COSTS_N_INSNS (88), /* cost of DIVSS instruction. */
235 COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
236 COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
237 COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
64766e8d
JH
238 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
239 i386_memcpy,
240 i386_memset,
f6fd8f2b
JH
241 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
242 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
243 "4", /* Loop alignment. */
244 "4", /* Jump alignment. */
245 NULL, /* Label alignment. */
246 "4", /* Func alignment. */
64766e8d
JH
247};
248
/* memcpy strategy table referenced by i486_cost.  The first element names
   rep_prefix_4_byte in the outer field and in its single {-1, ...} entry;
   the second element is the DUMMY_STRINGOP_ALGS placeholder.  */
249static stringop_algs i486_memcpy[2] = {
250 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
251 DUMMY_STRINGOP_ALGS};
/* memset strategy table referenced by i486_cost.  Same contents as
   i486_memcpy: rep_prefix_4_byte in the first element and the
   DUMMY_STRINGOP_ALGS placeholder in the second.  */
252static stringop_algs i486_memset[2] = {
253 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
254 DUMMY_STRINGOP_ALGS};
255
/* Cost table for the 486.  Instruction costs are written with
   COSTS_N_INSNS; load/store entries are relative to a reg-reg move cost
   of 2.  Both cache size fields are 4, the prefetch fields are 0, string
   operations use i486_memcpy and i486_memset, and the alignment entries
   are "16" except for the label alignment "0:0:8".  The initializer is
   positional, so entries must stay in this exact order.  */
256static const
257struct processor_costs i486_cost = { /* 486 specific costs */
72bb85f8 258 {
d321551c
L
259 /* Start of register allocator costs. integer->integer move cost is 2. */
260 4, /* cost for loading QImode using movzbl */
261 {2, 4, 2}, /* cost of loading integer registers
262 in QImode, HImode and SImode.
263 Relative to reg-reg move (2). */
264 {2, 4, 2}, /* cost of storing integer registers */
265 2, /* cost of reg,reg fld/fst */
266 {8, 8, 8}, /* cost of loading fp registers
267 in SFmode, DFmode and XFmode */
268 {8, 8, 8}, /* cost of storing fp registers
269 in SFmode, DFmode and XFmode */
270 2, /* cost of moving MMX register */
271 {4, 8}, /* cost of loading MMX registers
272 in SImode and DImode */
273 {4, 8}, /* cost of storing MMX registers
274 in SImode and DImode */
275 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
276 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
277 in 32,64,128,256 and 512-bit */
278 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
279 in 32,64,128,256 and 512-bit */
ecc3135a 280 3, 3, /* SSE->integer and integer->SSE moves */
281 3, 3, /* mask->integer and integer->mask moves */
00cb3494
L
282 {2, 4, 2}, /* cost of loading mask register
283 in QImode, HImode, SImode. */
284 {2, 4, 2}, /* cost of storing mask register
285 in QImode, HImode, SImode. */
286 2, /* cost of moving mask register. */
d321551c 287 /* End of register allocator costs. */
72bb85f8 288 },
d321551c 289

64766e8d
JH
290 COSTS_N_INSNS (1), /* cost of an add instruction */
291 COSTS_N_INSNS (1), /* cost of a lea instruction */
292 COSTS_N_INSNS (3), /* variable shift costs */
293 COSTS_N_INSNS (2), /* constant shift costs */
294 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
295 COSTS_N_INSNS (12), /* HI */
296 COSTS_N_INSNS (12), /* SI */
297 COSTS_N_INSNS (12), /* DI */
298 COSTS_N_INSNS (12)}, /* other */
299 1, /* cost of multiply per each bit set.
 NOTE(review): this is the bare value 1, not
 a COSTS_N_INSNS expression -- confirm that
 is intended. */
300 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
301 COSTS_N_INSNS (40), /* HI */
302 COSTS_N_INSNS (40), /* SI */
303 COSTS_N_INSNS (40), /* DI */
304 COSTS_N_INSNS (40)}, /* other */
305 COSTS_N_INSNS (3), /* cost of movsx */
306 COSTS_N_INSNS (2), /* cost of movzx */
307 15, /* "large" insn */
308 3, /* MOVE_RATIO */
25e22b19 309 3, /* CLEAR_RATIO */
64766e8d
JH
310 {2, 4, 2}, /* cost of loading integer registers
311 in QImode, HImode and SImode.
312 Relative to reg-reg move (2). */
313 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
314 {4, 8, 16, 32, 64}, /* cost of loading SSE register
315 in 32bit, 64bit, 128bit, 256bit and 512bit */
316 {4, 8, 16, 32, 64}, /* cost of storing SSE register
317 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 318 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 319 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
320 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
321 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
322 4, 4, /* Gather load static, per_elt. */
323 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
324 4, /* size of l1 cache. 486 has 8kB cache
325 shared for code and data, so 4kB is
326 not really precise. */
327 4, /* size of l2 cache */
328 0, /* size of prefetch block */
329 0, /* number of parallel prefetches */
330 1, /* Branch cost */
331 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
332 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
333 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
334 COSTS_N_INSNS (3), /* cost of FABS instruction. */
335 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
336 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
6065f444 337
c53c148c 338 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
339 COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
340 COSTS_N_INSNS (16), /* cost of MULSS instruction. */
341 COSTS_N_INSNS (16), /* cost of MULSD instruction. */
c53c148c
JH
342 COSTS_N_INSNS (16), /* cost of FMA SS instruction. */
343 COSTS_N_INSNS (16), /* cost of FMA SD instruction. */
6065f444
JH
344 COSTS_N_INSNS (73), /* cost of DIVSS instruction. */
345 COSTS_N_INSNS (74), /* cost of DIVSD instruction.
 NOTE(review): 74 here versus 73 for FDIV
 and DIVSS -- confirm. */
346 COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
347 COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
64766e8d
JH
348 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
349 i486_memcpy,
350 i486_memset,
f6fd8f2b
JH
351 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
352 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
353 "16", /* Loop alignment. */
354 "16", /* Jump alignment. */
355 "0:0:8", /* Label alignment. */
356 "16", /* Func alignment. */
64766e8d
JH
357};
358
/* memcpy strategy table referenced by pentium_cost and lakemont_cost.  The
   first element has libcall as its outer field and two inner entries:
   {256, rep_prefix_4_byte, false} followed by {-1, libcall, false}.  The
   second element is the DUMMY_STRINGOP_ALGS placeholder.
   NOTE(review): presumably the first number in each inner entry is the
   largest block size handled by that algorithm, with -1 meaning no upper
   bound -- confirm against the stringop_algs declaration.  */
359static stringop_algs pentium_memcpy[2] = {
360 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
361 DUMMY_STRINGOP_ALGS};
/* memset strategy table referenced by pentium_cost and lakemont_cost.  The
   first element has libcall as its outer field and a single
   {-1, rep_prefix_4_byte, false} inner entry; the second element is the
   DUMMY_STRINGOP_ALGS placeholder.  */
362static stringop_algs pentium_memset[2] = {
363 {libcall, {{-1, rep_prefix_4_byte, false}}},
364 DUMMY_STRINGOP_ALGS};
365
/* Cost table named pentium_cost.  Instruction costs are written with
   COSTS_N_INSNS; load/store entries are relative to a reg-reg move cost
   of 2.  Both cache size fields are 8, the prefetch fields are 0, and
   string operations use pentium_memcpy and pentium_memset.  The
   initializer is positional, so entries must stay in this exact order.  */
366static const
367struct processor_costs pentium_cost = {
72bb85f8 368 {
d321551c
L
369 /* Start of register allocator costs. integer->integer move cost is 2. */
370 6, /* cost for loading QImode using movzbl */
371 {2, 4, 2}, /* cost of loading integer registers
372 in QImode, HImode and SImode.
373 Relative to reg-reg move (2). */
374 {2, 4, 2}, /* cost of storing integer registers */
375 2, /* cost of reg,reg fld/fst */
376 {2, 2, 6}, /* cost of loading fp registers
377 in SFmode, DFmode and XFmode */
378 {4, 4, 6}, /* cost of storing fp registers
379 in SFmode, DFmode and XFmode */
380 8, /* cost of moving MMX register */
381 {8, 8}, /* cost of loading MMX registers
382 in SImode and DImode */
383 {8, 8}, /* cost of storing MMX registers
384 in SImode and DImode */
385 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
386 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
387 in 32,64,128,256 and 512-bit */
388 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
389 in 32,64,128,256 and 512-bit */
ecc3135a 390 3, 3, /* SSE->integer and integer->SSE moves */
391 3, 3, /* mask->integer and integer->mask moves */
00cb3494
L
392 {2, 4, 2}, /* cost of loading mask register
393 in QImode, HImode, SImode. */
394 {2, 4, 2}, /* cost of storing mask register
395 in QImode, HImode, SImode. */
396 2, /* cost of moving mask register. */
d321551c 397 /* End of register allocator costs. */
72bb85f8 398 },
d321551c 399

64766e8d
JH
400 COSTS_N_INSNS (1), /* cost of an add instruction */
401 COSTS_N_INSNS (1), /* cost of a lea instruction */
402 COSTS_N_INSNS (4), /* variable shift costs */
403 COSTS_N_INSNS (1), /* constant shift costs */
404 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
405 COSTS_N_INSNS (11), /* HI */
406 COSTS_N_INSNS (11), /* SI */
407 COSTS_N_INSNS (11), /* DI */
408 COSTS_N_INSNS (11)}, /* other */
409 0, /* cost of multiply per each bit set */
410 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
411 COSTS_N_INSNS (25), /* HI */
412 COSTS_N_INSNS (25), /* SI */
413 COSTS_N_INSNS (25), /* DI */
414 COSTS_N_INSNS (25)}, /* other */
415 COSTS_N_INSNS (3), /* cost of movsx */
416 COSTS_N_INSNS (2), /* cost of movzx */
417 8, /* "large" insn */
418 6, /* MOVE_RATIO */
25e22b19 419 6, /* CLEAR_RATIO */
64766e8d
JH
420 {2, 4, 2}, /* cost of loading integer registers
421 in QImode, HImode and SImode.
422 Relative to reg-reg move (2). */
423 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
424 {4, 8, 16, 32, 64}, /* cost of loading SSE register
425 in 32bit, 64bit, 128bit, 256bit and 512bit */
426 {4, 8, 16, 32, 64}, /* cost of storing SSE register
427 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 428 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 429 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
430 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
431 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
432 4, 4, /* Gather load static, per_elt. */
433 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
434 8, /* size of l1 cache. */
435 8, /* size of l2 cache */
436 0, /* size of prefetch block */
437 0, /* number of parallel prefetches */
438 2, /* Branch cost */
439 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
440 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
441 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
442 COSTS_N_INSNS (1), /* cost of FABS instruction. */
443 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
444 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
6065f444 445
c53c148c 446 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
447 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
448 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
449 COSTS_N_INSNS (3), /* cost of MULSD instruction. */
c53c148c
JH
450 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
451 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
452 COSTS_N_INSNS (39), /* cost of DIVSS instruction. */
453 COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
454 COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
455 COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
64766e8d
JH
456 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
457 pentium_memcpy,
458 pentium_memset,
f6fd8f2b
JH
459 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
460 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
461 "16:8:8", /* Loop alignment. */
462 "16:8:8", /* Jump alignment. */
463 "0:0:8", /* Label alignment. */
464 "16", /* Func alignment. */
64766e8d
JH
465};
466
/* Cost table named lakemont_cost.  It has no string-operation tables of
   its own and references pentium_memcpy and pentium_memset.  The lea cost
   is COSTS_N_INSNS (1) + 1, one unit above the add cost, and MOVE_RATIO
   is 17 while CLEAR_RATIO is 6.  The initializer is positional, so
   entries must stay in this exact order.  */
467static const
468struct processor_costs lakemont_cost = {
72bb85f8 469 {
d321551c
L
470 /* Start of register allocator costs. integer->integer move cost is 2. */
471 6, /* cost for loading QImode using movzbl */
472 {2, 4, 2}, /* cost of loading integer registers
473 in QImode, HImode and SImode.
474 Relative to reg-reg move (2). */
475 {2, 4, 2}, /* cost of storing integer registers */
476 2, /* cost of reg,reg fld/fst */
477 {2, 2, 6}, /* cost of loading fp registers
478 in SFmode, DFmode and XFmode */
479 {4, 4, 6}, /* cost of storing fp registers
480 in SFmode, DFmode and XFmode */
481 8, /* cost of moving MMX register */
482 {8, 8}, /* cost of loading MMX registers
483 in SImode and DImode */
484 {8, 8}, /* cost of storing MMX registers
485 in SImode and DImode */
486 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
487 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
488 in 32,64,128,256 and 512-bit */
489 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
490 in 32,64,128,256 and 512-bit */
ecc3135a 491 3, 3, /* SSE->integer and integer->SSE moves */
492 3, 3, /* mask->integer and integer->mask moves */
00cb3494
L
493 {2, 4, 2}, /* cost of loading mask register
494 in QImode, HImode, SImode. */
495 {2, 4, 2}, /* cost of storing mask register
496 in QImode, HImode, SImode. */
497 2, /* cost of moving mask register. */
d321551c 498 /* End of register allocator costs. */
72bb85f8 499 },
d321551c 500

64766e8d
JH
501 COSTS_N_INSNS (1), /* cost of an add instruction */
502 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
503 COSTS_N_INSNS (1), /* variable shift costs */
504 COSTS_N_INSNS (1), /* constant shift costs */
505 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
506 COSTS_N_INSNS (11), /* HI */
507 COSTS_N_INSNS (11), /* SI */
508 COSTS_N_INSNS (11), /* DI */
509 COSTS_N_INSNS (11)}, /* other */
510 0, /* cost of multiply per each bit set */
511 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
512 COSTS_N_INSNS (25), /* HI */
513 COSTS_N_INSNS (25), /* SI */
514 COSTS_N_INSNS (25), /* DI */
515 COSTS_N_INSNS (25)}, /* other */
516 COSTS_N_INSNS (3), /* cost of movsx */
517 COSTS_N_INSNS (2), /* cost of movzx */
518 8, /* "large" insn */
519 17, /* MOVE_RATIO */
25e22b19 520 6, /* CLEAR_RATIO */
64766e8d
JH
521 {2, 4, 2}, /* cost of loading integer registers
522 in QImode, HImode and SImode.
523 Relative to reg-reg move (2). */
524 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
525 {4, 8, 16, 32, 64}, /* cost of loading SSE register
526 in 32bit, 64bit, 128bit, 256bit and 512bit */
527 {4, 8, 16, 32, 64}, /* cost of storing SSE register
528 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 529 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 530 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
531 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
532 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
533 4, 4, /* Gather load static, per_elt. */
534 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
535 8, /* size of l1 cache. */
536 8, /* size of l2 cache */
537 0, /* size of prefetch block */
538 0, /* number of parallel prefetches */
539 2, /* Branch cost */
540 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
541 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
542 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
543 COSTS_N_INSNS (1), /* cost of FABS instruction. */
544 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
545 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
6065f444 546
c53c148c 547 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
548 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
549 COSTS_N_INSNS (5), /* cost of MULSS instruction. */
550 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
551 COSTS_N_INSNS (10), /* cost of FMA SS instruction. */
552 COSTS_N_INSNS (10), /* cost of FMA SD instruction. */
6065f444
JH
553 COSTS_N_INSNS (31), /* cost of DIVSS instruction. */
554 COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
555 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
556 COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
64766e8d
JH
557 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
558 pentium_memcpy,
559 pentium_memset,
f6fd8f2b
JH
560 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
561 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
562 "16:8:8", /* Loop alignment. */
563 "16:8:8", /* Jump alignment. */
564 "0:0:8", /* Label alignment. */
565 "16", /* Func alignment. */
64766e8d
JH
566};
567
568/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
569 (we ensure the alignment). For small blocks an inline loop is still a
570 noticeable win; for bigger blocks either rep movsl or rep movsb is the
571 way to go. Rep movsb apparently has a more expensive startup time in the CPU,
572 but after 4K the difference is down in the noise. */
/* memcpy strategy table referenced by pentiumpro_cost.  The first element
   has rep_prefix_4_byte as its outer field and four inner entries, in
   order: loop at 128, unrolled_loop at 1024, rep_prefix_4_byte at 8192 and
   rep_prefix_1_byte at -1.  The second element is the DUMMY_STRINGOP_ALGS
   placeholder.
   NOTE(review): presumably each number is the largest block size handled
   by that algorithm, with -1 meaning no upper bound -- confirm against
   the stringop_algs declaration.  */
573static stringop_algs pentiumpro_memcpy[2] = {
574 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
575 {8192, rep_prefix_4_byte, false},
576 {-1, rep_prefix_1_byte, false}}},
577 DUMMY_STRINGOP_ALGS};
/* memset strategy table referenced by pentiumpro_cost.  The first element
   has rep_prefix_4_byte as its outer field and three inner entries, in
   order: unrolled_loop at 1024, rep_prefix_4_byte at 8192 and libcall at
   -1.  The second element is the DUMMY_STRINGOP_ALGS placeholder.  */
578static stringop_algs pentiumpro_memset[2] = {
579 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
580 {8192, rep_prefix_4_byte, false},
581 {-1, libcall, false}}},
582 DUMMY_STRINGOP_ALGS};
/* Cost table named pentiumpro_cost.  Instruction costs are written with
   COSTS_N_INSNS; load/store entries are relative to a reg-reg move cost
   of 2.  The cache fields are 8 (l1) and 256 (l2), the prefetch block is
   32 with 6 parallel prefetches, and string operations use
   pentiumpro_memcpy and pentiumpro_memset.  The initializer is
   positional, so entries must stay in this exact order.  */
583static const
584struct processor_costs pentiumpro_cost = {
72bb85f8 585 {
d321551c
L
586 /* Start of register allocator costs. integer->integer move cost is 2. */
587 2, /* cost for loading QImode using movzbl */
588 {4, 4, 4}, /* cost of loading integer registers
589 in QImode, HImode and SImode.
590 Relative to reg-reg move (2). */
591 {2, 2, 2}, /* cost of storing integer registers */
592 2, /* cost of reg,reg fld/fst */
593 {2, 2, 6}, /* cost of loading fp registers
594 in SFmode, DFmode and XFmode */
595 {4, 4, 6}, /* cost of storing fp registers
596 in SFmode, DFmode and XFmode */
597 2, /* cost of moving MMX register */
598 {2, 2}, /* cost of loading MMX registers
599 in SImode and DImode */
600 {2, 2}, /* cost of storing MMX registers
601 in SImode and DImode */
602 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
603 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
604 in 32,64,128,256 and 512-bit */
605 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
606 in 32,64,128,256 and 512-bit */
ecc3135a 607 3, 3, /* SSE->integer and integer->SSE moves */
608 3, 3, /* mask->integer and integer->mask moves */
00cb3494
L
609 {4, 4, 4}, /* cost of loading mask register
610 in QImode, HImode, SImode. */
611 {2, 2, 2}, /* cost of storing mask register
612 in QImode, HImode, SImode. */
613 2, /* cost of moving mask register. */
d321551c 614 /* End of register allocator costs. */
72bb85f8 615 },
d321551c 616

64766e8d
JH
617 COSTS_N_INSNS (1), /* cost of an add instruction */
618 COSTS_N_INSNS (1), /* cost of a lea instruction */
619 COSTS_N_INSNS (1), /* variable shift costs */
620 COSTS_N_INSNS (1), /* constant shift costs */
621 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
622 COSTS_N_INSNS (4), /* HI */
623 COSTS_N_INSNS (4), /* SI */
624 COSTS_N_INSNS (4), /* DI */
625 COSTS_N_INSNS (4)}, /* other */
626 0, /* cost of multiply per each bit set */
627 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
628 COSTS_N_INSNS (17), /* HI */
629 COSTS_N_INSNS (17), /* SI */
630 COSTS_N_INSNS (17), /* DI */
631 COSTS_N_INSNS (17)}, /* other */
632 COSTS_N_INSNS (1), /* cost of movsx */
633 COSTS_N_INSNS (1), /* cost of movzx */
634 8, /* "large" insn */
635 6, /* MOVE_RATIO */
25e22b19 636 6, /* CLEAR_RATIO */
64766e8d
JH
637 {4, 4, 4}, /* cost of loading integer registers
638 in QImode, HImode and SImode.
639 Relative to reg-reg move (2). */
640 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
641 {4, 8, 16, 32, 64}, /* cost of loading SSE register
642 in 32bit, 64bit, 128bit, 256bit and 512bit */
643 {4, 8, 16, 32, 64}, /* cost of storing SSE register
644 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 645 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 646 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
647 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
648 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
649 4, 4, /* Gather load static, per_elt. */
650 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
651 8, /* size of l1 cache. */
652 256, /* size of l2 cache */
653 32, /* size of prefetch block */
654 6, /* number of parallel prefetches */
655 2, /* Branch cost */
656 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
657 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
658 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
659 COSTS_N_INSNS (2), /* cost of FABS instruction. */
660 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
661 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
6065f444 662
c53c148c 663 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
664 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
665 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
666 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
667 COSTS_N_INSNS (7), /* cost of FMA SS instruction. */
668 COSTS_N_INSNS (7), /* cost of FMA SD instruction. */
6065f444
JH
669 COSTS_N_INSNS (18), /* cost of DIVSS instruction. */
670 COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
671 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
672 COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
64766e8d
JH
673 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
674 pentiumpro_memcpy,
675 pentiumpro_memset,
f6fd8f2b
JH
676 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
677 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
678 "16", /* Loop alignment. */
679 "16:11:8", /* Jump alignment. */
680 "0:0:8", /* Label alignment. */
681 "16", /* Func alignment. */
64766e8d
JH
682};
683
/* memcpy strategy table named for geode.  The first element has libcall as
   its outer field and two inner entries: {256, rep_prefix_4_byte, false}
   followed by {-1, libcall, false}.  The second element is the
   DUMMY_STRINGOP_ALGS placeholder.  */
684static stringop_algs geode_memcpy[2] = {
685 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
686 DUMMY_STRINGOP_ALGS};
/* memset strategy table named for geode.  Same contents as geode_memcpy:
   libcall as the outer field, {256, rep_prefix_4_byte, false} followed by
   {-1, libcall, false}, and the DUMMY_STRINGOP_ALGS placeholder as the
   second element.  */
687static stringop_algs geode_memset[2] = {
688 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
689 DUMMY_STRINGOP_ALGS};
/* Cost table geode_cost.  The initializer is positional, so the order of
   the entries must match the field order of struct processor_costs
   (declared outside this section).  The leading braced group holds the
   register allocator costs; the remaining entries are per-operation costs,
   with COSTS_N_INSNS values relative to an add.  String operations use
   geode_memcpy and geode_memset; all four alignment entries are NULL.  */
690 static const
691 struct processor_costs geode_cost = {
72bb85f8 692 {
d321551c
L
693 /* Start of register allocator costs. integer->integer move cost is 2. */
694 2, /* cost for loading QImode using movzbl */
695 {2, 2, 2}, /* cost of loading integer registers
696 in QImode, HImode and SImode.
697 Relative to reg-reg move (2). */
698 {2, 2, 2}, /* cost of storing integer registers */
699 2, /* cost of reg,reg fld/fst */
700 {2, 2, 2}, /* cost of loading fp registers
701 in SFmode, DFmode and XFmode */
702 {4, 6, 6}, /* cost of storing fp registers
703 in SFmode, DFmode and XFmode */
704 2, /* cost of moving MMX register */
705 {2, 2}, /* cost of loading MMX registers
706 in SImode and DImode */
707 {2, 2}, /* cost of storing MMX registers
708 in SImode and DImode */
709 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
710 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
711 in 32,64,128,256 and 512-bit */
712 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
713 in 32,64,128,256 and 512-bit */
ecc3135a 714 6, 6, /* SSE->integer and integer->SSE moves */
715 6, 6, /* mask->integer and integer->mask moves */
00cb3494
L
716 {2, 2, 2}, /* cost of loading mask register
717 in QImode, HImode, SImode. */
718 {2, 2, 2}, /* cost of storing mask register
719 in QImode, HImode, SImode. */
720 2, /* cost of moving mask register. */
d321551c 721 /* End of register allocator costs. */
72bb85f8 722 },
d321551c 723
64766e8d
JH
724 COSTS_N_INSNS (1), /* cost of an add instruction */
725 COSTS_N_INSNS (1), /* cost of a lea instruction */
726 COSTS_N_INSNS (2), /* variable shift costs */
727 COSTS_N_INSNS (1), /* constant shift costs */
728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
729 COSTS_N_INSNS (4), /* HI */
730 COSTS_N_INSNS (7), /* SI */
731 COSTS_N_INSNS (7), /* DI */
732 COSTS_N_INSNS (7)}, /* other */
733 0, /* cost of multiply per each bit set */
734 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
735 COSTS_N_INSNS (23), /* HI */
736 COSTS_N_INSNS (39), /* SI */
737 COSTS_N_INSNS (39), /* DI */
738 COSTS_N_INSNS (39)}, /* other */
739 COSTS_N_INSNS (1), /* cost of movsx */
740 COSTS_N_INSNS (1), /* cost of movzx */
741 8, /* "large" insn */
742 4, /* MOVE_RATIO */
25e22b19 743 4, /* CLEAR_RATIO */
df41dbaf 744 {2, 2, 2}, /* cost of loading integer registers
64766e8d
JH
745 in QImode, HImode and SImode.
746 Relative to reg-reg move (2). */
df41dbaf 747 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
748 {2, 2, 8, 16, 32}, /* cost of loading SSE register
749 in 32bit, 64bit, 128bit, 256bit and 512bit */
750 {2, 2, 8, 16, 32}, /* cost of storing SSE register
751 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 752 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
df41dbaf 753 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
754 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
755 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
756 2, 2, /* Gather load static, per_elt. */
757 2, 2, /* Gather store static, per_elt. */
64766e8d
JH
758 64, /* size of l1 cache. */
759 128, /* size of l2 cache. */
760 32, /* size of prefetch block */
761 1, /* number of parallel prefetches */
762 1, /* Branch cost */
763 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
764 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
765 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
766 COSTS_N_INSNS (1), /* cost of FABS instruction. */
767 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
768 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
6065f444 769
c53c148c 770 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
771 COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
772 COSTS_N_INSNS (11), /* cost of MULSS instruction. */
773 COSTS_N_INSNS (11), /* cost of MULSD instruction. */
c53c148c
JH
774 COSTS_N_INSNS (17), /* cost of FMA SS instruction. */
775 COSTS_N_INSNS (17), /* cost of FMA SD instruction. */
6065f444
JH
776 COSTS_N_INSNS (47), /* cost of DIVSS instruction. */
777 COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
778 COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
779 COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
64766e8d
JH
780 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
781 geode_memcpy,
782 geode_memset,
f6fd8f2b
JH
783 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
784 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
785 NULL, /* Loop alignment. */
786 NULL, /* Jump alignment. */
787 NULL, /* Label alignment. */
788 NULL, /* Func alignment. */
64766e8d
JH
789};
790
/* memcpy strategy table referenced by k6_cost.  Entry [1] is the
   DUMMY_STRINGOP_ALGS placeholder.  NOTE(review): presumably [0] is the
   32-bit and [1] the 64-bit tuning, as annotated on znver1_memcpy, and each
   inner triple is {size limit, algorithm, flag} with -1 as the catch-all --
   confirm against the stringop_algs declaration.  */
791 static stringop_algs k6_memcpy[2] = {
792 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
793 DUMMY_STRINGOP_ALGS};
/* memset strategy table referenced by k6_cost; same contents as k6_memcpy.
   Entry [1] is the DUMMY_STRINGOP_ALGS placeholder.  NOTE(review):
   presumably [0] is the 32-bit and [1] the 64-bit tuning -- confirm.  */
794 static stringop_algs k6_memset[2] = {
795 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
796 DUMMY_STRINGOP_ALGS};
/* Cost table k6_cost.  The initializer is positional, so the order of the
   entries must match the field order of struct processor_costs (declared
   outside this section).  The leading braced group holds the register
   allocator costs; the remaining entries are per-operation costs, with
   COSTS_N_INSNS values relative to an add.  String operations use
   k6_memcpy and k6_memset.  */
797 static const
798 struct processor_costs k6_cost = {
72bb85f8 799 {
d321551c
L
800 /* Start of register allocator costs. integer->integer move cost is 2. */
801 3, /* cost for loading QImode using movzbl */
802 {4, 5, 4}, /* cost of loading integer registers
803 in QImode, HImode and SImode.
804 Relative to reg-reg move (2). */
805 {2, 3, 2}, /* cost of storing integer registers */
806 4, /* cost of reg,reg fld/fst */
807 {6, 6, 6}, /* cost of loading fp registers
808 in SFmode, DFmode and XFmode */
809 {4, 4, 4}, /* cost of storing fp registers
810 in SFmode, DFmode and XFmode */
811 2, /* cost of moving MMX register */
812 {2, 2}, /* cost of loading MMX registers
813 in SImode and DImode */
814 {2, 2}, /* cost of storing MMX registers
815 in SImode and DImode */
816 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
817 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
818 in 32,64,128,256 and 512-bit */
819 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
820 in 32,64,128,256 and 512-bit */
ecc3135a 821 6, 6, /* SSE->integer and integer->SSE moves */
822 6, 6, /* mask->integer and integer->mask moves */
00cb3494
L
823 {4, 5, 4}, /* cost of loading mask register
824 in QImode, HImode, SImode. */
825 {2, 3, 2}, /* cost of storing mask register
826 in QImode, HImode, SImode. */
827 2, /* cost of moving mask register. */
d321551c 828 /* End of register allocator costs. */
72bb85f8 829 },
d321551c 830
64766e8d
JH
831 COSTS_N_INSNS (1), /* cost of an add instruction */
832 COSTS_N_INSNS (2), /* cost of a lea instruction */
833 COSTS_N_INSNS (1), /* variable shift costs */
834 COSTS_N_INSNS (1), /* constant shift costs */
835 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
836 COSTS_N_INSNS (3), /* HI */
837 COSTS_N_INSNS (3), /* SI */
838 COSTS_N_INSNS (3), /* DI */
839 COSTS_N_INSNS (3)}, /* other */
840 0, /* cost of multiply per each bit set */
841 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
842 COSTS_N_INSNS (18), /* HI */
843 COSTS_N_INSNS (18), /* SI */
844 COSTS_N_INSNS (18), /* DI */
845 COSTS_N_INSNS (18)}, /* other */
846 COSTS_N_INSNS (2), /* cost of movsx */
847 COSTS_N_INSNS (2), /* cost of movzx */
848 8, /* "large" insn */
849 4, /* MOVE_RATIO */
25e22b19 850 4, /* CLEAR_RATIO */
64766e8d
JH
851 {4, 5, 4}, /* cost of loading integer registers
852 in QImode, HImode and SImode.
853 Relative to reg-reg move (2). */
854 {2, 3, 2}, /* cost of storing integer registers */
d321551c
L
855 {2, 2, 8, 16, 32}, /* cost of loading SSE register
856 in 32bit, 64bit, 128bit, 256bit and 512bit */
857 {2, 2, 8, 16, 32}, /* cost of storing SSE register
858 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 859 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
df41dbaf 860 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
861 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
862 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
863 2, 2, /* Gather load static, per_elt. */
864 2, 2, /* Gather store static, per_elt. */
64766e8d
JH
865 32, /* size of l1 cache. */
866 32, /* size of l2 cache. Some models
867 have integrated l2 cache, but
868 optimizing for k6 is not important
869 enough to worry about that. */
870 32, /* size of prefetch block */
871 1, /* number of parallel prefetches */
872 1, /* Branch cost */
873 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
874 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
875 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
876 COSTS_N_INSNS (2), /* cost of FABS instruction. */
877 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
878 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
6065f444 879
c53c148c 880 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
881 COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
882 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
883 COSTS_N_INSNS (2), /* cost of MULSD instruction. */
c53c148c
JH
884 COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
885 COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
6065f444
JH
886 COSTS_N_INSNS (56), /* cost of DIVSS instruction. */
887 COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
888 COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
889 COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
64766e8d
JH
890 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
891 k6_memcpy,
892 k6_memset,
f6fd8f2b
JH
893 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
894 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
895 "32:8:8", /* Loop alignment. */
896 "32:8:8", /* Jump alignment. */
897 "0:0:8", /* Label alignment. */
898 "32", /* Func alignment. */
64766e8d
JH
899};
900
901/* For some reason, Athlon deals better with REP prefix (relative to loops)
902 compared to K8. Alignment becomes important after 8 bytes for memcpy and
903 128 bytes for memset. */
/* memcpy strategy table referenced by athlon_cost.  Entry [1] is the
   DUMMY_STRINGOP_ALGS placeholder.  NOTE(review): presumably [0] is the
   32-bit and [1] the 64-bit tuning, as annotated on znver1_memcpy, and each
   inner triple is {size limit, algorithm, flag} with -1 as the catch-all --
   confirm against the stringop_algs declaration.  */
904 static stringop_algs athlon_memcpy[2] = {
905 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
906 DUMMY_STRINGOP_ALGS};
/* memset strategy table referenced by athlon_cost; same contents as
   athlon_memcpy.  Entry [1] is the DUMMY_STRINGOP_ALGS placeholder.
   NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit tuning --
   confirm.  */
907 static stringop_algs athlon_memset[2] = {
908 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
909 DUMMY_STRINGOP_ALGS};
/* Cost table athlon_cost.  The initializer is positional, so the order of
   the entries must match the field order of struct processor_costs
   (declared outside this section).  The leading braced group holds the
   register allocator costs; the remaining entries are per-operation costs,
   with COSTS_N_INSNS values relative to an add.  String operations use
   athlon_memcpy and athlon_memset.  */
910 static const
911 struct processor_costs athlon_cost = {
72bb85f8 912 {
d321551c
L
913 /* Start of register allocator costs. integer->integer move cost is 2. */
914 4, /* cost for loading QImode using movzbl */
915 {3, 4, 3}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {3, 4, 3}, /* cost of storing integer registers */
919 4, /* cost of reg,reg fld/fst */
920 {4, 4, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {6, 6, 8}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 2, /* cost of moving MMX register */
925 {4, 4}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {4, 4}, /* cost of storing MMX registers
928 in SImode and DImode */
929 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
930 {4, 4, 12, 12, 24}, /* cost of loading SSE registers
931 in 32,64,128,256 and 512-bit */
932 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
933 in 32,64,128,256 and 512-bit */
ecc3135a 934 5, 5, /* SSE->integer and integer->SSE moves */
935 5, 5, /* mask->integer and integer->mask moves */
00cb3494
L
936 {3, 4, 3}, /* cost of loading mask register
937 in QImode, HImode, SImode. */
938 {3, 4, 3}, /* cost of storing mask register
939 in QImode, HImode, SImode. */
940 2, /* cost of moving mask register. */
d321551c 941 /* End of register allocator costs. */
72bb85f8 942 },
d321551c 943
64766e8d
JH
944 COSTS_N_INSNS (1), /* cost of an add instruction */
945 COSTS_N_INSNS (2), /* cost of a lea instruction */
946 COSTS_N_INSNS (1), /* variable shift costs */
947 COSTS_N_INSNS (1), /* constant shift costs */
948 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
949 COSTS_N_INSNS (5), /* HI */
950 COSTS_N_INSNS (5), /* SI */
951 COSTS_N_INSNS (5), /* DI */
952 COSTS_N_INSNS (5)}, /* other */
953 0, /* cost of multiply per each bit set */
954 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
955 COSTS_N_INSNS (26), /* HI */
956 COSTS_N_INSNS (42), /* SI */
957 COSTS_N_INSNS (74), /* DI */
958 COSTS_N_INSNS (74)}, /* other */
959 COSTS_N_INSNS (1), /* cost of movsx */
960 COSTS_N_INSNS (1), /* cost of movzx */
961 8, /* "large" insn */
962 9, /* MOVE_RATIO */
25e22b19 963 6, /* CLEAR_RATIO */
64766e8d
JH
964 {3, 4, 3}, /* cost of loading integer registers
965 in QImode, HImode and SImode.
966 Relative to reg-reg move (2). */
967 {3, 4, 3}, /* cost of storing integer registers */
d321551c
L
968 {4, 4, 12, 12, 24}, /* cost of loading SSE register
969 in 32bit, 64bit, 128bit, 256bit and 512bit */
970 {4, 4, 10, 10, 20}, /* cost of storing SSE register
971 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 972 {4, 4, 12, 12, 24}, /* cost of unaligned loads. */
b7167993 973 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
d321551c
L
974 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
975 5, /* cost of moving SSE register to integer. */
a4fe6139
JH
976 4, 4, /* Gather load static, per_elt. */
977 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
978 64, /* size of l1 cache. */
979 256, /* size of l2 cache. */
980 64, /* size of prefetch block */
981 6, /* number of parallel prefetches */
982 5, /* Branch cost */
983 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
984 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
985 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
986 COSTS_N_INSNS (2), /* cost of FABS instruction. */
987 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
988 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 989
c53c148c 990 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
991 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
992 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
993 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
994 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
995 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
996 /* 11-16 */
997 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
998 COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
999 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
1000 COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
64766e8d
JH
1001 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1002 athlon_memcpy,
1003 athlon_memset,
f6fd8f2b
JH
1004 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1005 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1006 "16:8:8", /* Loop alignment. */
1007 "16:8:8", /* Jump alignment. */
1008 "0:0:8", /* Label alignment. */
1009 "16", /* Func alignment. */
64766e8d
JH
1010};
1011
1012/* K8 has an optimized REP instruction for medium-sized blocks, but for very
1013 small blocks it is better to use a loop. For large blocks, a libcall can
1014 do non-temporal accesses and beat inline code considerably. */
/* memcpy strategy table referenced by k8_cost.  NOTE(review): presumably
   [0] is the 32-bit and [1] the 64-bit tuning, as annotated on
   znver1_memcpy, and each inner triple is {size limit, algorithm, flag}
   with -1 as the catch-all -- confirm against the stringop_algs
   declaration.  */
1015 static stringop_algs k8_memcpy[2] = {
1016 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1017 {-1, rep_prefix_4_byte, false}}},
1018 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1019 {-1, libcall, false}}}};
/* memset strategy table referenced by k8_cost.  NOTE(review): presumably
   [0] is the 32-bit and [1] the 64-bit tuning, as annotated on
   znver1_memset -- confirm.  */
1020 static stringop_algs k8_memset[2] = {
1021 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1022 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1023 {libcall, {{48, unrolled_loop, false},
1024 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table k8_cost.  The initializer is positional, so the order of the
   entries must match the field order of struct processor_costs (declared
   outside this section).  The leading braced group holds the register
   allocator costs; the remaining entries are per-operation costs, with
   COSTS_N_INSNS values relative to an add.  String operations use
   k8_memcpy and k8_memset.  */
1025 static const
1026 struct processor_costs k8_cost = {
72bb85f8 1027 {
d321551c
L
1028 /* Start of register allocator costs. integer->integer move cost is 2. */
1029 4, /* cost for loading QImode using movzbl */
1030 {3, 4, 3}, /* cost of loading integer registers
1031 in QImode, HImode and SImode.
1032 Relative to reg-reg move (2). */
1033 {3, 4, 3}, /* cost of storing integer registers */
1034 4, /* cost of reg,reg fld/fst */
1035 {4, 4, 12}, /* cost of loading fp registers
1036 in SFmode, DFmode and XFmode */
1037 {6, 6, 8}, /* cost of storing fp registers
1038 in SFmode, DFmode and XFmode */
1039 2, /* cost of moving MMX register */
1040 {3, 3}, /* cost of loading MMX registers
1041 in SImode and DImode */
1042 {4, 4}, /* cost of storing MMX registers
1043 in SImode and DImode */
1044 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1045 {4, 3, 12, 12, 24}, /* cost of loading SSE registers
1046 in 32,64,128,256 and 512-bit */
1047 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
1048 in 32,64,128,256 and 512-bit */
ecc3135a 1049 5, 5, /* SSE->integer and integer->SSE moves */
1050 5, 5, /* mask->integer and integer->mask moves */
00cb3494
L
1051 {3, 4, 3}, /* cost of loading mask register
1052 in QImode, HImode, SImode. */
1053 {3, 4, 3}, /* cost of storing mask register
1054 in QImode, HImode, SImode. */
1055 2, /* cost of moving mask register. */
d321551c 1056 /* End of register allocator costs. */
72bb85f8 1057 },
d321551c 1058
64766e8d
JH
1059 COSTS_N_INSNS (1), /* cost of an add instruction */
1060 COSTS_N_INSNS (2), /* cost of a lea instruction */
1061 COSTS_N_INSNS (1), /* variable shift costs */
1062 COSTS_N_INSNS (1), /* constant shift costs */
1063 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1064 COSTS_N_INSNS (4), /* HI */
1065 COSTS_N_INSNS (3), /* SI */
1066 COSTS_N_INSNS (4), /* DI */
1067 COSTS_N_INSNS (5)}, /* other */
1068 0, /* cost of multiply per each bit set */
1069 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1070 COSTS_N_INSNS (26), /* HI */
1071 COSTS_N_INSNS (42), /* SI */
1072 COSTS_N_INSNS (74), /* DI */
1073 COSTS_N_INSNS (74)}, /* other */
1074 COSTS_N_INSNS (1), /* cost of movsx */
1075 COSTS_N_INSNS (1), /* cost of movzx */
1076 8, /* "large" insn */
1077 9, /* MOVE_RATIO */
25e22b19 1078 6, /* CLEAR_RATIO */
64766e8d
JH
1079 {3, 4, 3}, /* cost of loading integer registers
1080 in QImode, HImode and SImode.
1081 Relative to reg-reg move (2). */
1082 {3, 4, 3}, /* cost of storing integer registers */
d321551c
L
1083 {4, 3, 12, 12, 24}, /* cost of loading SSE register
1084 in 32bit, 64bit, 128bit, 256bit and 512bit */
1085 {4, 4, 10, 10, 20}, /* cost of storing SSE register
1086 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1087 {4, 3, 12, 12, 24}, /* cost of unaligned loads. */
b7167993 1088 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
d321551c
L
1089 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1090 5, /* cost of moving SSE register to integer. */
a4fe6139
JH
1091 4, 4, /* Gather load static, per_elt. */
1092 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
1093 64, /* size of l1 cache. */
1094 512, /* size of l2 cache. */
1095 64, /* size of prefetch block */
1096 /* New AMD processors never drop prefetches; if they cannot be performed
1097 immediately, they are queued. We set number of simultaneous prefetches
1098 to a large constant to reflect this (it probably is not a good idea not
1099 to limit number of prefetches at all, as their execution also takes some
1100 time). */
1101 100, /* number of parallel prefetches */
1102 3, /* Branch cost */
1103 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1104 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1105 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1106 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1107 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1108 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1109
c53c148c 1110 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1111 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1112 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1113 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1114 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
1115 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
1116 /* 11-16 */
1117 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
1118 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
1119 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
1120 COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
64766e8d
JH
1121 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1122 k8_memcpy,
1123 k8_memset,
f6fd8f2b
JH
1124 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1125 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1126 "16:8:8", /* Loop alignment. */
1127 "16:8:8", /* Jump alignment. */
1128 "0:0:8", /* Label alignment. */
1129 "16", /* Func alignment. */
64766e8d
JH
1130};
1131
1132/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
1133 very small blocks it is better to use a loop. For large blocks, a libcall can
1134 do non-temporal accesses and beat inline code considerably. */
/* memcpy strategy table referenced by amdfam10_cost; the values match
   k8_memcpy.  NOTE(review): presumably [0] is the 32-bit and [1] the
   64-bit tuning, as annotated on znver1_memcpy -- confirm.  */
1135 static stringop_algs amdfam10_memcpy[2] = {
1136 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1137 {-1, rep_prefix_4_byte, false}}},
1138 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1139 {-1, libcall, false}}}};
/* memset strategy table referenced by amdfam10_cost; the values match
   k8_memset.  NOTE(review): presumably [0] is the 32-bit and [1] the
   64-bit tuning, as annotated on znver1_memset -- confirm.  */
1140 static stringop_algs amdfam10_memset[2] = {
1141 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1142 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1143 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1144 {-1, libcall, false}}}};
/* Cost table amdfam10_cost.  The initializer is positional, so the order of
   the entries must match the field order of struct processor_costs
   (declared outside this section).  The leading braced group holds the
   register allocator costs; the remaining entries are per-operation costs,
   with COSTS_N_INSNS values relative to an add.  String operations use
   amdfam10_memcpy and amdfam10_memset.
   NOTE(review): unlike the tables defined before it in this section, this
   one is neither static nor const; presumably it is referenced or adjusted
   from another translation unit -- confirm before changing its linkage.  */
1145 struct processor_costs amdfam10_cost = {
72bb85f8 1146 {
d321551c 1147 /* Start of register allocator costs. integer->integer move cost is 2. */
64766e8d
JH
1148 4, /* cost for loading QImode using movzbl */
1149 {3, 4, 3}, /* cost of loading integer registers
1150 in QImode, HImode and SImode.
1151 Relative to reg-reg move (2). */
1152 {3, 4, 3}, /* cost of storing integer registers */
1153 4, /* cost of reg,reg fld/fst */
1154 {4, 4, 12}, /* cost of loading fp registers
1155 in SFmode, DFmode and XFmode */
1156 {6, 6, 8}, /* cost of storing fp registers
1157 in SFmode, DFmode and XFmode */
1158 2, /* cost of moving MMX register */
1159 {3, 3}, /* cost of loading MMX registers
1160 in SImode and DImode */
1161 {4, 4}, /* cost of storing MMX registers
1162 in SImode and DImode */
df41dbaf
JH
1163 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1164 {4, 4, 3, 6, 12}, /* cost of loading SSE registers
1165 in 32,64,128,256 and 512-bit */
df41dbaf
JH
1166 {4, 4, 5, 10, 20}, /* cost of storing SSE registers
1167 in 32,64,128,256 and 512-bit */
ecc3135a 1168 3, 3, /* SSE->integer and integer->SSE moves */
1169 3, 3, /* mask->integer and integer->mask moves */
00cb3494
L
1170 {3, 4, 3}, /* cost of loading mask register
1171 in QImode, HImode, SImode. */
1172 {3, 4, 3}, /* cost of storing mask register
1173 in QImode, HImode, SImode. */
1174 2, /* cost of moving mask register. */
d321551c 1175
64766e8d
JH
1176 /* On K8:
1177 MOVD reg64, xmmreg Double FSTORE 4
1178 MOVD reg32, xmmreg Double FSTORE 4
1179 On AMDFAM10:
1180 MOVD reg64, xmmreg Double FADD 3
1181 1/1 1/1
1182 MOVD reg32, xmmreg Double FADD 3
1183 1/1 1/1 */
d321551c 1184 /* End of register allocator costs. */
72bb85f8 1185 },
d321551c
L
1186
1187 COSTS_N_INSNS (1), /* cost of an add instruction */
1188 COSTS_N_INSNS (2), /* cost of a lea instruction */
1189 COSTS_N_INSNS (1), /* variable shift costs */
1190 COSTS_N_INSNS (1), /* constant shift costs */
1191 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1192 COSTS_N_INSNS (4), /* HI */
1193 COSTS_N_INSNS (3), /* SI */
1194 COSTS_N_INSNS (4), /* DI */
1195 COSTS_N_INSNS (5)}, /* other */
1196 0, /* cost of multiply per each bit set */
1197 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1198 COSTS_N_INSNS (35), /* HI */
1199 COSTS_N_INSNS (51), /* SI */
1200 COSTS_N_INSNS (83), /* DI */
1201 COSTS_N_INSNS (83)}, /* other */
1202 COSTS_N_INSNS (1), /* cost of movsx */
1203 COSTS_N_INSNS (1), /* cost of movzx */
1204 8, /* "large" insn */
1205 9, /* MOVE_RATIO */
25e22b19 1206 6, /* CLEAR_RATIO */
d321551c
L
1207 {3, 4, 3}, /* cost of loading integer registers
1208 in QImode, HImode and SImode.
1209 Relative to reg-reg move (2). */
1210 {3, 4, 3}, /* cost of storing integer registers */
1211 {4, 4, 3, 6, 12}, /* cost of loading SSE register
1212 in 32bit, 64bit, 128bit, 256bit and 512bit */
1213 {4, 4, 5, 10, 20}, /* cost of storing SSE register
1214 in 32bit, 64bit, 128bit, 256bit and 512bit */
1215 {4, 4, 3, 7, 12}, /* cost of unaligned loads.
   NOTE(review): the 256-bit entry (7) differs from the
   aligned load cost (6) listed above -- confirm this
   is intended. */
1216 {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
1217 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1218 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
1219 4, 4, /* Gather load static, per_elt. */
1220 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
1221 64, /* size of l1 cache. */
1222 512, /* size of l2 cache. */
1223 64, /* size of prefetch block */
1224 /* New AMD processors never drop prefetches; if they cannot be performed
1225 immediately, they are queued. We set number of simultaneous prefetches
1226 to a large constant to reflect this (it probably is not a good idea not
1227 to limit number of prefetches at all, as their execution also takes some
1228 time). */
1229 100, /* number of parallel prefetches */
1230 2, /* Branch cost */
1231 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1232 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1233 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1234 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1235 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1236 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1237
c53c148c 1238 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1239 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1240 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1241 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1242 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
1243 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
1244 /* 11-16 */
1245 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
1246 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
1247 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
1248 COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
64766e8d
JH
1249 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1250 amdfam10_memcpy,
1251 amdfam10_memset,
f6fd8f2b
JH
1252 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
1253 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1254 "32:25:8", /* Loop alignment. */
1255 "32:8:8", /* Jump alignment. */
1256 "0:0:8", /* Label alignment. */
1257 "32", /* Func alignment. */
64766e8d
JH
1258};
1259
c727b835 1260/* BDVER has an optimized REP instruction for medium-sized blocks, but for
64766e8d
JH
1261 very small blocks it is better to use a loop. For large blocks, a libcall
1262 can do non-temporal accesses and beat inline code considerably. */
/* memcpy strategy table referenced by bdver_cost; the values match
   k8_memcpy.  NOTE(review): presumably [0] is the 32-bit and [1] the
   64-bit tuning, as annotated on znver1_memcpy -- confirm.  */
c727b835 1263static stringop_algs bdver_memcpy[2] = {
64766e8d
JH
1264 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1265 {-1, rep_prefix_4_byte, false}}},
1266 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1267 {-1, libcall, false}}}};
/* memset strategy table referenced by bdver_cost; the values match
   k8_memset.  NOTE(review): presumably [0] is the 32-bit and [1] the
   64-bit tuning, as annotated on znver1_memset -- confirm.  */
c727b835 1268static stringop_algs bdver_memset[2] = {
64766e8d
JH
1269 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1270 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1271 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1272 {-1, libcall, false}}}};
1273
/* Cost table bdver_cost.  The initializer is positional, so the order of
   the entries must match the field order of struct processor_costs
   (declared outside this section).  The leading braced group holds the
   register allocator costs; the remaining entries are per-operation costs,
   with COSTS_N_INSNS values relative to an add.  String operations use
   bdver_memcpy and bdver_memset.  Declared const but, unlike the earlier
   tables in this section, not static.  */
c727b835 1274const struct processor_costs bdver_cost = {
72bb85f8 1275 {
d321551c
L
1276 /* Start of register allocator costs. integer->integer move cost is 2. */
1277 8, /* cost for loading QImode using movzbl */
1278 {8, 8, 8}, /* cost of loading integer registers
1279 in QImode, HImode and SImode.
1280 Relative to reg-reg move (2). */
1281 {8, 8, 8}, /* cost of storing integer registers */
1282 4, /* cost of reg,reg fld/fst */
1283 {12, 12, 28}, /* cost of loading fp registers
1284 in SFmode, DFmode and XFmode */
1285 {10, 10, 18}, /* cost of storing fp registers
1286 in SFmode, DFmode and XFmode */
1287 4, /* cost of moving MMX register */
1288 {12, 12}, /* cost of loading MMX registers
1289 in SImode and DImode */
1290 {10, 10}, /* cost of storing MMX registers
1291 in SImode and DImode */
1292 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1293 {12, 12, 10, 40, 60}, /* cost of loading SSE registers
1294 in 32,64,128,256 and 512-bit */
1295 {10, 10, 10, 40, 60}, /* cost of storing SSE registers
1296 in 32,64,128,256 and 512-bit */
1297 16, 20, /* SSE->integer and integer->SSE moves */
ecc3135a 1298 16, 20, /* mask->integer and integer->mask moves */
00cb3494
L
1299 {8, 8, 8}, /* cost of loading mask register
1300 in QImode, HImode, SImode. */
1301 {8, 8, 8}, /* cost of storing mask register
1302 in QImode, HImode, SImode. */
1303 2, /* cost of moving mask register. */
d321551c 1304 /* End of register allocator costs. */
72bb85f8 1305 },
d321551c 1306
64766e8d
JH
1307 COSTS_N_INSNS (1), /* cost of an add instruction */
1308 COSTS_N_INSNS (1), /* cost of a lea instruction */
1309 COSTS_N_INSNS (1), /* variable shift costs */
1310 COSTS_N_INSNS (1), /* constant shift costs */
1311 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1312 COSTS_N_INSNS (4), /* HI */
1313 COSTS_N_INSNS (4), /* SI */
1314 COSTS_N_INSNS (6), /* DI */
1315 COSTS_N_INSNS (6)}, /* other */
1316 0, /* cost of multiply per each bit set */
1317 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1318 COSTS_N_INSNS (35), /* HI */
1319 COSTS_N_INSNS (51), /* SI */
1320 COSTS_N_INSNS (83), /* DI */
1321 COSTS_N_INSNS (83)}, /* other */
1322 COSTS_N_INSNS (1), /* cost of movsx */
1323 COSTS_N_INSNS (1), /* cost of movzx */
1324 8, /* "large" insn */
1325 9, /* MOVE_RATIO */
25e22b19 1326 6, /* CLEAR_RATIO */
df41dbaf 1327 {8, 8, 8}, /* cost of loading integer registers
64766e8d
JH
1328 in QImode, HImode and SImode.
1329 Relative to reg-reg move (2). */
df41dbaf 1330 {8, 8, 8}, /* cost of storing integer registers */
d321551c
L
1331 {12, 12, 10, 40, 60}, /* cost of loading SSE register
1332 in 32bit, 64bit, 128bit, 256bit and 512bit */
1333 {10, 10, 10, 40, 60}, /* cost of storing SSE register
1334 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1335 {12, 12, 10, 40, 60}, /* cost of unaligned loads. */
b7167993 1336 {10, 10, 10, 40, 60}, /* cost of unaligned stores. */
d321551c
L
1337 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1338 16, /* cost of moving SSE register to integer. */
a4fe6139
JH
1339 12, 12, /* Gather load static, per_elt. */
1340 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
1341 16, /* size of l1 cache. */
1342 2048, /* size of l2 cache. */
1343 64, /* size of prefetch block */
1344 /* New AMD processors never drop prefetches; if they cannot be performed
1345 immediately, they are queued. We set number of simultaneous prefetches
1346 to a large constant to reflect this (it probably is not a good idea not
1347 to limit number of prefetches at all, as their execution also takes some
1348 time). */
1349 100, /* number of parallel prefetches */
1350 2, /* Branch cost */
1351 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1352 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1353 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1354 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1355 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1356 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
6065f444 1357
c53c148c 1358 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1359 COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
1360 COSTS_N_INSNS (6), /* cost of MULSS instruction. */
1361 COSTS_N_INSNS (6), /* cost of MULSD instruction. */
c53c148c
JH
1362 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
1363 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
1364 /* 9-24 */
1365 COSTS_N_INSNS (24), /* cost of DIVSS instruction. */
1366 /* 9-27 */
1367 COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
1368 COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
1369 COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
64766e8d 1370 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
c727b835
RB
1371 bdver_memcpy,
1372 bdver_memset,
f6fd8f2b
JH
1373 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1374 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1375 "16:11:8", /* Loop alignment. */
1376 "16:8:8", /* Jump alignment. */
1377 "0:0:8", /* Label alignment. */
1378 "11", /* Func alignment.  NOTE(review): 11 is the only
   function alignment in this section that is not a
   power of two -- confirm it is intended. */
64766e8d
JH
1379};
1380
1381
1382/* ZNVER1 has an optimized REP instruction for medium-sized blocks, but for
1383 very small blocks it is better to use a loop. For large blocks, a libcall
1384 can do non-temporal accesses and beat inline code considerably. */
/* memcpy strategy table with one entry per tuning: [0] for 32-bit and [1]
   for 64-bit.  The 32-bit entry lists no rep_prefix stage: it goes from
   loop to unrolled_loop to libcall.  NOTE(review): each inner triple reads
   as {size limit, algorithm, flag} with -1 as the catch-all -- confirm
   against the stringop_algs declaration.  */
1385 static stringop_algs znver1_memcpy[2] = {
da346efd
ML
1386 /* 32-bit tuning. */
1387 {libcall, {{6, loop, false},
1388 {14, unrolled_loop, false},
dc65aba7 1389 {-1, libcall, false}}},
da346efd
ML
1390 /* 64-bit tuning. */
1391 {libcall, {{16, loop, false},
dc65aba7 1392 {128, rep_prefix_8_byte, false},
64766e8d
JH
1393 {-1, libcall, false}}}};
/* memset strategy table paired with znver1_memcpy in znver1_cost. Entry [0]
   is the 32-bit tuning (thresholds 8 / 24 / 128, then libcall) and entry [1]
   the 64-bit tuning (thresholds 48 / 128, then libcall). */
1394static stringop_algs znver1_memset[2] = {
da346efd
ML
1395 /* 32-bit tuning. */
1396 {libcall, {{8, loop, false},
1397 {24, unrolled_loop, false},
dc65aba7 1398 {128, rep_prefix_4_byte, false},
da346efd
ML
1399 {-1, libcall, false}}},
1400 /* 64-bit tuning. */
1401 {libcall, {{48, unrolled_loop, false},
dc65aba7 1402 {128, rep_prefix_8_byte, false},
64766e8d
JH
1403 {-1, libcall, false}}}};
/* Cost table named for ZNVER1 (the inline comments refer to Zen/Ryzen).
   The initializers are positional, so their order must match the member
   order of struct processor_costs (declared elsewhere). */
1404struct processor_costs znver1_cost = {
72bb85f8 1405 {
d321551c
L
1406 /* Start of register allocator costs. integer->integer move cost is 2. */
1407
1408 /* reg-reg moves are done by renaming and thus they are even cheaper than
1409 1 cycle. Because reg-reg move cost is 2 and the following tables correspond
1410 to doubles of latencies, we do not model this correctly. It does not
1411 seem to make practical difference to bump prices up even more. */
1412 6, /* cost for loading QImode using
1413 movzbl. */
1414 {6, 6, 6}, /* cost of loading integer registers
1415 in QImode, HImode and SImode.
1416 Relative to reg-reg move (2). */
1417 {8, 8, 8}, /* cost of storing integer
1418 registers. */
1419 2, /* cost of reg,reg fld/fst. */
1420 {6, 6, 16}, /* cost of loading fp registers
1421 in SFmode, DFmode and XFmode. */
1422 {8, 8, 16}, /* cost of storing fp registers
1423 in SFmode, DFmode and XFmode. */
1424 2, /* cost of moving MMX register. */
1425 {6, 6}, /* cost of loading MMX registers
1426 in SImode and DImode. */
1427 {8, 8}, /* cost of storing MMX registers
1428 in SImode and DImode. */
1429 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
1430 {6, 6, 6, 12, 24}, /* cost of loading SSE registers
1431 in 32,64,128,256 and 512-bit. */
1432 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
1433 in 32,64,128,256 and 512-bit. */
ecc3135a 1434 6, 6, /* SSE->integer and integer->SSE moves. */
1435 8, 8, /* mask->integer and integer->mask moves */
00cb3494
L
1436 {6, 6, 6}, /* cost of loading mask register
1437 in QImode, HImode, SImode. */
1438 {8, 8, 8}, /* cost of storing mask register
1439 in QImode, HImode, SImode. */
1440 2, /* cost of moving mask register. */
d321551c 1441 /* End of register allocator costs. */
72bb85f8 1442 },
d321551c 1443
64766e8d
JH
1444 COSTS_N_INSNS (1), /* cost of an add instruction. */
1445 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1446 COSTS_N_INSNS (1), /* variable shift costs. */
1447 COSTS_N_INSNS (1), /* constant shift costs. */
1448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1449 COSTS_N_INSNS (3), /* HI. */
1450 COSTS_N_INSNS (3), /* SI. */
6065f444
JH
1451 COSTS_N_INSNS (3), /* DI. */
1452 COSTS_N_INSNS (3)}, /* other. */
64766e8d
JH
1453 0, /* cost of multiply per each bit
1454 set. */
6065f444
JH
1455 /* Depending on parameters, idiv can get faster on ryzen. This is an upper
1456 bound. */
1457 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
1458 COSTS_N_INSNS (22), /* HI. */
1459 COSTS_N_INSNS (30), /* SI. */
1460 COSTS_N_INSNS (45), /* DI. */
1461 COSTS_N_INSNS (45)}, /* other. */
64766e8d
JH
1462 COSTS_N_INSNS (1), /* cost of movsx. */
1463 COSTS_N_INSNS (1), /* cost of movzx. */
1464 8, /* "large" insn. */
1465 9, /* MOVE_RATIO. */
25e22b19 1466 6, /* CLEAR_RATIO */
01118373 1467 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
1468 in QImode, HImode and SImode.
1469 Relative to reg-reg move (2). */
01118373 1470 {8, 8, 8}, /* cost of storing integer
64766e8d 1471 registers. */
d321551c
L
1472 {6, 6, 6, 12, 24}, /* cost of loading SSE register
1473 in 32bit, 64bit, 128bit, 256bit and 512bit */
1474 {8, 8, 8, 16, 32}, /* cost of storing SSE register
1475 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1476 {6, 6, 6, 12, 24}, /* cost of unaligned loads. */
b7167993 1477 {8, 8, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
1478 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
1479 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
1480 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
1481 throughput 12. Approx 9 uops do not depend on vector size and every load
1482 is 7 uops.
 NOTE(review): VGATHERDPD is named twice above; the second mnemonic is
 presumably VGATHERDPS, as in the corresponding znver3_cost comment --
 confirm. */
1483 18, 8, /* Gather load static, per_elt. */
1484 18, 10, /* Gather store static, per_elt. */
64766e8d
JH
1485 32, /* size of l1 cache. */
1486 512, /* size of l2 cache. */
1487 64, /* size of prefetch block. */
1488 /* New AMD processors never drop prefetches; if they cannot be performed
1489 immediately, they are queued. We set number of simultaneous prefetches
1490 to a large constant to reflect this (it probably is not a good idea not
1491 to limit number of prefetches at all, as their execution also takes some
1492 time). */
1493 100, /* number of parallel prefetches. */
1494 3, /* Branch cost. */
6065f444
JH
1495 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1496 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1497 /* Latency of fdiv is 8-15. */
1498 COSTS_N_INSNS (15), /* cost of FDIV instruction. */
1499 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1500 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1501 /* Latency of fsqrt is 4-10. */
1502 COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
1503
c53c148c 1504 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
1505 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1506 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
1507 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1508 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1509 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
1510 COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
1511 /* 9-13 */
1512 COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
1513 COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
1514 COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
64766e8d
JH
1515 /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
1516 and it can execute 2 integer additions and 2 multiplications thus
1517 reassociation may make sense up to a width of 6. SPEC2k6 benchmarks suggest
1518 that 4 works better than 6 probably due to register pressure.
1519
1520 Integer vector operations are taken by FP unit and execute 3 vector
1521 plus/minus operations per cycle but only one multiply. This is adjusted
1522 in ix86_reassociation_width. */
1523 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
1524 znver1_memcpy,
1525 znver1_memset,
f6fd8f2b
JH
1526 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1527 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1528 "16", /* Loop alignment. */
1529 "16", /* Jump alignment. */
1530 "0:0:8", /* Label alignment. */
1531 "16", /* Func alignment. */
64766e8d
JH
1532};
1533
2901f42f
VK
1534/* ZNVER2 has an optimized REP instruction for medium sized blocks, but for
1535 very small blocks it is better to use a loop. For large blocks, libcall
1536 can do non-temporal accesses and beat inline considerably.
 Entry [0] holds the 32-bit tuning and entry [1] the 64-bit tuning. Each
 inner triple pairs a size bound with the algorithm to use; the exact meaning
 of the -1 bound and of the trailing flag comes from the stringop_algs
 declaration (not visible here) -- presumably "no upper bound" and
 "noalign"; confirm. */
1537static stringop_algs znver2_memcpy[2] = {
da346efd
ML
1538 /* 32-bit tuning. */
1539 {libcall, {{6, loop, false},
1540 {14, unrolled_loop, false},
dc65aba7 1541 {-1, libcall, false}}},
da346efd
ML
1542 /* 64-bit tuning. */
1543 {libcall, {{16, loop, false},
1544 {64, rep_prefix_4_byte, false},
2901f42f
VK
1545 {-1, libcall, false}}}};
/* memset strategy table paired with znver2_memcpy; it is referenced by both
   znver2_cost and znver3_cost. Entry [0] is the 32-bit tuning (thresholds
   8 / 24 / 128, then libcall) and entry [1] the 64-bit tuning (thresholds
   24 / 128, then libcall). */
1546static stringop_algs znver2_memset[2] = {
da346efd
ML
1547 /* 32-bit tuning. */
1548 {libcall, {{8, loop, false},
1549 {24, unrolled_loop, false},
dc65aba7 1550 {128, rep_prefix_4_byte, false},
da346efd
ML
1551 {-1, libcall, false}}},
1552 /* 64-bit tuning. */
1553 {libcall, {{24, rep_prefix_4_byte, false},
1554 {128, rep_prefix_8_byte, false},
2901f42f
VK
1555 {-1, libcall, false}}}};
1556
/* Cost table named for ZNVER2 (the inline comments refer to Zen/Ryzen).
   The initializers are positional, so their order must match the member
   order of struct processor_costs (declared elsewhere). */
1557struct processor_costs znver2_cost = {
72bb85f8 1558 {
d321551c 1559 /* Start of register allocator costs. integer->integer move cost is 2. */
2901f42f 1560
5b32a181
JH
1561 /* reg-reg moves are done by renaming and thus they are even cheaper than
1562 1 cycle. Because reg-reg move cost is 2 and following tables correspond
1563 to doubles of latencies, we do not model this correctly. It does not
1564 seem to make practical difference to bump prices up even more. */
1565 6, /* cost for loading QImode using
1566 movzbl. */
1567 {6, 6, 6}, /* cost of loading integer registers
1568 in QImode, HImode and SImode.
1569 Relative to reg-reg move (2). */
1570 {8, 8, 8}, /* cost of storing integer
1571 registers. */
1572 2, /* cost of reg,reg fld/fst. */
1573 {6, 6, 16}, /* cost of loading fp registers
1574 in SFmode, DFmode and XFmode. */
1575 {8, 8, 16}, /* cost of storing fp registers
1576 in SFmode, DFmode and XFmode. */
1577 2, /* cost of moving MMX register. */
1578 {6, 6}, /* cost of loading MMX registers
1579 in SImode and DImode. */
1580 {8, 8}, /* cost of storing MMX registers
1581 in SImode and DImode. */
1582 2, 2, 3, /* cost of moving XMM,YMM,ZMM
1583 register. */
1584 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
1585 in 32,64,128,256 and 512-bit. */
1586 {8, 8, 8, 8, 16}, /* cost of storing SSE registers
1587 in 32,64,128,256 and 512-bit. */
1588 6, 6, /* SSE->integer and integer->SSE
1589 moves. */
1590 8, 8, /* mask->integer and integer->mask moves */
1591 {6, 6, 6}, /* cost of loading mask register
1592 in QImode, HImode, SImode. */
1593 {8, 8, 8}, /* cost of storing mask register
1594 in QImode, HImode, SImode. */
1595 2, /* cost of moving mask register. */
1596 /* End of register allocator costs. */
1597 },
1598
1599 COSTS_N_INSNS (1), /* cost of an add instruction. */
1600 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1601 COSTS_N_INSNS (1), /* variable shift costs. */
1602 COSTS_N_INSNS (1), /* constant shift costs. */
1603 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1604 COSTS_N_INSNS (3), /* HI. */
1605 COSTS_N_INSNS (3), /* SI. */
1606 COSTS_N_INSNS (3), /* DI. */
1607 COSTS_N_INSNS (3)}, /* other. */
1608 0, /* cost of multiply per each bit
1609 set. */
1610 /* Depending on parameters, idiv can get faster on ryzen. This is an upper
1611 bound. */
1612 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
1613 COSTS_N_INSNS (22), /* HI. */
1614 COSTS_N_INSNS (30), /* SI. */
1615 COSTS_N_INSNS (45), /* DI. */
1616 COSTS_N_INSNS (45)}, /* other. */
1617 COSTS_N_INSNS (1), /* cost of movsx. */
1618 COSTS_N_INSNS (1), /* cost of movzx. */
1619 8, /* "large" insn. */
1620 9, /* MOVE_RATIO. */
1621 6, /* CLEAR_RATIO */
1622 {6, 6, 6}, /* cost of loading integer registers
1623 in QImode, HImode and SImode.
1624 Relative to reg-reg move (2). */
1625 {8, 8, 8}, /* cost of storing integer
1626 registers. */
1627 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
1628 in 32bit, 64bit, 128bit, 256bit and 512bit */
1629 {8, 8, 8, 8, 16}, /* cost of storing SSE register
1630 in 32bit, 64bit, 128bit, 256bit and 512bit */
1631 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
1632 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
1633 2, 2, 3, /* cost of moving XMM,YMM,ZMM
1634 register. */
1635 6, /* cost of moving SSE register to integer. */
1636 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
1637 throughput 12. Approx 9 uops do not depend on vector size and every load
1638 is 7 uops.
 NOTE(review): VGATHERDPD is named twice above; the second mnemonic is
 presumably VGATHERDPS, as in the corresponding znver3_cost comment --
 confirm. */
1639 18, 8, /* Gather load static, per_elt. */
1640 18, 10, /* Gather store static, per_elt. */
1641 32, /* size of l1 cache. */
1642 512, /* size of l2 cache. */
1643 64, /* size of prefetch block. */
1644 /* New AMD processors never drop prefetches; if they cannot be performed
1645 immediately, they are queued. We set number of simultaneous prefetches
1646 to a large constant to reflect this (it probably is not a good idea not
1647 to limit number of prefetches at all, as their execution also takes some
1648 time). */
1649 100, /* number of parallel prefetches. */
1650 3, /* Branch cost. */
1651 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1652 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1653 /* Latency of fdiv is 8-15. */
1654 COSTS_N_INSNS (15), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1657 /* Latency of fsqrt is 4-10. */
1658 COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
1659
1660 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1661 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1662 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
1663 COSTS_N_INSNS (3), /* cost of MULSD instruction. */
1664 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1665 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
1666 COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
1667 /* 9-13. */
1668 COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
1669 COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
1670 COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
1671 /* Zen can execute 4 integer operations per cycle. FP operations
1672 take 3 cycles and it can execute 2 integer additions and 2
1673 multiplications thus reassociation may make sense up to a width of 6.
1674 SPEC2k6 benchmarks suggest
1675 that 4 works better than 6 probably due to register pressure.
1676
1677 Integer vector operations are taken by FP unit and execute 3 vector
1678 plus/minus operations per cycle but only one multiply. This is adjusted
1679 in ix86_reassociation_width. */
1680 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
1681 znver2_memcpy,
1682 znver2_memset,
1683 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1684 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
1685 "16", /* Loop alignment. */
1686 "16", /* Jump alignment. */
1687 "0:0:8", /* Label alignment. */
1688 "16", /* Func alignment. */
1689};
1690
/* Cost table named for ZNVER3 (the inline comments refer to Zen/Ryzen).
   The initializers are positional, so their order must match the member
   order of struct processor_costs (declared elsewhere). */
1691struct processor_costs znver3_cost = {
1692 {
1693 /* Start of register allocator costs. integer->integer move cost is 2. */
1694
2901f42f
VK
1695 /* reg-reg moves are done by renaming and thus they are even cheaper than
1696 1 cycle. Because reg-reg move cost is 2 and following tables correspond
1697 to doubles of latencies, we do not model this correctly. It does not
1698 seem to make practical difference to bump prices up even more. */
1699 6, /* cost for loading QImode using
1700 movzbl. */
1701 {6, 6, 6}, /* cost of loading integer registers
1702 in QImode, HImode and SImode.
1703 Relative to reg-reg move (2). */
1704 {8, 8, 8}, /* cost of storing integer
1705 registers. */
1706 2, /* cost of reg,reg fld/fst. */
1707 {6, 6, 16}, /* cost of loading fp registers
1708 in SFmode, DFmode and XFmode. */
1709 {8, 8, 16}, /* cost of storing fp registers
1710 in SFmode, DFmode and XFmode. */
1711 2, /* cost of moving MMX register. */
1712 {6, 6}, /* cost of loading MMX registers
1713 in SImode and DImode. */
1714 {8, 8}, /* cost of storing MMX registers
1715 in SImode and DImode. */
187dd65d 1716 2, 2, 3, /* cost of moving XMM,YMM,ZMM
2901f42f 1717 register. */
187dd65d 1718 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
2901f42f 1719 in 32,64,128,256 and 512-bit. */
2901f42f
VK
1720 {8, 8, 8, 8, 16}, /* cost of storing SSE registers
1721 in 32,64,128,256 and 512-bit. */
2901f42f
VK
1722 6, 6, /* SSE->integer and integer->SSE
1723 moves. */
ecc3135a 1724 8, 8, /* mask->integer and integer->mask moves */
00cb3494
L
1725 {6, 6, 6}, /* cost of loading mask register
1726 in QImode, HImode, SImode. */
1727 {8, 8, 8}, /* cost of storing mask register
1728 in QImode, HImode, SImode. */
1729 2, /* cost of moving mask register. */
d321551c 1730 /* End of register allocator costs. */
72bb85f8 1731 },
d321551c
L
1732
1733 COSTS_N_INSNS (1), /* cost of an add instruction. */
1734 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1735 COSTS_N_INSNS (1), /* variable shift costs. */
1736 COSTS_N_INSNS (1), /* constant shift costs. */
1737 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1738 COSTS_N_INSNS (3), /* HI. */
1739 COSTS_N_INSNS (3), /* SI. */
1740 COSTS_N_INSNS (3), /* DI. */
1741 COSTS_N_INSNS (3)}, /* other. */
1742 0, /* cost of multiply per each bit
1743 set. */
ab03c0d5
JH
1744 {COSTS_N_INSNS (9), /* cost of a divide/mod for QI. */
1745 COSTS_N_INSNS (10), /* HI. */
1746 COSTS_N_INSNS (12), /* SI. */
1747 COSTS_N_INSNS (17), /* DI. */
1748 COSTS_N_INSNS (17)}, /* other. */
d321551c
L
1749 COSTS_N_INSNS (1), /* cost of movsx. */
1750 COSTS_N_INSNS (1), /* cost of movzx. */
1751 8, /* "large" insn. */
1752 9, /* MOVE_RATIO. */
25e22b19 1753 6, /* CLEAR_RATIO */
d321551c
L
1754 {6, 6, 6}, /* cost of loading integer registers
1755 in QImode, HImode and SImode.
1756 Relative to reg-reg move (2). */
1757 {8, 8, 8}, /* cost of storing integer
1758 registers. */
1759 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
1760 in 32bit, 64bit, 128bit, 256bit and 512bit */
1761 {8, 8, 8, 8, 16}, /* cost of storing SSE register
1762 in 32bit, 64bit, 128bit, 256bit and 512bit */
1763 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
1764 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
1765 2, 2, 3, /* cost of moving XMM,YMM,ZMM
1766 register. */
1767 6, /* cost of moving SSE register to integer. */
bd364aae
JH
1768 /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
1769 throughput 9. Approx 7 uops do not depend on vector size and every load
1770 is 4 uops. */
1771 14, 8, /* Gather load static, per_elt. */
1772 14, 10, /* Gather store static, per_elt. */
2901f42f
VK
1773 32, /* size of l1 cache. */
1774 512, /* size of l2 cache. */
1775 64, /* size of prefetch block. */
1776 /* New AMD processors never drop prefetches; if they cannot be performed
1777 immediately, they are queued. We set number of simultaneous prefetches
1778 to a large constant to reflect this (it probably is not a good idea not
1779 to limit number of prefetches at all, as their execution also takes some
1780 time). */
1781 100, /* number of parallel prefetches. */
1782 3, /* Branch cost. */
1783 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1784 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1785 /* Latency of fdiv is 8-15. */
1786 COSTS_N_INSNS (15), /* cost of FDIV instruction. */
1787 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1788 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1789 /* Latency of fsqrt is 4-10. */
1790 COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
1791
1792 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1793 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1794 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
187dd65d 1795 COSTS_N_INSNS (3), /* cost of MULSD instruction. */
2901f42f
VK
1796 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1797 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
1798 COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
1799 /* 9-13. */
1800 COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
1801 COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
1802 COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
1803 /* Zen can execute 4 integer operations per cycle. FP operations
1804 take 3 cycles and it can execute 2 integer additions and 2
1805 multiplications thus reassociation may make sense up to a width of 6.
1806 SPEC2k6 benchmarks suggest
1807 that 4 works better than 6 probably due to register pressure.
1808
1809 Integer vector operations are taken by FP unit and execute 3 vector
1810 plus/minus operations per cycle but only one multiply. This is adjusted
1811 in ix86_reassociation_width. */
1812 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
1813 znver2_memcpy, /* The ZNVER2 memcpy table is reused here. */
1814 znver2_memset, /* The ZNVER2 memset table is reused here. */
1815 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1816 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
1817 "16", /* Loop alignment. */
1818 "16", /* Jump alignment. */
1819 "0:0:8", /* Label alignment. */
1820 "16", /* Func alignment. */
1821};
1822
c234d831
UB
1823/* skylake_cost should produce code tuned for the Skylake family of CPUs.
 skylake_memcpy is its memcpy strategy table; both entries are identical.
 Which entry is the 32-bit and which the 64-bit tuning is not stated here --
 presumably [0] is 32-bit and [1] is 64-bit, as labelled in the znver tables;
 confirm. */
1824static stringop_algs skylake_memcpy[2] = {
a32452a5
L
1825 {libcall,
1826 {{256, rep_prefix_1_byte, true},
1827 {256, loop, false},
1828 {-1, libcall, false}}},
1829 {libcall,
1830 {{256, rep_prefix_1_byte, true},
1831 {256, loop, false},
1832 {-1, libcall, false}}}};
c234d831
UB
1833
/* memset strategy table referenced by skylake_cost. Both entries are
   identical and use the same thresholds as skylake_memcpy. */
1834static stringop_algs skylake_memset[2] = {
a32452a5
L
1835 {libcall,
1836 {{256, rep_prefix_1_byte, true},
1837 {256, loop, false},
1838 {-1, libcall, false}}},
1839 {libcall,
1840 {{256, rep_prefix_1_byte, true},
1841 {256, loop, false},
1842 {-1, libcall, false}}}};
c234d831
UB
1843
/* Cost table for Skylake tuning. The initializers are positional, so their
   order must match the member order of struct processor_costs (declared
   elsewhere). */
1844static const
1845struct processor_costs skylake_cost = {
72bb85f8 1846 {
d321551c
L
1847 /* Start of register allocator costs. integer->integer move cost is 2. */
1848 6, /* cost for loading QImode using movzbl */
1849 {4, 4, 4}, /* cost of loading integer registers
1850 in QImode, HImode and SImode.
1851 Relative to reg-reg move (2). */
7706f2f3 1852 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
1853 2, /* cost of reg,reg fld/fst */
1854 {6, 6, 8}, /* cost of loading fp registers
1855 in SFmode, DFmode and XFmode */
1856 {6, 6, 10}, /* cost of storing fp registers
1857 in SFmode, DFmode and XFmode */
1858 2, /* cost of moving MMX register */
1859 {6, 6}, /* cost of loading MMX registers
1860 in SImode and DImode */
1861 {6, 6}, /* cost of storing MMX registers
1862 in SImode and DImode */
1863 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
1864 {6, 6, 6, 10, 20}, /* cost of loading SSE registers
1865 in 32,64,128,256 and 512-bit */
1866 {8, 8, 8, 12, 24}, /* cost of storing SSE registers
1867 in 32,64,128,256 and 512-bit */
ecc3135a 1868 6, 6, /* SSE->integer and integer->SSE moves */
1869 5, 5, /* mask->integer and integer->mask moves */
1870 {8, 8, 8}, /* cost of loading mask register
00cb3494 1871 in QImode, HImode, SImode. */
ecc3135a 1872 {6, 6, 6}, /* cost of storing mask register
00cb3494 1873 in QImode, HImode, SImode. */
16516644 1874 3, /* cost of moving mask register. */
d321551c 1875 /* End of register allocator costs. */
72bb85f8 1876 },
d321551c 1877
c234d831
UB
1878 COSTS_N_INSNS (1), /* cost of an add instruction */
1879 COSTS_N_INSNS (1)+1, /* cost of a lea instruction */
1880 COSTS_N_INSNS (1), /* variable shift costs */
1881 COSTS_N_INSNS (1), /* constant shift costs */
1882 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1883 COSTS_N_INSNS (4), /* HI */
1884 COSTS_N_INSNS (3), /* SI */
a2ef9558
MT
1885 COSTS_N_INSNS (3), /* DI */
1886 COSTS_N_INSNS (3)}, /* other */
c234d831 1887 0, /* cost of multiply per each bit set */
02308bd3
MT
1888 /* Expanding div/mod currently doesn't consider parallelism. So the cost
1889 model is not realistic. We compensate by increasing the latencies a bit. */
1890 {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
1891 COSTS_N_INSNS (11), /* HI */
1892 COSTS_N_INSNS (14), /* SI */
c234d831
UB
1893 COSTS_N_INSNS (76), /* DI */
1894 COSTS_N_INSNS (76)}, /* other */
1895 COSTS_N_INSNS (1), /* cost of movsx */
1896 COSTS_N_INSNS (0), /* cost of movzx */
1897 8, /* "large" insn */
1898 17, /* MOVE_RATIO */
a32452a5 1899 17, /* CLEAR_RATIO */
c234d831
UB
1900 {4, 4, 4}, /* cost of loading integer registers
1901 in QImode, HImode and SImode.
1902 Relative to reg-reg move (2). */
101a0841 1903 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
1904 {6, 6, 6, 10, 20}, /* cost of loading SSE register
1905 in 32bit, 64bit, 128bit, 256bit and 512bit */
d3152981 1906 {8, 8, 8, 8, 16}, /* cost of storing SSE register
d321551c 1907 in 32bit, 64bit, 128bit, 256bit and 512bit.
 NOTE(review): this differs from the register allocator SSE store costs
 {8, 8, 8, 12, 24} earlier in this table -- confirm it is intentional. */
c234d831 1908 {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
c234d831 1909 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
d321551c 1910 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
83858ba1 1911 6, /* cost of moving SSE register to integer. */
c234d831
UB
1912 20, 8, /* Gather load static, per_elt. */
1913 22, 10, /* Gather store static, per_elt. */
1914 64, /* size of l1 cache. */
1915 512, /* size of l2 cache. */
1916 64, /* size of prefetch block */
1917 6, /* number of parallel prefetches */
1918 3, /* Branch cost */
1919 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1920 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1921 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1922 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1923 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1924 COSTS_N_INSNS (20), /* cost of FSQRT instruction. */
1925
1926 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1927 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1928 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1929 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
1930 COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
1931 COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
1932 COSTS_N_INSNS (11), /* cost of DIVSS instruction. */
1933 COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
1934 COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
1935 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
1936 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
1937 skylake_memcpy,
1938 skylake_memset,
1939 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1940 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1941 "16:11:8", /* Loop alignment. */
1942 "16:11:8", /* Jump alignment. */
1943 "0:0:8", /* Label alignment. */
1944 "16", /* Func alignment. */
c234d831 1945};
bf24f4ec
L
1946
1947/* icelake_cost should produce code tuned for the Icelake family of CPUs.
1948 NB: rep_prefix_1_byte is used only for known size.
 icelake_memcpy is its memcpy strategy table; both entries are identical. */
1949
1950static stringop_algs icelake_memcpy[2] = {
1951 {libcall,
1952 {{256, rep_prefix_1_byte, true},
1953 {256, loop, false},
1954 {-1, libcall, false}}},
1955 {libcall,
1956 {{256, rep_prefix_1_byte, true},
1957 {256, loop, false},
1958 {-1, libcall, false}}}};
1959
/* memset strategy table referenced by icelake_cost. Both entries are
   identical and use the same thresholds as icelake_memcpy. */
1960static stringop_algs icelake_memset[2] = {
1961 {libcall,
1962 {{256, rep_prefix_1_byte, true},
1963 {256, loop, false},
1964 {-1, libcall, false}}},
1965 {libcall,
1966 {{256, rep_prefix_1_byte, true},
1967 {256, loop, false},
1968 {-1, libcall, false}}}};
1969
/* Cost table for Icelake tuning. The initializers are positional, so their
   order must match the member order of struct processor_costs (declared
   elsewhere). */
1970static const
1971struct processor_costs icelake_cost = {
1972 {
1973 /* Start of register allocator costs. integer->integer move cost is 2. */
1974 6, /* cost for loading QImode using movzbl */
1975 {4, 4, 4}, /* cost of loading integer registers
1976 in QImode, HImode and SImode.
1977 Relative to reg-reg move (2). */
1978 {6, 6, 6}, /* cost of storing integer registers */
1979 2, /* cost of reg,reg fld/fst */
1980 {6, 6, 8}, /* cost of loading fp registers
1981 in SFmode, DFmode and XFmode */
1982 {6, 6, 10}, /* cost of storing fp registers
1983 in SFmode, DFmode and XFmode */
1984 2, /* cost of moving MMX register */
1985 {6, 6}, /* cost of loading MMX registers
1986 in SImode and DImode */
1987 {6, 6}, /* cost of storing MMX registers
1988 in SImode and DImode */
1989 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
1990 {6, 6, 6, 10, 20}, /* cost of loading SSE registers
1991 in 32,64,128,256 and 512-bit */
1992 {8, 8, 8, 12, 24}, /* cost of storing SSE registers
1993 in 32,64,128,256 and 512-bit */
1994 6, 6, /* SSE->integer and integer->SSE moves */
1995 5, 5, /* mask->integer and integer->mask moves */
1996 {8, 8, 8}, /* cost of loading mask register
1997 in QImode, HImode, SImode. */
1998 {6, 6, 6}, /* cost of storing mask register
1999 in QImode, HImode, SImode. */
2000 3, /* cost of moving mask register. */
2001 /* End of register allocator costs. */
2002 },
2003
2004 COSTS_N_INSNS (1), /* cost of an add instruction */
2005 COSTS_N_INSNS (1)+1, /* cost of a lea instruction */
2006 COSTS_N_INSNS (1), /* variable shift costs */
2007 COSTS_N_INSNS (1), /* constant shift costs */
2008 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2009 COSTS_N_INSNS (4), /* HI */
2010 COSTS_N_INSNS (3), /* SI */
2011 COSTS_N_INSNS (3), /* DI */
2012 COSTS_N_INSNS (3)}, /* other */
2013 0, /* cost of multiply per each bit set */
2014 /* Expanding div/mod currently doesn't consider parallelism. So the cost
2015 model is not realistic. We compensate by increasing the latencies a bit. */
2016 {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
2017 COSTS_N_INSNS (11), /* HI */
2018 COSTS_N_INSNS (14), /* SI */
2019 COSTS_N_INSNS (76), /* DI */
2020 COSTS_N_INSNS (76)}, /* other */
2021 COSTS_N_INSNS (1), /* cost of movsx */
2022 COSTS_N_INSNS (0), /* cost of movzx */
2023 8, /* "large" insn */
2024 17, /* MOVE_RATIO */
2025 17, /* CLEAR_RATIO */
2026 {4, 4, 4}, /* cost of loading integer registers
2027 in QImode, HImode and SImode.
2028 Relative to reg-reg move (2). */
2029 {6, 6, 6}, /* cost of storing integer registers */
2030 {6, 6, 6, 10, 20}, /* cost of loading SSE register
2031 in 32bit, 64bit, 128bit, 256bit and 512bit */
d3152981 2032 {8, 8, 8, 8, 16}, /* cost of storing SSE register
bf24f4ec
L
2033 in 32bit, 64bit, 128bit, 256bit and 512bit.
 NOTE(review): this differs from the register allocator SSE store costs
 {8, 8, 8, 12, 24} earlier in this table -- confirm it is intentional. */
2034 {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
2035 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2036 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2037 6, /* cost of moving SSE register to integer. */
2038 20, 8, /* Gather load static, per_elt. */
2039 22, 10, /* Gather store static, per_elt. */
2040 64, /* size of l1 cache. */
2041 512, /* size of l2 cache. */
2042 64, /* size of prefetch block */
2043 6, /* number of parallel prefetches */
2044 3, /* Branch cost */
2045 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
2046 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
2047 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2048 COSTS_N_INSNS (1), /* cost of FABS instruction. */
2049 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
2050 COSTS_N_INSNS (20), /* cost of FSQRT instruction. */
2051
2052 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
2053 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
2054 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2055 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
2056 COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
2057 COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
2058 COSTS_N_INSNS (11), /* cost of DIVSS instruction. */
2059 COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
2060 COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
2061 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
2062 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
2063 icelake_memcpy,
2064 icelake_memset,
2065 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2066 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
2067 "16:11:8", /* Loop alignment. */
2068 "16:11:8", /* Jump alignment. */
2069 "0:0:8", /* Label alignment. */
2070 "16", /* Func alignment. */
2071};
2072
4f442a3b
CL
2073/* alderlake_cost should produce code tuned for the Alderlake family of CPUs.
 alderlake_memcpy is its memcpy strategy table; both entries are
 identical. */
2074static stringop_algs alderlake_memcpy[2] = {
2075 {libcall,
2076 {{256, rep_prefix_1_byte, true},
2077 {256, loop, false},
2078 {-1, libcall, false}}},
2079 {libcall,
2080 {{256, rep_prefix_1_byte, true},
2081 {256, loop, false},
2082 {-1, libcall, false}}}};
/* memset strategy table for the Alderlake tuning. Both entries are identical
   and use the same thresholds as alderlake_memcpy. */
2083static stringop_algs alderlake_memset[2] = {
2084 {libcall,
2085 {{256, rep_prefix_1_byte, true},
2086 {256, loop, false},
2087 {-1, libcall, false}}},
2088 {libcall,
2089 {{256, rep_prefix_1_byte, true},
2090 {256, loop, false},
2091 {-1, libcall, false}}}};
/* Tuning cost table for alderlake.  The braced section at the top holds
   the register-allocator move/load/store costs; it is followed by
   instruction costs (in COSTS_N_INSNS units), move/clear ratios, cache
   parameters, reassociation widths, the alderlake_memcpy and
   alderlake_memset strategy tables, branch costs and alignment strings,
   as labelled by the per-field comments.  */
2092static const
2093struct processor_costs alderlake_cost = {
2094 {
2095 /* Start of register allocator costs. integer->integer move cost is 2. */
2096 6, /* cost for loading QImode using movzbl */
2097 {6, 6, 6}, /* cost of loading integer registers
2098 in QImode, HImode and SImode.
2099 Relative to reg-reg move (2). */
2100 {6, 6, 6}, /* cost of storing integer registers */
2101 4, /* cost of reg,reg fld/fst */
2102 {6, 6, 12}, /* cost of loading fp registers
2103 in SFmode, DFmode and XFmode */
2104 {6, 6, 12}, /* cost of storing fp registers
2105 in SFmode, DFmode and XFmode */
2106 2, /* cost of moving MMX register */
2107 {6, 6}, /* cost of loading MMX registers
2108 in SImode and DImode */
2109 {6, 6}, /* cost of storing MMX registers
2110 in SImode and DImode */
2111 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2112 {6, 6, 6, 10, 15}, /* cost of loading SSE registers
2113 in 32,64,128,256 and 512-bit */
2114 {6, 6, 6, 10, 15}, /* cost of storing SSE registers
2115 in 32,64,128,256 and 512-bit */
2116 6, 6, /* SSE->integer and integer->SSE moves */
2117 6, 6, /* mask->integer and integer->mask moves */
2118 {6, 6, 6}, /* cost of loading mask register
2119 in QImode, HImode, SImode. */
2120 {6, 6, 6}, /* cost of storing mask register
2121 in QImode, HImode, SImode. */
2122 2, /* cost of moving mask register. */
2123 /* End of register allocator costs. */
2124 },
2125
2126 COSTS_N_INSNS (1), /* cost of an add instruction */
2127 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2128 COSTS_N_INSNS (1), /* variable shift costs */
2129 COSTS_N_INSNS (1), /* constant shift costs */
2130 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2131 COSTS_N_INSNS (4), /* HI */
2132 COSTS_N_INSNS (3), /* SI */
2133 COSTS_N_INSNS (4), /* DI */
2134 COSTS_N_INSNS (4)}, /* other */
2135 0, /* cost of multiply per each bit set */
2136 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI */
2137 COSTS_N_INSNS (22), /* HI */
2138 COSTS_N_INSNS (30), /* SI */
2139 COSTS_N_INSNS (74), /* DI */
2140 COSTS_N_INSNS (74)}, /* other */
2141 COSTS_N_INSNS (1), /* cost of movsx */
2142 COSTS_N_INSNS (1), /* cost of movzx */
2143 8, /* "large" insn */
2144 17, /* MOVE_RATIO */
2145 17, /* CLEAR_RATIO */
2146 {6, 6, 6}, /* cost of loading integer registers
2147 in QImode, HImode and SImode.
2148 Relative to reg-reg move (2). */
2149 {6, 6, 6}, /* cost of storing integer registers */
2150 {6, 6, 6, 10, 15}, /* cost of loading SSE register
2151 in 32bit, 64bit, 128bit, 256bit and 512bit */
2152 {6, 6, 6, 10, 15}, /* cost of storing SSE register
2153 in 32bit, 64bit, 128bit, 256bit and 512bit */
2154 {6, 6, 6, 10, 15}, /* cost of unaligned loads. */
2155 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2156 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2157 6, /* cost of moving SSE register to integer. */
2158 18, 6, /* Gather load static, per_elt. */
2159 18, 6, /* Gather store static, per_elt. */
2160 32, /* size of l1 cache. */
2161 512, /* size of l2 cache. */
2162 64, /* size of prefetch block */
2163 6, /* number of parallel prefetches */
2164 3, /* Branch cost */
2165 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
2166 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
2167 COSTS_N_INSNS (17), /* cost of FDIV instruction. */
2168 COSTS_N_INSNS (1), /* cost of FABS instruction. */
2169 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
2170 COSTS_N_INSNS (14), /* cost of FSQRT instruction. */
2171
2172 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
2173 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2174 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2175 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
2176 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
2177 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
2178 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
2179 COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
2180 COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
2181 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
2182 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
2183 alderlake_memcpy,
2184 alderlake_memset,
2185 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
2186 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
2187 "16:11:8", /* Loop alignment. */
2188 "16:11:8", /* Jump alignment. */
2189 "0:0:8", /* Label alignment. */
2190 "16", /* Func alignment. */
2191};
2192
64766e8d
JH
2193 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
2194 very small blocks it is better to use a loop. For large blocks, libcall can
2195 do non-temporal accesses and beat inline considerably. */
2196static stringop_algs btver1_memcpy[2] = {
2197 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
2198 {-1, rep_prefix_4_byte, false}}},
2199 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
2200 {-1, libcall, false}}}};
/* String-operation strategy table referenced by btver1_cost.  Element 0
   steps loop -> unrolled_loop -> rep_prefix_4_byte -> libcall; element 1
   steps unrolled_loop -> rep_prefix_8_byte -> libcall.
   NOTE(review): presumably element 0 is the 32-bit and element 1 the
   64-bit variant -- confirm against the stringop_algs users.  */
2201static stringop_algs btver1_memset[2] = {
2202 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
2203 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2204 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
2205 {-1, libcall, false}}}};
/* Tuning cost table for btver1.  The braced section at the top holds the
   register-allocator move/load/store costs; it is followed by instruction
   costs (in COSTS_N_INSNS units), move/clear ratios, cache parameters,
   reassociation widths, the btver1_memcpy and btver1_memset strategy
   tables, branch costs and alignment strings, as labelled by the
   per-field comments.  */
2206const struct processor_costs btver1_cost = {
72bb85f8 2207 {
d321551c
L
2208 /* Start of register allocator costs. integer->integer move cost is 2. */
2209 8, /* cost for loading QImode using movzbl */
2210 {6, 8, 6}, /* cost of loading integer registers
2211 in QImode, HImode and SImode.
2212 Relative to reg-reg move (2). */
2213 {6, 8, 6}, /* cost of storing integer registers */
2214 4, /* cost of reg,reg fld/fst */
2215 {12, 12, 28}, /* cost of loading fp registers
2216 in SFmode, DFmode and XFmode */
2217 {12, 12, 38}, /* cost of storing fp registers
2218 in SFmode, DFmode and XFmode */
2219 4, /* cost of moving MMX register */
2220 {10, 10}, /* cost of loading MMX registers
2221 in SImode and DImode */
2222 {12, 12}, /* cost of storing MMX registers
2223 in SImode and DImode */
2224 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2225 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
2226 in 32,64,128,256 and 512-bit */
2227 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
2228 in 32,64,128,256 and 512-bit */
2229 14, 14, /* SSE->integer and integer->SSE moves */
ecc3135a 2230 14, 14, /* mask->integer and integer->mask moves */
00cb3494
L
2231 {6, 8, 6}, /* cost of loading mask register
2232 in QImode, HImode, SImode. */
2233 {6, 8, 6}, /* cost of storing mask register
2234 in QImode, HImode, SImode. */
2235 2, /* cost of moving mask register. */
d321551c 2236 /* End of register allocator costs. */
72bb85f8 2237 },
d321551c 2238
64766e8d
JH
2239 COSTS_N_INSNS (1), /* cost of an add instruction */
2240 COSTS_N_INSNS (2), /* cost of a lea instruction */
2241 COSTS_N_INSNS (1), /* variable shift costs */
2242 COSTS_N_INSNS (1), /* constant shift costs */
2243 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2244 COSTS_N_INSNS (4), /* HI */
2245 COSTS_N_INSNS (3), /* SI */
2246 COSTS_N_INSNS (4), /* DI */
2247 COSTS_N_INSNS (5)}, /* other */
2248 0, /* cost of multiply per each bit set */
2249 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
2250 COSTS_N_INSNS (35), /* HI */
2251 COSTS_N_INSNS (51), /* SI */
2252 COSTS_N_INSNS (83), /* DI */
2253 COSTS_N_INSNS (83)}, /* other */
2254 COSTS_N_INSNS (1), /* cost of movsx */
2255 COSTS_N_INSNS (1), /* cost of movzx */
2256 8, /* "large" insn */
2257 9, /* MOVE_RATIO */
25e22b19 2258 6, /* CLEAR_RATIO */
df41dbaf 2259 {6, 8, 6}, /* cost of loading integer registers
64766e8d
JH
2260 in QImode, HImode and SImode.
2261 Relative to reg-reg move (2). */
df41dbaf 2262 {6, 8, 6}, /* cost of storing integer registers */
d321551c
L
2263 {10, 10, 12, 48, 96}, /* cost of loading SSE register
2264 in 32bit, 64bit, 128bit, 256bit and 512bit */
2265 {10, 10, 12, 48, 96}, /* cost of storing SSE register
2266 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 2267 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
b7167993 2268 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
d321551c
L
2269 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2270 14, /* cost of moving SSE register to integer. */
a4fe6139
JH
2271 10, 10, /* Gather load static, per_elt. */
2272 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
2273 32, /* size of l1 cache. */
2274 512, /* size of l2 cache. */
2275 64, /* size of prefetch block */
2276 100, /* number of parallel prefetches */
2277 2, /* Branch cost */
2278 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
2279 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
2280 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
2281 COSTS_N_INSNS (2), /* cost of FABS instruction. */
2282 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
2283 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 2284
c53c148c 2285 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2286 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2287 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
2288 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
2289 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
2290 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
2291 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
2292 COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
2293 COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
2294 COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
64766e8d
JH
2295 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2296 btver1_memcpy,
2297 btver1_memset,
f6fd8f2b
JH
2298 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
2299 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2300 "16:11:8", /* Loop alignment. */
2301 "16:8:8", /* Jump alignment. */
2302 "0:0:8", /* Label alignment. */
2303 "11", /* Func alignment. */
64766e8d
JH
2304};
2305
/* String-operation strategy table referenced by btver2_cost; the entries
   are the same as in btver1_memcpy.  Element 0 steps loop ->
   unrolled_loop -> rep_prefix_4_byte; element 1 steps loop ->
   rep_prefix_8_byte -> libcall.  */
2306static stringop_algs btver2_memcpy[2] = {
2307 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
2308 {-1, rep_prefix_4_byte, false}}},
2309 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
2310 {-1, libcall, false}}}};
/* String-operation strategy table referenced by btver2_cost; the entries
   are the same as in btver1_memset.  Each element ends with a -1
   catch-all entry that uses libcall.  */
2311static stringop_algs btver2_memset[2] = {
2312 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
2313 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2314 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
2315 {-1, libcall, false}}}};
/* Tuning cost table for btver2.  The braced section at the top holds the
   register-allocator move/load/store costs; it is followed by instruction
   costs (in COSTS_N_INSNS units), move/clear ratios, cache parameters,
   reassociation widths, the btver2_memcpy and btver2_memset strategy
   tables, branch costs and alignment strings, as labelled by the
   per-field comments.  */
2316const struct processor_costs btver2_cost = {
72bb85f8 2317 {
d321551c
L
2318 /* Start of register allocator costs. integer->integer move cost is 2. */
2319 8, /* cost for loading QImode using movzbl */
2320 {8, 8, 6}, /* cost of loading integer registers
2321 in QImode, HImode and SImode.
2322 Relative to reg-reg move (2). */
2323 {8, 8, 6}, /* cost of storing integer registers */
2324 4, /* cost of reg,reg fld/fst */
2325 {12, 12, 28}, /* cost of loading fp registers
2326 in SFmode, DFmode and XFmode */
2327 {12, 12, 38}, /* cost of storing fp registers
2328 in SFmode, DFmode and XFmode */
2329 4, /* cost of moving MMX register */
2330 {10, 10}, /* cost of loading MMX registers
2331 in SImode and DImode */
2332 {12, 12}, /* cost of storing MMX registers
2333 in SImode and DImode */
2334 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2335 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
2336 in 32,64,128,256 and 512-bit */
2337 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
2338 in 32,64,128,256 and 512-bit */
2339 14, 14, /* SSE->integer and integer->SSE moves */
ecc3135a 2340 14, 14, /* mask->integer and integer->mask moves */
00cb3494
L
2341 {8, 8, 6}, /* cost of loading mask register
2342 in QImode, HImode, SImode. */
2343 {8, 8, 6}, /* cost of storing mask register
2344 in QImode, HImode, SImode. */
2345 2, /* cost of moving mask register. */
d321551c 2346 /* End of register allocator costs. */
72bb85f8 2347 },
d321551c 2348
64766e8d
JH
2349 COSTS_N_INSNS (1), /* cost of an add instruction */
2350 COSTS_N_INSNS (2), /* cost of a lea instruction */
2351 COSTS_N_INSNS (1), /* variable shift costs */
2352 COSTS_N_INSNS (1), /* constant shift costs */
2353 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2354 COSTS_N_INSNS (4), /* HI */
2355 COSTS_N_INSNS (3), /* SI */
2356 COSTS_N_INSNS (4), /* DI */
2357 COSTS_N_INSNS (5)}, /* other */
2358 0, /* cost of multiply per each bit set */
2359 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
2360 COSTS_N_INSNS (35), /* HI */
2361 COSTS_N_INSNS (51), /* SI */
2362 COSTS_N_INSNS (83), /* DI */
2363 COSTS_N_INSNS (83)}, /* other */
2364 COSTS_N_INSNS (1), /* cost of movsx */
2365 COSTS_N_INSNS (1), /* cost of movzx */
2366 8, /* "large" insn */
2367 9, /* MOVE_RATIO */
25e22b19 2368 6, /* CLEAR_RATIO */
df41dbaf 2369 {8, 8, 6}, /* cost of loading integer registers
64766e8d
JH
2370 in QImode, HImode and SImode.
2371 Relative to reg-reg move (2). */
df41dbaf 2372 {8, 8, 6}, /* cost of storing integer registers */
d321551c
L
2373 {10, 10, 12, 48, 96}, /* cost of loading SSE register
2374 in 32bit, 64bit, 128bit, 256bit and 512bit */
2375 {10, 10, 12, 48, 96}, /* cost of storing SSE register
2376 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 2377 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
b7167993 2378 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
d321551c
L
2379 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2380 14, /* cost of moving SSE register to integer. */
a4fe6139
JH
2381 10, 10, /* Gather load static, per_elt. */
2382 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
2383 32, /* size of l1 cache. */
2384 2048, /* size of l2 cache. */
2385 64, /* size of prefetch block */
2386 100, /* number of parallel prefetches */
2387 2, /* Branch cost */
2388 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
2389 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
2390 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
2391 COSTS_N_INSNS (2), /* cost of FABS instruction. */
2392 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
2393 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 2394
c53c148c 2395 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2396 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2397 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
2398 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
2399 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
2400 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
2401 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
2402 COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
2403 COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
2404 COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
64766e8d
JH
2405 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2406 btver2_memcpy,
2407 btver2_memset,
f6fd8f2b
JH
2408 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
2409 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2410 "16:11:8", /* Loop alignment. */
2411 "16:8:8", /* Jump alignment. */
2412 "0:0:8", /* Label alignment. */
2413 "11", /* Func alignment. */
64766e8d
JH
2414};
2415
/* String-operation strategy table referenced by pentium4_cost.  Element 0
   uses loop_1_byte up to a bound of 12 and rep_prefix_4_byte otherwise;
   element 1 is the DUMMY_STRINGOP_ALGS placeholder (always libcall).
   NOTE(review): presumably element 1 is the 64-bit variant, unused for
   this CPU -- confirm.  */
2416static stringop_algs pentium4_memcpy[2] = {
2417 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
2418 DUMMY_STRINGOP_ALGS};
/* String-operation strategy table referenced by pentium4_cost.  Element 0
   steps loop_1_byte -> loop -> rep_prefix_4_byte -> libcall; element 1 is
   the DUMMY_STRINGOP_ALGS placeholder (always libcall).  */
2419static stringop_algs pentium4_memset[2] = {
2420 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
2421 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2422 DUMMY_STRINGOP_ALGS};
2423
/* Tuning cost table for pentium4.  The braced section at the top holds
   the register-allocator move/load/store costs; it is followed by
   instruction costs (in COSTS_N_INSNS units), move/clear ratios, cache
   parameters, reassociation widths, the pentium4_memcpy and
   pentium4_memset strategy tables and branch costs, as labelled by the
   per-field comments.  All four alignment fields are NULL.  */
2424static const
2425struct processor_costs pentium4_cost = {
72bb85f8 2426 {
d321551c 2427 /* Start of register allocator costs. integer->integer move cost is 2. */
df41dbaf 2428 5, /* cost for loading QImode using movzbl */
64766e8d
JH
2429 {4, 5, 4}, /* cost of loading integer registers
2430 in QImode, HImode and SImode.
2431 Relative to reg-reg move (2). */
2432 {2, 3, 2}, /* cost of storing integer registers */
df41dbaf
JH
2433 12, /* cost of reg,reg fld/fst */
2434 {14, 14, 14}, /* cost of loading fp registers
64766e8d 2435 in SFmode, DFmode and XFmode */
df41dbaf 2436 {14, 14, 14}, /* cost of storing fp registers
64766e8d 2437 in SFmode, DFmode and XFmode */
df41dbaf
JH
2438 12, /* cost of moving MMX register */
2439 {16, 16}, /* cost of loading MMX registers
64766e8d 2440 in SImode and DImode */
df41dbaf 2441 {16, 16}, /* cost of storing MMX registers
64766e8d 2442 in SImode and DImode */
df41dbaf
JH
2443 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
2444 {16, 16, 16, 32, 64}, /* cost of loading SSE registers
2445 in 32,64,128,256 and 512-bit */
d321551c
L
2446 {16, 16, 16, 32, 64}, /* cost of storing SSE registers
2447 in 32,64,128,256 and 512-bit */
2448 20, 12, /* SSE->integer and integer->SSE moves */
ecc3135a 2449 20, 12, /* mask->integer and integer->mask moves */
00cb3494
L
2450 {4, 5, 4}, /* cost of loading mask register
2451 in QImode, HImode, SImode. */
2452 {2, 3, 2}, /* cost of storing mask register
2453 in QImode, HImode, SImode. */
2454 2, /* cost of moving mask register. */
d321551c 2455 /* End of register allocator costs. */
72bb85f8 2456 },
d321551c
L
2457
2458 COSTS_N_INSNS (1), /* cost of an add instruction */
2459 COSTS_N_INSNS (3), /* cost of a lea instruction */
2460 COSTS_N_INSNS (4), /* variable shift costs */
2461 COSTS_N_INSNS (4), /* constant shift costs */
2462 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
2463 COSTS_N_INSNS (15), /* HI */
2464 COSTS_N_INSNS (15), /* SI */
2465 COSTS_N_INSNS (15), /* DI */
2466 COSTS_N_INSNS (15)}, /* other */
2467 0, /* cost of multiply per each bit set */
2468 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
2469 COSTS_N_INSNS (56), /* HI */
2470 COSTS_N_INSNS (56), /* SI */
2471 COSTS_N_INSNS (56), /* DI */
2472 COSTS_N_INSNS (56)}, /* other */
2473 COSTS_N_INSNS (1), /* cost of movsx */
2474 COSTS_N_INSNS (1), /* cost of movzx */
2475 16, /* "large" insn */
2476 6, /* MOVE_RATIO */
25e22b19 2477 6, /* CLEAR_RATIO */
d321551c
L
2478 {4, 5, 4}, /* cost of loading integer registers
2479 in QImode, HImode and SImode.
2480 Relative to reg-reg move (2). */
2481 {2, 3, 2}, /* cost of storing integer registers */
2482 {16, 16, 16, 32, 64}, /* cost of loading SSE register
2483 in 32bit, 64bit, 128bit, 256bit and 512bit */
2484 {16, 16, 16, 32, 64}, /* cost of storing SSE register
2485 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2486 {32, 32, 32, 64, 128}, /* cost of unaligned loads. */
df41dbaf 2487 {32, 32, 32, 64, 128}, /* cost of unaligned stores. */
d321551c
L
2488 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
2489 20, /* cost of moving SSE register to integer. */
a4fe6139
JH
2490 16, 16, /* Gather load static, per_elt. */
2491 16, 16, /* Gather store static, per_elt. */
64766e8d
JH
2492 8, /* size of l1 cache. */
2493 256, /* size of l2 cache. */
2494 64, /* size of prefetch block */
2495 6, /* number of parallel prefetches */
2496 2, /* Branch cost */
2497 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
2498 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
2499 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
2500 COSTS_N_INSNS (2), /* cost of FABS instruction. */
2501 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
2502 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
6065f444 2503
c53c148c 2504 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
2505 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
2506 COSTS_N_INSNS (6), /* cost of MULSS instruction. */
2507 COSTS_N_INSNS (6), /* cost of MULSD instruction. */
c53c148c
JH
2508 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2509 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2510 COSTS_N_INSNS (23), /* cost of DIVSS instruction. */
2511 COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
2512 COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
2513 COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
64766e8d
JH
2514 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2515 pentium4_memcpy,
2516 pentium4_memset,
f6fd8f2b
JH
2517 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2518 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2519 NULL, /* Loop alignment. */
2520 NULL, /* Jump alignment. */
2521 NULL, /* Label alignment. */
2522 NULL, /* Func alignment. */
64766e8d
JH
2523};
2524
/* String-operation strategy table referenced by nocona_cost.  Element 0
   uses loop_1_byte up to a bound of 12 and rep_prefix_4_byte otherwise;
   element 1 steps loop -> rep_prefix_8_byte -> unrolled_loop -> libcall.
   NOTE(review): presumably element 0 is the 32-bit and element 1 the
   64-bit variant -- confirm against the stringop_algs users.  */
2525static stringop_algs nocona_memcpy[2] = {
2526 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
2527 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
2528 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
2529
/* String-operation strategy table referenced by nocona_cost.  Element 0
   steps loop_1_byte -> loop -> rep_prefix_4_byte -> libcall; element 1
   steps loop -> unrolled_loop -> rep_prefix_8_byte -> libcall.  */
2530static stringop_algs nocona_memset[2] = {
2531 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
2532 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2533 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
2534 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2535
/* Tuning cost table for nocona.  The braced section at the top holds the
   register-allocator move/load/store costs; it is followed by instruction
   costs (in COSTS_N_INSNS units), move/clear ratios, cache parameters,
   reassociation widths, the nocona_memcpy and nocona_memset strategy
   tables and branch costs, as labelled by the per-field comments.  All
   four alignment fields are NULL.  */
2536static const
2537struct processor_costs nocona_cost = {
72bb85f8 2538 {
d321551c
L
2539 /* Start of register allocator costs. integer->integer move cost is 2. */
2540 4, /* cost for loading QImode using movzbl */
2541 {4, 4, 4}, /* cost of loading integer registers
2542 in QImode, HImode and SImode.
2543 Relative to reg-reg move (2). */
2544 {4, 4, 4}, /* cost of storing integer registers */
2545 12, /* cost of reg,reg fld/fst */
2546 {14, 14, 14}, /* cost of loading fp registers
2547 in SFmode, DFmode and XFmode */
2548 {14, 14, 14}, /* cost of storing fp registers
2549 in SFmode, DFmode and XFmode */
2550 14, /* cost of moving MMX register */
2551 {12, 12}, /* cost of loading MMX registers
2552 in SImode and DImode */
2553 {12, 12}, /* cost of storing MMX registers
2554 in SImode and DImode */
2555 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
2556 {12, 12, 12, 24, 48}, /* cost of loading SSE registers
2557 in 32,64,128,256 and 512-bit */
2558 {12, 12, 12, 24, 48}, /* cost of storing SSE registers
2559 in 32,64,128,256 and 512-bit */
2560 20, 12, /* SSE->integer and integer->SSE moves */
ecc3135a 2561 20, 12, /* mask->integer and integer->mask moves */
00cb3494
L
2562 {4, 4, 4}, /* cost of loading mask register
2563 in QImode, HImode, SImode. */
2564 {4, 4, 4}, /* cost of storing mask register
2565 in QImode, HImode, SImode. */
2566 2, /* cost of moving mask register. */
d321551c 2567 /* End of register allocator costs. */
72bb85f8 2568 },
d321551c 2569
64766e8d
JH
2570 COSTS_N_INSNS (1), /* cost of an add instruction */
2571 COSTS_N_INSNS (1), /* cost of a lea instruction */
2572 COSTS_N_INSNS (1), /* variable shift costs */
2573 COSTS_N_INSNS (1), /* constant shift costs */
2574 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
2575 COSTS_N_INSNS (10), /* HI */
2576 COSTS_N_INSNS (10), /* SI */
2577 COSTS_N_INSNS (10), /* DI */
2578 COSTS_N_INSNS (10)}, /* other */
2579 0, /* cost of multiply per each bit set */
2580 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
2581 COSTS_N_INSNS (66), /* HI */
2582 COSTS_N_INSNS (66), /* SI */
2583 COSTS_N_INSNS (66), /* DI */
2584 COSTS_N_INSNS (66)}, /* other */
2585 COSTS_N_INSNS (1), /* cost of movsx */
2586 COSTS_N_INSNS (1), /* cost of movzx */
2587 16, /* "large" insn */
2588 17, /* MOVE_RATIO */
25e22b19 2589 6, /* CLEAR_RATIO */
64766e8d
JH
2590 {4, 4, 4}, /* cost of loading integer registers
2591 in QImode, HImode and SImode.
2592 Relative to reg-reg move (2). */
2593 {4, 4, 4}, /* cost of storing integer registers */
d321551c
L
2594 {12, 12, 12, 24, 48}, /* cost of loading SSE register
2595 in 32bit, 64bit, 128bit, 256bit and 512bit */
2596 {12, 12, 12, 24, 48}, /* cost of storing SSE register
2597 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2598 {24, 24, 24, 48, 96}, /* cost of unaligned loads. */
df41dbaf 2599 {24, 24, 24, 48, 96}, /* cost of unaligned stores. */
d321551c
L
2600 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
2601 20, /* cost of moving SSE register to integer. */
a4fe6139
JH
2602 12, 12, /* Gather load static, per_elt. */
2603 12, 12, /* Gather store static, per_elt. */
64766e8d
JH
2604 8, /* size of l1 cache. */
2605 1024, /* size of l2 cache. */
2606 64, /* size of prefetch block */
2607 8, /* number of parallel prefetches */
2608 1, /* Branch cost */
2609 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
2610 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2611 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
2612 COSTS_N_INSNS (3), /* cost of FABS instruction. */
2613 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
2614 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
6065f444 2615
c53c148c 2616 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
2617 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
2618 COSTS_N_INSNS (7), /* cost of MULSS instruction. */
2619 COSTS_N_INSNS (7), /* cost of MULSD instruction. */
c53c148c
JH
2620 COSTS_N_INSNS (7), /* cost of FMA SS instruction. */
2621 COSTS_N_INSNS (7), /* cost of FMA SD instruction. */
6065f444
JH
2622 COSTS_N_INSNS (32), /* cost of DIVSS instruction. */
2623 COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
2624 COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
2625 COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
64766e8d
JH
2626 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2627 nocona_memcpy,
2628 nocona_memset,
f6fd8f2b
JH
2629 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2630 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2631 NULL, /* Loop alignment. */
2632 NULL, /* Jump alignment. */
2633 NULL, /* Label alignment. */
2634 NULL, /* Func alignment. */
64766e8d
JH
2635};
2636
/* String-operation strategy table referenced by atom_cost.  Element 0
   uses loop up to a bound of 11 and rep_prefix_4_byte otherwise;
   element 1 steps loop -> rep_prefix_4_byte -> rep_prefix_8_byte ->
   libcall.
   NOTE(review): presumably element 0 is the 32-bit and element 1 the
   64-bit variant -- confirm against the stringop_algs users.  */
2637static stringop_algs atom_memcpy[2] = {
2638 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2639 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2640 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* String-operation strategy table referenced by atom_cost.  Element 0
   steps loop -> unrolled_loop -> rep_prefix_4_byte -> libcall; element 1
   steps loop -> unrolled_loop -> rep_prefix_8_byte -> libcall.  */
2641static stringop_algs atom_memset[2] = {
2642 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2643 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2644 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2645 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Tuning cost table for atom.  The braced section at the top holds the
   register-allocator move/load/store costs; it is followed by instruction
   costs (in COSTS_N_INSNS units), move/clear ratios, cache parameters,
   reassociation widths, the atom_memcpy and atom_memset strategy tables,
   branch costs and alignment strings, as labelled by the per-field
   comments.  */
2646static const
2647struct processor_costs atom_cost = {
72bb85f8 2648 {
d321551c
L
2649 /* Start of register allocator costs. integer->integer move cost is 2. */
2650 6, /* cost for loading QImode using movzbl */
2651 {6, 6, 6}, /* cost of loading integer registers
2652 in QImode, HImode and SImode.
2653 Relative to reg-reg move (2). */
2654 {6, 6, 6}, /* cost of storing integer registers */
2655 4, /* cost of reg,reg fld/fst */
2656 {6, 6, 18}, /* cost of loading fp registers
2657 in SFmode, DFmode and XFmode */
2658 {14, 14, 24}, /* cost of storing fp registers
2659 in SFmode, DFmode and XFmode */
2660 2, /* cost of moving MMX register */
2661 {8, 8}, /* cost of loading MMX registers
2662 in SImode and DImode */
2663 {10, 10}, /* cost of storing MMX registers
2664 in SImode and DImode */
2665 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2666 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
2667 in 32,64,128,256 and 512-bit */
2668 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
2669 in 32,64,128,256 and 512-bit */
ecc3135a 2670 8, 6, /* SSE->integer and integer->SSE moves */
2671 8, 6, /* mask->integer and integer->mask moves */
00cb3494
L
2672 {6, 6, 6}, /* cost of loading mask register
2673 in QImode, HImode, SImode. */
2674 {6, 6, 6}, /* cost of storing mask register
2675 in QImode, HImode, SImode. */
2676 2, /* cost of moving mask register. */
d321551c 2677 /* End of register allocator costs. */
72bb85f8 2678 },
d321551c 2679
64766e8d
JH
2680 COSTS_N_INSNS (1), /* cost of an add instruction */
2681 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2682 COSTS_N_INSNS (1), /* variable shift costs */
2683 COSTS_N_INSNS (1), /* constant shift costs */
2684 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2685 COSTS_N_INSNS (4), /* HI */
2686 COSTS_N_INSNS (3), /* SI */
2687 COSTS_N_INSNS (4), /* DI */
2688 COSTS_N_INSNS (2)}, /* other */
2689 0, /* cost of multiply per each bit set */
2690 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2691 COSTS_N_INSNS (26), /* HI */
2692 COSTS_N_INSNS (42), /* SI */
2693 COSTS_N_INSNS (74), /* DI */
2694 COSTS_N_INSNS (74)}, /* other */
2695 COSTS_N_INSNS (1), /* cost of movsx */
2696 COSTS_N_INSNS (1), /* cost of movzx */
2697 8, /* "large" insn */
2698 17, /* MOVE_RATIO */
25e22b19 2699 6, /* CLEAR_RATIO */
df41dbaf 2700 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
2701 in QImode, HImode and SImode.
2702 Relative to reg-reg move (2). */
df41dbaf 2703 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2704 {8, 8, 8, 16, 32}, /* cost of loading SSE register
2705 in 32bit, 64bit, 128bit, 256bit and 512bit */
2706 {8, 8, 8, 16, 32}, /* cost of storing SSE register
2707 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2708 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 2709 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
2710 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2711 8, /* cost of moving SSE register to integer. */
a4fe6139
JH
2712 8, 8, /* Gather load static, per_elt. */
2713 8, 8, /* Gather store static, per_elt. */
64766e8d
JH
2714 32, /* size of l1 cache. */
2715 256, /* size of l2 cache. */
2716 64, /* size of prefetch block */
2717 6, /* number of parallel prefetches */
2718 3, /* Branch cost */
2719 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2720 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2721 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2722 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2723 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2724 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 2725
c53c148c 2726 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2727 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
2728 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2729 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
2730 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2731 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2732 COSTS_N_INSNS (31), /* cost of DIVSS instruction. */
2733 COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
2734 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
2735 COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
64766e8d
JH
2736 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
2737 atom_memcpy,
2738 atom_memset,
f6fd8f2b
JH
2739 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2740 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2741 "16", /* Loop alignment. */
2742 "16:8:8", /* Jump alignment. */
2743 "0:0:8", /* Label alignment. */
2744 "16", /* Func alignment. */
64766e8d
JH
2745};
2746
/* String-operation strategy table referenced by name as slm_memcpy; the
   entries are the same as in atom_memcpy.  Element 0 uses loop up to a
   bound of 11 and rep_prefix_4_byte otherwise; element 1 steps loop ->
   rep_prefix_4_byte -> rep_prefix_8_byte -> libcall.  */
2747static stringop_algs slm_memcpy[2] = {
2748 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2749 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2750 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* String-operation strategy table for slm; the entries are the same as
   in atom_memset.  Element 0 steps loop -> unrolled_loop ->
   rep_prefix_4_byte -> libcall; element 1 steps loop -> unrolled_loop ->
   rep_prefix_8_byte -> libcall.  */
2751static stringop_algs slm_memset[2] = {
2752 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2753 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2754 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2755 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table slm_cost.  The initializer is positional: the trailing comment
   on each entry names the field it fills, so entries must not be reordered.
   NOTE(review): "slm" presumably stands for Silvermont -- confirm where
   this table is selected.  */
2756static const
2757struct processor_costs slm_cost = {
72bb85f8 2758 {
d321551c
L
2759 /* Start of register allocator costs. integer->integer move cost is 2. */
2760 8, /* cost for loading QImode using movzbl */
2761 {8, 8, 8}, /* cost of loading integer registers
2762 in QImode, HImode and SImode.
2763 Relative to reg-reg move (2). */
2764 {6, 6, 6}, /* cost of storing integer registers */
2765 2, /* cost of reg,reg fld/fst */
2766 {8, 8, 18}, /* cost of loading fp registers
2767 in SFmode, DFmode and XFmode */
2768 {6, 6, 18}, /* cost of storing fp registers
2769 in SFmode, DFmode and XFmode */
2770 2, /* cost of moving MMX register */
2771 {8, 8}, /* cost of loading MMX registers
2772 in SImode and DImode */
2773 {6, 6}, /* cost of storing MMX registers
2774 in SImode and DImode */
2775 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2776 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
2777 in 32,64,128,256 and 512-bit */
2778 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
2779 in 32,64,128,256 and 512-bit */
ecc3135a 2780 8, 6, /* SSE->integer and integer->SSE moves */
2781 8, 6, /* mask->integer and integer->mask moves */
00cb3494
L
2782 {8, 8, 8}, /* cost of loading mask register
2783 in QImode, HImode, SImode. */
2784 {6, 6, 6}, /* cost of storing mask register
2785 in QImode, HImode, SImode. */
2786 2, /* cost of moving mask register. */
d321551c 2787 /* End of register allocator costs. */
72bb85f8 2788 },
d321551c 2789
64766e8d
JH
2790 COSTS_N_INSNS (1), /* cost of an add instruction */
2791 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2792 COSTS_N_INSNS (1), /* variable shift costs */
2793 COSTS_N_INSNS (1), /* constant shift costs */
2794 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2795 COSTS_N_INSNS (3), /* HI */
2796 COSTS_N_INSNS (3), /* SI */
2797 COSTS_N_INSNS (4), /* DI */
2798 COSTS_N_INSNS (2)}, /* other */
2799 0, /* cost of multiply per each bit set */
2800 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2801 COSTS_N_INSNS (26), /* HI */
2802 COSTS_N_INSNS (42), /* SI */
2803 COSTS_N_INSNS (74), /* DI */
2804 COSTS_N_INSNS (74)}, /* other */
2805 COSTS_N_INSNS (1), /* cost of movsx */
2806 COSTS_N_INSNS (1), /* cost of movzx */
2807 8, /* "large" insn */
2808 17, /* MOVE_RATIO */
25e22b19 2809 6, /* CLEAR_RATIO */
df41dbaf 2810 {8, 8, 8}, /* cost of loading integer registers
64766e8d
JH
2811 in QImode, HImode and SImode.
2812 Relative to reg-reg move (2). */
df41dbaf 2813 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2814 {8, 8, 8, 16, 32}, /* cost of loading SSE register
2815 in 32bit, 64bit, 128bit, 256bit and 512bit */
2816 {8, 8, 8, 16, 32}, /* cost of storing SSE register
2817 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2818 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 2819 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
2820 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2821 8, /* cost of moving SSE register to integer. */
a4fe6139
JH
2822 8, 8, /* Gather load static, per_elt. */
2823 8, 8, /* Gather store static, per_elt. */
64766e8d
JH
2824 32, /* size of l1 cache. */
2825 256, /* size of l2 cache. */
2826 64, /* size of prefetch block */
2827 6, /* number of parallel prefetches */
2828 3, /* Branch cost */
2829 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2830 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2831 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2832 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2833 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2834 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 2835
c53c148c 2836 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2837 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2838 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2839 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
2840 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2841 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2842 COSTS_N_INSNS (39), /* cost of DIVSS instruction. */
2843 COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
2844 COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
2845 COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
64766e8d
JH
2846 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2847 slm_memcpy,
2848 slm_memset,
f6fd8f2b
JH
2849 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2850 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2851 "16", /* Loop alignment. */
2852 "16:8:8", /* Jump alignment. */
2853 "0:0:8", /* Label alignment. */
2854 "16", /* Func alignment. */
64766e8d
JH
2855};
2856
c3a2437f
L
/* memcpy strategy table; referenced by tremont_cost below.  Both array
   elements hold the same strategy list.
   NOTE(review): the second entry repeats the 256 threshold of the first;
   presumably it is the fallback when the rep-prefix algorithm cannot be
   used -- confirm against the code that walks stringop_algs.  */
2857static stringop_algs tremont_memcpy[2] = {
2858 {libcall,
2859 {{256, rep_prefix_1_byte, true},
2860 {256, loop, false},
2861 {-1, libcall, false}}},
2862 {libcall,
2863 {{256, rep_prefix_1_byte, true},
2864 {256, loop, false},
2865 {-1, libcall, false}}}};
/* memset strategy table; referenced by tremont_cost below.  Both array
   elements hold the same strategy list.
   NOTE(review): the second entry repeats the 256 threshold of the first;
   presumably it is the fallback when the rep-prefix algorithm cannot be
   used -- confirm against the code that walks stringop_algs.  */
2866static stringop_algs tremont_memset[2] = {
2867 {libcall,
2868 {{256, rep_prefix_1_byte, true},
2869 {256, loop, false},
2870 {-1, libcall, false}}},
2871 {libcall,
2872 {{256, rep_prefix_1_byte, true},
2873 {256, loop, false},
2874 {-1, libcall, false}}}};
/* Cost table tremont_cost.  The initializer is positional: the trailing
   comment on each entry names the field it fills, so entries must not be
   reordered.  */
2875static const
2876struct processor_costs tremont_cost = {
2877 {
2878 /* Start of register allocator costs. integer->integer move cost is 2. */
2879 6, /* cost for loading QImode using movzbl */
2880 {6, 6, 6}, /* cost of loading integer registers
2881 in QImode, HImode and SImode.
2882 Relative to reg-reg move (2). */
2883 {6, 6, 6}, /* cost of storing integer registers */
2884 4, /* cost of reg,reg fld/fst */
2885 {6, 6, 12}, /* cost of loading fp registers
2886 in SFmode, DFmode and XFmode */
2887 {6, 6, 12}, /* cost of storing fp registers
2888 in SFmode, DFmode and XFmode */
2889 2, /* cost of moving MMX register */
2890 {6, 6}, /* cost of loading MMX registers
2891 in SImode and DImode */
2892 {6, 6}, /* cost of storing MMX registers
2893 in SImode and DImode */
2894 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2895 {6, 6, 6, 10, 15}, /* cost of loading SSE registers
2896 in 32,64,128,256 and 512-bit */
2897 {6, 6, 6, 10, 15}, /* cost of storing SSE registers
2898 in 32,64,128,256 and 512-bit */
2899 6, 6, /* SSE->integer and integer->SSE moves */
2900 6, 6, /* mask->integer and integer->mask moves */
2901 {6, 6, 6}, /* cost of loading mask register
2902 in QImode, HImode, SImode. */
2903 {6, 6, 6}, /* cost of storing mask register
2904 in QImode, HImode, SImode. */
2905 2, /* cost of moving mask register. */
2906 /* End of register allocator costs. */
2907 },
2908
2909 COSTS_N_INSNS (1), /* cost of an add instruction */
2910 /* Setting cost to 2 makes our current implementation of synth_mult result in
2911 use of unnecessary temporary registers causing regression on several
2912 SPECfp benchmarks. */
2913 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2914 COSTS_N_INSNS (1), /* variable shift costs */
2915 COSTS_N_INSNS (1), /* constant shift costs */
2916 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2917 COSTS_N_INSNS (4), /* HI */
2918 COSTS_N_INSNS (3), /* SI */
2919 COSTS_N_INSNS (4), /* DI */
2920 COSTS_N_INSNS (4)}, /* other */
2921 0, /* cost of multiply per each bit set */
2922 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI */
2923 COSTS_N_INSNS (22), /* HI */
2924 COSTS_N_INSNS (30), /* SI */
2925 COSTS_N_INSNS (74), /* DI */
2926 COSTS_N_INSNS (74)}, /* other */
2927 COSTS_N_INSNS (1), /* cost of movsx */
2928 COSTS_N_INSNS (1), /* cost of movzx */
2929 8, /* "large" insn */
2930 17, /* MOVE_RATIO */
2931 17, /* CLEAR_RATIO */
2932 {6, 6, 6}, /* cost of loading integer registers
2933 in QImode, HImode and SImode.
2934 Relative to reg-reg move (2). */
2935 {6, 6, 6}, /* cost of storing integer registers */
2936 {6, 6, 6, 10, 15}, /* cost of loading SSE register
2937 in 32bit, 64bit, 128bit, 256bit and 512bit */
2938 {6, 6, 6, 10, 15}, /* cost of storing SSE register
2939 in 32bit, 64bit, 128bit, 256bit and 512bit */
2940 {6, 6, 6, 10, 15}, /* cost of unaligned loads. */
2941 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2942 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2943 6, /* cost of moving SSE register to integer. */
2944 18, 6, /* Gather load static, per_elt. */
2945 18, 6, /* Gather store static, per_elt. */
2946 32, /* size of l1 cache. */
2947 512, /* size of l2 cache. */
2948 64, /* size of prefetch block */
2949 6, /* number of parallel prefetches */
2950 /* Benchmarks show large regressions on K8 sixtrack benchmark when this
2951 value is increased to perhaps more appropriate value of 5. */
2952 3, /* Branch cost */
2953 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
2954 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
2955 COSTS_N_INSNS (17), /* cost of FDIV instruction. */
2956 COSTS_N_INSNS (1), /* cost of FABS instruction. */
2957 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
2958 COSTS_N_INSNS (14), /* cost of FSQRT instruction. */
2959
2960 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
2961 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2962 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2963 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
2964 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
2965 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
2966 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
2967 COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
2968 COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
2969 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
2970 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
2971 tremont_memcpy,
2972 tremont_memset,
2973 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
2974 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
2975 "16:11:8", /* Loop alignment. */
2976 "16:11:8", /* Jump alignment. */
2977 "0:0:8", /* Label alignment. */
2978 "16", /* Func alignment. */
2979};
2980
64766e8d
JH
/* memcpy strategy table; referenced by intel_cost below.
   NOTE(review): entry layout presumably {max size, algorithm, noalign},
   -1 presumably meaning "any larger size" -- confirm against the
   stringop_algs declaration.  */
2981static stringop_algs intel_memcpy[2] = {
2982 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2983 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2984 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset strategy table; referenced by intel_cost below.
   NOTE(review): entry layout presumably {max size, algorithm, noalign},
   -1 presumably meaning "any larger size" -- confirm against the
   stringop_algs declaration.  */
2985static stringop_algs intel_memset[2] = {
2986 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2987 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2988 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2989 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table intel_cost.  The initializer is positional: the trailing
   comment on each entry names the field it fills, so entries must not be
   reordered.  */
2990static const
2991struct processor_costs intel_cost = {
72bb85f8 2992 {
d321551c
L
2993 /* Start of register allocator costs. integer->integer move cost is 2. */
2994 6, /* cost for loading QImode using movzbl */
2995 {4, 4, 4}, /* cost of loading integer registers
2996 in QImode, HImode and SImode.
2997 Relative to reg-reg move (2). */
2998 {6, 6, 6}, /* cost of storing integer registers */
2999 2, /* cost of reg,reg fld/fst */
3000 {6, 6, 8}, /* cost of loading fp registers
3001 in SFmode, DFmode and XFmode */
3002 {6, 6, 10}, /* cost of storing fp registers
3003 in SFmode, DFmode and XFmode */
3004 2, /* cost of moving MMX register */
3005 {6, 6}, /* cost of loading MMX registers
3006 in SImode and DImode */
3007 {6, 6}, /* cost of storing MMX registers
3008 in SImode and DImode */
3009 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
3010 {6, 6, 6, 6, 6}, /* cost of loading SSE registers
3011 in 32,64,128,256 and 512-bit */
3012 {6, 6, 6, 6, 6}, /* cost of storing SSE registers
3013 in 32,64,128,256 and 512-bit */
ecc3135a 3014 4, 4, /* SSE->integer and integer->SSE moves */
3015 4, 4, /* mask->integer and integer->mask moves */
00cb3494
L
3016 {4, 4, 4}, /* cost of loading mask register
3017 in QImode, HImode, SImode. */
3018 {6, 6, 6}, /* cost of storing mask register
3019 in QImode, HImode, SImode. */
3020 2, /* cost of moving mask register. */
d321551c 3021 /* End of register allocator costs. */
72bb85f8 3022 },
d321551c 3023
64766e8d
JH
3024 COSTS_N_INSNS (1), /* cost of an add instruction */
3025 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
3026 COSTS_N_INSNS (1), /* variable shift costs */
3027 COSTS_N_INSNS (1), /* constant shift costs */
3028 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
3029 COSTS_N_INSNS (3), /* HI */
3030 COSTS_N_INSNS (3), /* SI */
3031 COSTS_N_INSNS (4), /* DI */
3032 COSTS_N_INSNS (2)}, /* other */
3033 0, /* cost of multiply per each bit set */
3034 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
3035 COSTS_N_INSNS (26), /* HI */
3036 COSTS_N_INSNS (42), /* SI */
3037 COSTS_N_INSNS (74), /* DI */
3038 COSTS_N_INSNS (74)}, /* other */
3039 COSTS_N_INSNS (1), /* cost of movsx */
3040 COSTS_N_INSNS (1), /* cost of movzx */
3041 8, /* "large" insn */
3042 17, /* MOVE_RATIO */
25e22b19 3043 6, /* CLEAR_RATIO */
64766e8d
JH
3044 {4, 4, 4}, /* cost of loading integer registers
3045 in QImode, HImode and SImode.
3046 Relative to reg-reg move (2). */
af863030 3047 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
3048 {6, 6, 6, 6, 6}, /* cost of loading SSE register
3049 in 32bit, 64bit, 128bit, 256bit and 512bit */
3050 {6, 6, 6, 6, 6}, /* cost of storing SSE register
3051 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 3052 {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
df41dbaf 3053 {10, 10, 10, 10, 10}, /* cost of unaligned stores. */
d321551c
L
3054 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
3055 4, /* cost of moving SSE register to integer. */
a4fe6139
JH
3056 6, 6, /* Gather load static, per_elt. */
3057 6, 6, /* Gather store static, per_elt. */
64766e8d
JH
3058 32, /* size of l1 cache. */
3059 256, /* size of l2 cache. */
3060 64, /* size of prefetch block */
3061 6, /* number of parallel prefetches */
3062 3, /* Branch cost */
3063 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
3064 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
3065 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
3066 COSTS_N_INSNS (8), /* cost of FABS instruction. */
3067 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
3068 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 3069
3ff59baa 3070 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
3071 COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
3072 COSTS_N_INSNS (8), /* cost of MULSS instruction. */
3073 COSTS_N_INSNS (8), /* cost of MULSD instruction. */
c53c148c
JH
3074 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
3075 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
3076 COSTS_N_INSNS (20), /* cost of DIVSS instruction. */
3077 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
3078 COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
3079 COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
64766e8d
JH
3080 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
3081 intel_memcpy,
3082 intel_memset,
f6fd8f2b
JH
3083 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
3084 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
3085 "16", /* Loop alignment. */
3086 "16:8:8", /* Jump alignment. */
3087 "0:0:8", /* Label alignment. */
3088 "16", /* Func alignment. */
64766e8d
JH
3089};
3090
3091/* Generic should produce code tuned for Core-i7 (and newer chips)
3092 and btver1 (and newer chips). */
3093
/* memcpy strategy table; referenced by generic_cost below.  The two array
   elements differ only in the rep-prefix width (4-byte vs. 8-byte).
   NOTE(review): entry layout presumably {max size, algorithm, noalign},
   -1 presumably meaning "any larger size" -- confirm against the
   stringop_algs declaration.  */
3094static stringop_algs generic_memcpy[2] = {
3095 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
3096 {-1, libcall, false}}},
3097 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
3098 {-1, libcall, false}}}};
/* memset strategy table; referenced by generic_cost below.  The two array
   elements differ only in the rep-prefix width (4-byte vs. 8-byte).
   NOTE(review): entry layout presumably {max size, algorithm, noalign},
   -1 presumably meaning "any larger size" -- confirm against the
   stringop_algs declaration.  */
3099static stringop_algs generic_memset[2] = {
3100 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
3101 {-1, libcall, false}}},
3102 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
3103 {-1, libcall, false}}}};
/* Cost table generic_cost.  The initializer is positional: the trailing
   comment on each entry names the field it fills, so entries must not be
   reordered.  */
3104static const
3105struct processor_costs generic_cost = {
72bb85f8 3106 {
d321551c
L
3107 /* Start of register allocator costs. integer->integer move cost is 2. */
3108 6, /* cost for loading QImode using movzbl */
3109 {6, 6, 6}, /* cost of loading integer registers
3110 in QImode, HImode and SImode.
3111 Relative to reg-reg move (2). */
3112 {6, 6, 6}, /* cost of storing integer registers */
3113 4, /* cost of reg,reg fld/fst */
3114 {6, 6, 12}, /* cost of loading fp registers
3115 in SFmode, DFmode and XFmode */
3116 {6, 6, 12}, /* cost of storing fp registers
3117 in SFmode, DFmode and XFmode */
3118 2, /* cost of moving MMX register */
3119 {6, 6}, /* cost of loading MMX registers
3120 in SImode and DImode */
3121 {6, 6}, /* cost of storing MMX registers
3122 in SImode and DImode */
3123 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
3124 {6, 6, 6, 10, 15}, /* cost of loading SSE registers
3125 in 32,64,128,256 and 512-bit */
3126 {6, 6, 6, 10, 15}, /* cost of storing SSE registers
3127 in 32,64,128,256 and 512-bit */
ecc3135a 3128 6, 6, /* SSE->integer and integer->SSE moves */
3129 6, 6, /* mask->integer and integer->mask moves */
00cb3494
L
3130 {6, 6, 6}, /* cost of loading mask register
3131 in QImode, HImode, SImode. */
3132 {6, 6, 6}, /* cost of storing mask register
3133 in QImode, HImode, SImode. */
3134 2, /* cost of moving mask register. */
d321551c 3135 /* End of register allocator costs. */
72bb85f8 3136 },
d321551c 3137
64766e8d 3138 COSTS_N_INSNS (1), /* cost of an add instruction */
ef9eec0b 3139 /* Setting cost to 2 makes our current implementation of synth_mult result in
64766e8d
JH
3140 use of unnecessary temporary registers causing regression on several
3141 SPECfp benchmarks. */
3142 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
3143 COSTS_N_INSNS (1), /* variable shift costs */
3144 COSTS_N_INSNS (1), /* constant shift costs */
3145 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
3146 COSTS_N_INSNS (4), /* HI */
3147 COSTS_N_INSNS (3), /* SI */
3148 COSTS_N_INSNS (4), /* DI */
7c080ade 3149 COSTS_N_INSNS (4)}, /* other */
64766e8d 3150 0, /* cost of multiply per each bit set */
7c080ade
JH
3151 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI */
3152 COSTS_N_INSNS (22), /* HI */
3153 COSTS_N_INSNS (30), /* SI */
64766e8d
JH
3154 COSTS_N_INSNS (74), /* DI */
3155 COSTS_N_INSNS (74)}, /* other */
3156 COSTS_N_INSNS (1), /* cost of movsx */
3157 COSTS_N_INSNS (1), /* cost of movzx */
3158 8, /* "large" insn */
3159 17, /* MOVE_RATIO */
25e22b19 3160 6, /* CLEAR_RATIO */
d555138e 3161 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
3162 in QImode, HImode and SImode.
3163 Relative to reg-reg move (2). */
af863030 3164 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
3165 {6, 6, 6, 10, 15}, /* cost of loading SSE register
3166 in 32bit, 64bit, 128bit, 256bit and 512bit */
3167 {6, 6, 6, 10, 15}, /* cost of storing SSE register
3168 in 32bit, 64bit, 128bit, 256bit and 512bit */
7c080ade 3169 {6, 6, 6, 10, 15}, /* cost of unaligned loads. */
7c080ade 3170 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
d321551c
L
3171 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
3172 6, /* cost of moving SSE register to integer. */
7c080ade
JH
3173 18, 6, /* Gather load static, per_elt. */
3174 18, 6, /* Gather store static, per_elt. */
64766e8d
JH
3175 32, /* size of l1 cache. */
3176 512, /* size of l2 cache. */
3177 64, /* size of prefetch block */
3178 6, /* number of parallel prefetches */
3179 /* Benchmarks show large regressions on K8 sixtrack benchmark when this
3180 value is increased to perhaps more appropriate value of 5. */
3181 3, /* Branch cost */
ef9eec0b 3182 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
7c080ade 3183 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
e8e3054e 3184 COSTS_N_INSNS (17), /* cost of FDIV instruction. */
ef9eec0b
JH
3185 COSTS_N_INSNS (1), /* cost of FABS instruction. */
3186 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
e8e3054e 3187 COSTS_N_INSNS (14), /* cost of FSQRT instruction. */
6065f444 3188
ef9eec0b
JH
3189 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
3190 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
3191 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
3192 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
3193 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
3194 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
e8e3054e
JH
3195 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
3196 COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
3197 COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
3198 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
7c080ade 3199 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
64766e8d
JH
3200 generic_memcpy,
3201 generic_memset,
e8e3054e
JH
3202 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
3203 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
3204 "16:11:8", /* Loop alignment. */
3205 "16:11:8", /* Jump alignment. */
3206 "0:0:8", /* Label alignment. */
3207 "16", /* Func alignment. */
64766e8d
JH
3208};
3209
3210/* core_cost should produce code tuned for Core family of CPUs. */
/* memcpy strategy table; referenced by core_cost below.
   NOTE(review): entry layout presumably {max size, algorithm, noalign},
   -1 presumably meaning "any larger size" -- confirm against the
   stringop_algs declaration.  */
3211static stringop_algs core_memcpy[2] = {
3212 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
3213 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
3214 {-1, libcall, false}}}};
/* memset strategy table; referenced by core_cost below.
   NOTE(review): entry layout presumably {max size, algorithm, noalign},
   -1 presumably meaning "any larger size" -- confirm against the
   stringop_algs declaration.  */
3215static stringop_algs core_memset[2] = {
3216 {libcall, {{6, loop_1_byte, true},
3217 {24, loop, true},
3218 {8192, rep_prefix_4_byte, true},
3219 {-1, libcall, false}}},
3220 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
3221 {-1, libcall, false}}}};
3222
/* Cost table core_cost.  The initializer is positional: the trailing
   comment on each entry names the field it fills, so entries must not be
   reordered.  */
3223static const
3224struct processor_costs core_cost = {
72bb85f8 3225 {
d321551c
L
3226 /* Start of register allocator costs. integer->integer move cost is 2. */
3227 6, /* cost for loading QImode using movzbl */
3228 {4, 4, 4}, /* cost of loading integer registers
3229 in QImode, HImode and SImode.
3230 Relative to reg-reg move (2). */
3231 {6, 6, 6}, /* cost of storing integer registers */
3232 2, /* cost of reg,reg fld/fst */
3233 {6, 6, 8}, /* cost of loading fp registers
3234 in SFmode, DFmode and XFmode */
3235 {6, 6, 10}, /* cost of storing fp registers
3236 in SFmode, DFmode and XFmode */
3237 2, /* cost of moving MMX register */
3238 {6, 6}, /* cost of loading MMX registers
3239 in SImode and DImode */
3240 {6, 6}, /* cost of storing MMX registers
3241 in SImode and DImode */
3242 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
3243 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
3244 in 32,64,128,256 and 512-bit */
3245 {6, 6, 6, 6, 12}, /* cost of storing SSE registers
3246 in 32,64,128,256 and 512-bit */
ecc3135a 3247 6, 6, /* SSE->integer and integer->SSE moves */
3248 6, 6, /* mask->integer and integer->mask moves */
00cb3494
L
3249 {4, 4, 4}, /* cost of loading mask register
3250 in QImode, HImode, SImode. */
3251 {6, 6, 6}, /* cost of storing mask register
3252 in QImode, HImode, SImode. */
3253 2, /* cost of moving mask register. */
d321551c 3254 /* End of register allocator costs. */
72bb85f8 3255 },
d321551c 3256
64766e8d
JH
3257 COSTS_N_INSNS (1), /* cost of an add instruction */
3258 /* On all chips taken into consideration lea is 2 cycles and more. With
3259 this cost however our current implementation of synth_mult results in
3260 use of unnecessary temporary registers causing regression on several
3261 SPECfp benchmarks. */
3262 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
3263 COSTS_N_INSNS (1), /* variable shift costs */
3264 COSTS_N_INSNS (1), /* constant shift costs */
3265 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
3266 COSTS_N_INSNS (4), /* HI */
3267 COSTS_N_INSNS (3), /* SI */
a2ef9558
MT
3268 /* Here we tune for Sandybridge or newer. */
3269 COSTS_N_INSNS (3), /* DI */
3270 COSTS_N_INSNS (3)}, /* other */
64766e8d 3271 0, /* cost of multiply per each bit set */
02308bd3
MT
3272 /* Expanding div/mod currently doesn't consider parallelism. So the cost
3273 model is not realistic. We compensate by increasing the latencies a bit. */
3274 {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
3275 COSTS_N_INSNS (11), /* HI */
3276 COSTS_N_INSNS (14), /* SI */
ffa3ce53
JH
3277 COSTS_N_INSNS (81), /* DI */
3278 COSTS_N_INSNS (81)}, /* other */
64766e8d
JH
3279 COSTS_N_INSNS (1), /* cost of movsx */
3280 COSTS_N_INSNS (1), /* cost of movzx */
3281 8, /* "large" insn */
3282 17, /* MOVE_RATIO */
25e22b19 3283 6, /* CLEAR_RATIO */
64766e8d
JH
3284 {4, 4, 4}, /* cost of loading integer registers
3285 in QImode, HImode and SImode.
3286 Relative to reg-reg move (2). */
ffa3ce53 3287 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
3288 {6, 6, 6, 6, 12}, /* cost of loading SSE register
3289 in 32bit, 64bit, 128bit, 256bit and 512bit */
3290 {6, 6, 6, 6, 12}, /* cost of storing SSE register
3291 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 3292 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
df41dbaf 3293 {6, 6, 6, 6, 12}, /* cost of unaligned stores. */
d321551c
L
3294 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
3295 2, /* cost of moving SSE register to integer. */
a4fe6139
JH
3296 /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
3297 rec. throughput 6.
3298 So 5 uops statically and one uop per load.
NOTE(review): the same mnemonic is named twice; presumably the two
figures are for different vector widths -- confirm. */
3299 10, 6, /* Gather load static, per_elt. */
3300 10, 6, /* Gather store static, per_elt. */
64766e8d
JH
3301 64, /* size of l1 cache. */
3302 512, /* size of l2 cache. */
3303 64, /* size of prefetch block */
3304 6, /* number of parallel prefetches */
3305 /* FIXME perhaps more appropriate value is 5. */
3306 3, /* Branch cost */
ef9eec0b
JH
3307 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
3308 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
ffa3ce53 3309 /* 10-24 */
ef9eec0b
JH
3310 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
3311 COSTS_N_INSNS (1), /* cost of FABS instruction. */
3312 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
ffa3ce53 3313 COSTS_N_INSNS (23), /* cost of FSQRT instruction. */
6065f444 3314
c53c148c 3315 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
3316 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
3317 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
3318 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
3319 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
3320 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
3321 COSTS_N_INSNS (18), /* cost of DIVSS instruction. */
3322 COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
3323 COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
3324 COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
64766e8d
JH
3325 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
3326 core_memcpy,
3327 core_memset,
f6fd8f2b
JH
3328 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
3329 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
3330 "16:11:8", /* Loop alignment. */
3331 "16:11:8", /* Jump alignment. */
3332 "0:0:8", /* Label alignment. */
3333 "16", /* Func alignment. */
64766e8d
JH
3334};
3335