]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/x86-tune-costs.h
i386: Increase Skylake SImode pseudo register store cost
[thirdparty/gcc.git] / gcc / config / i386 / x86-tune-costs.h
CommitLineData
/* Costs of operations of individual x86 CPUs.
   Copyright (C) 1988-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
/* Processor costs (relative to an add).  */

/* Express a cost as a code size of N bytes.  We assume COSTS_N_INSNS is
   defined as (N)*4 and an addition is 2 bytes, so a two-byte add yields
   the same number from either macro.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Filler stringop_algs entry: libcall, followed by the single element
   {-1, libcall, false}.  Used below for the second slot of tables that
   only spell out their first slot.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
29
30static stringop_algs ix86_size_memcpy[2] = {
31 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
32 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
33static stringop_algs ix86_size_memset[2] = {
34 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
35 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
36
37const
38struct processor_costs ix86_size_cost = {/* costs for tuning for size */
72bb85f8 39 {
d321551c
L
40 /* Start of register allocator costs. integer->integer move cost is 2. */
41 2, /* cost for loading QImode using movzbl */
42 {2, 2, 2}, /* cost of loading integer registers
43 in QImode, HImode and SImode.
44 Relative to reg-reg move (2). */
45 {2, 2, 2}, /* cost of storing integer registers */
46 2, /* cost of reg,reg fld/fst */
47 {2, 2, 2}, /* cost of loading fp registers
48 in SFmode, DFmode and XFmode */
49 {2, 2, 2}, /* cost of storing fp registers
50 in SFmode, DFmode and XFmode */
51 3, /* cost of moving MMX register */
52 {3, 3}, /* cost of loading MMX registers
53 in SImode and DImode */
54 {3, 3}, /* cost of storing MMX registers
55 in SImode and DImode */
56 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
57 {3, 3, 3, 3, 3}, /* cost of loading SSE registers
58 in 32,64,128,256 and 512-bit */
59 {3, 3, 3, 3, 3}, /* cost of storing SSE registers
60 in 32,64,128,256 and 512-bit */
61 3, 3, /* SSE->integer and integer->SSE moves */
62 /* End of register allocator costs. */
72bb85f8 63 },
d321551c 64
64766e8d
JH
65 COSTS_N_BYTES (2), /* cost of an add instruction */
66 COSTS_N_BYTES (3), /* cost of a lea instruction */
67 COSTS_N_BYTES (2), /* variable shift costs */
68 COSTS_N_BYTES (3), /* constant shift costs */
69 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
70 COSTS_N_BYTES (3), /* HI */
71 COSTS_N_BYTES (3), /* SI */
72 COSTS_N_BYTES (3), /* DI */
73 COSTS_N_BYTES (5)}, /* other */
74 0, /* cost of multiply per each bit set */
75 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 COSTS_N_BYTES (3), /* cost of movsx */
81 COSTS_N_BYTES (3), /* cost of movzx */
82 0, /* "large" insn */
83 2, /* MOVE_RATIO */
64766e8d
JH
84 {2, 2, 2}, /* cost of loading integer registers
85 in QImode, HImode and SImode.
86 Relative to reg-reg move (2). */
87 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
88 {3, 3, 3, 3, 3}, /* cost of loading SSE register
89 in 32bit, 64bit, 128bit, 256bit and 512bit */
90 {3, 3, 3, 3, 3}, /* cost of storing SSE register
91 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf
JH
92 {3, 3, 3, 3, 3}, /* cost of unaligned SSE load
93 in 128bit, 256bit and 512bit */
d321551c 94 {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
df41dbaf 95 in 128bit, 256bit and 512bit */
d321551c
L
96 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
97 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
98 5, 0, /* Gather load static, per_elt. */
99 5, 0, /* Gather store static, per_elt. */
64766e8d
JH
100 0, /* size of l1 cache */
101 0, /* size of l2 cache */
102 0, /* size of prefetch block */
103 0, /* number of parallel prefetches */
104 2, /* Branch cost */
105 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
106 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
107 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
108 COSTS_N_BYTES (2), /* cost of FABS instruction. */
109 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
110 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
6065f444 111
c53c148c 112 COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
6065f444
JH
113 COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
114 COSTS_N_BYTES (2), /* cost of MULSS instruction. */
115 COSTS_N_BYTES (2), /* cost of MULSD instruction. */
c53c148c
JH
116 COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
117 COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
6065f444
JH
118 COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
119 COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
120 COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
121 COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
64766e8d
JH
122 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
123 ix86_size_memcpy,
124 ix86_size_memset,
f6fd8f2b
JH
125 COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
126 COSTS_N_BYTES (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
127 NULL, /* Loop alignment. */
128 NULL, /* Jump alignment. */
129 NULL, /* Label alignment. */
130 NULL, /* Func alignment. */
64766e8d
JH
131};
132
133/* Processor costs (relative to an add) */
134static stringop_algs i386_memcpy[2] = {
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
136 DUMMY_STRINGOP_ALGS};
137static stringop_algs i386_memset[2] = {
138 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
139 DUMMY_STRINGOP_ALGS};
140
141static const
142struct processor_costs i386_cost = { /* 386 specific costs */
72bb85f8 143 {
d321551c
L
144 /* Start of register allocator costs. integer->integer move cost is 2. */
145 4, /* cost for loading QImode using movzbl */
146 {2, 4, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 4, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {8, 8, 8}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {8, 8, 8}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 2, /* cost of moving MMX register */
156 {4, 8}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {4, 8}, /* cost of storing MMX registers
159 in SImode and DImode */
160 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
161 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
162 in 32,64,128,256 and 512-bit */
163 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
164 in 32,64,128,256 and 512-bit */
165 3, 3, /* SSE->integer and integer->SSE moves */
166 /* End of register allocator costs. */
72bb85f8 167 },
d321551c 168
64766e8d
JH
169 COSTS_N_INSNS (1), /* cost of an add instruction */
170 COSTS_N_INSNS (1), /* cost of a lea instruction */
171 COSTS_N_INSNS (3), /* variable shift costs */
172 COSTS_N_INSNS (2), /* constant shift costs */
173 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
174 COSTS_N_INSNS (6), /* HI */
175 COSTS_N_INSNS (6), /* SI */
176 COSTS_N_INSNS (6), /* DI */
177 COSTS_N_INSNS (6)}, /* other */
178 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
179 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
180 COSTS_N_INSNS (23), /* HI */
181 COSTS_N_INSNS (23), /* SI */
182 COSTS_N_INSNS (23), /* DI */
183 COSTS_N_INSNS (23)}, /* other */
184 COSTS_N_INSNS (3), /* cost of movsx */
185 COSTS_N_INSNS (2), /* cost of movzx */
186 15, /* "large" insn */
187 3, /* MOVE_RATIO */
64766e8d
JH
188 {2, 4, 2}, /* cost of loading integer registers
189 in QImode, HImode and SImode.
190 Relative to reg-reg move (2). */
191 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
192 {4, 8, 16, 32, 64}, /* cost of loading SSE register
193 in 32bit, 64bit, 128bit, 256bit and 512bit */
194 {4, 8, 16, 32, 64}, /* cost of storing SSE register
195 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 196 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 197 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
198 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
199 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
200 4, 4, /* Gather load static, per_elt. */
201 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
202 0, /* size of l1 cache */
203 0, /* size of l2 cache */
204 0, /* size of prefetch block */
205 0, /* number of parallel prefetches */
206 1, /* Branch cost */
207 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
208 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
209 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
210 COSTS_N_INSNS (22), /* cost of FABS instruction. */
211 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
212 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
6065f444 213
c53c148c 214 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
215 COSTS_N_INSNS (23), /* cost of ADDSS/SD SUBSS/SD insns. */
216 COSTS_N_INSNS (27), /* cost of MULSS instruction. */
217 COSTS_N_INSNS (27), /* cost of MULSD instruction. */
c53c148c
JH
218 COSTS_N_INSNS (27), /* cost of FMA SS instruction. */
219 COSTS_N_INSNS (27), /* cost of FMA SD instruction. */
6065f444
JH
220 COSTS_N_INSNS (88), /* cost of DIVSS instruction. */
221 COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
222 COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
223 COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
64766e8d
JH
224 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
225 i386_memcpy,
226 i386_memset,
f6fd8f2b
JH
227 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
228 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
229 "4", /* Loop alignment. */
230 "4", /* Jump alignment. */
231 NULL, /* Label alignment. */
232 "4", /* Func alignment. */
64766e8d
JH
233};
234
235static stringop_algs i486_memcpy[2] = {
236 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
237 DUMMY_STRINGOP_ALGS};
238static stringop_algs i486_memset[2] = {
239 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
240 DUMMY_STRINGOP_ALGS};
241
242static const
243struct processor_costs i486_cost = { /* 486 specific costs */
72bb85f8 244 {
d321551c
L
245 /* Start of register allocator costs. integer->integer move cost is 2. */
246 4, /* cost for loading QImode using movzbl */
247 {2, 4, 2}, /* cost of loading integer registers
248 in QImode, HImode and SImode.
249 Relative to reg-reg move (2). */
250 {2, 4, 2}, /* cost of storing integer registers */
251 2, /* cost of reg,reg fld/fst */
252 {8, 8, 8}, /* cost of loading fp registers
253 in SFmode, DFmode and XFmode */
254 {8, 8, 8}, /* cost of storing fp registers
255 in SFmode, DFmode and XFmode */
256 2, /* cost of moving MMX register */
257 {4, 8}, /* cost of loading MMX registers
258 in SImode and DImode */
259 {4, 8}, /* cost of storing MMX registers
260 in SImode and DImode */
261 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
262 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
263 in 32,64,128,256 and 512-bit */
264 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
265 in 32,64,128,256 and 512-bit */
266 3, 3, /* SSE->integer and integer->SSE moves */
267 /* End of register allocator costs. */
72bb85f8 268 },
d321551c 269
64766e8d
JH
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
64766e8d
JH
289 {2, 4, 2}, /* cost of loading integer registers
290 in QImode, HImode and SImode.
291 Relative to reg-reg move (2). */
292 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
293 {4, 8, 16, 32, 64}, /* cost of loading SSE register
294 in 32bit, 64bit, 128bit, 256bit and 512bit */
295 {4, 8, 16, 32, 64}, /* cost of storing SSE register
296 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 297 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 298 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
299 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
300 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
301 4, 4, /* Gather load static, per_elt. */
302 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
303 4, /* size of l1 cache. 486 has 8kB cache
304 shared for code and data, so 4kB is
305 not really precise. */
306 4, /* size of l2 cache */
307 0, /* size of prefetch block */
308 0, /* number of parallel prefetches */
309 1, /* Branch cost */
310 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
311 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
312 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
313 COSTS_N_INSNS (3), /* cost of FABS instruction. */
314 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
315 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
6065f444 316
c53c148c 317 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
318 COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
319 COSTS_N_INSNS (16), /* cost of MULSS instruction. */
320 COSTS_N_INSNS (16), /* cost of MULSD instruction. */
c53c148c
JH
321 COSTS_N_INSNS (16), /* cost of FMA SS instruction. */
322 COSTS_N_INSNS (16), /* cost of FMA SD instruction. */
6065f444
JH
323 COSTS_N_INSNS (73), /* cost of DIVSS instruction. */
324 COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
325 COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
326 COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
64766e8d
JH
327 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
328 i486_memcpy,
329 i486_memset,
f6fd8f2b
JH
330 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
331 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
332 "16", /* Loop alignment. */
333 "16", /* Jump alignment. */
334 "0:0:8", /* Label alignment. */
335 "16", /* Func alignment. */
64766e8d
JH
336};
337
338static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345static const
346struct processor_costs pentium_cost = {
72bb85f8 347 {
d321551c
L
348 /* Start of register allocator costs. integer->integer move cost is 2. */
349 6, /* cost for loading QImode using movzbl */
350 {2, 4, 2}, /* cost of loading integer registers
351 in QImode, HImode and SImode.
352 Relative to reg-reg move (2). */
353 {2, 4, 2}, /* cost of storing integer registers */
354 2, /* cost of reg,reg fld/fst */
355 {2, 2, 6}, /* cost of loading fp registers
356 in SFmode, DFmode and XFmode */
357 {4, 4, 6}, /* cost of storing fp registers
358 in SFmode, DFmode and XFmode */
359 8, /* cost of moving MMX register */
360 {8, 8}, /* cost of loading MMX registers
361 in SImode and DImode */
362 {8, 8}, /* cost of storing MMX registers
363 in SImode and DImode */
364 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
365 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
366 in 32,64,128,256 and 512-bit */
367 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
368 in 32,64,128,256 and 512-bit */
369 3, 3, /* SSE->integer and integer->SSE moves */
370 /* End of register allocator costs. */
72bb85f8 371 },
d321551c 372
64766e8d
JH
373 COSTS_N_INSNS (1), /* cost of an add instruction */
374 COSTS_N_INSNS (1), /* cost of a lea instruction */
375 COSTS_N_INSNS (4), /* variable shift costs */
376 COSTS_N_INSNS (1), /* constant shift costs */
377 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
378 COSTS_N_INSNS (11), /* HI */
379 COSTS_N_INSNS (11), /* SI */
380 COSTS_N_INSNS (11), /* DI */
381 COSTS_N_INSNS (11)}, /* other */
382 0, /* cost of multiply per each bit set */
383 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
384 COSTS_N_INSNS (25), /* HI */
385 COSTS_N_INSNS (25), /* SI */
386 COSTS_N_INSNS (25), /* DI */
387 COSTS_N_INSNS (25)}, /* other */
388 COSTS_N_INSNS (3), /* cost of movsx */
389 COSTS_N_INSNS (2), /* cost of movzx */
390 8, /* "large" insn */
391 6, /* MOVE_RATIO */
64766e8d
JH
392 {2, 4, 2}, /* cost of loading integer registers
393 in QImode, HImode and SImode.
394 Relative to reg-reg move (2). */
395 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
396 {4, 8, 16, 32, 64}, /* cost of loading SSE register
397 in 32bit, 64bit, 128bit, 256bit and 512bit */
398 {4, 8, 16, 32, 64}, /* cost of storing SSE register
399 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 400 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 401 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
402 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
403 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
404 4, 4, /* Gather load static, per_elt. */
405 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
406 8, /* size of l1 cache. */
407 8, /* size of l2 cache */
408 0, /* size of prefetch block */
409 0, /* number of parallel prefetches */
410 2, /* Branch cost */
411 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
412 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
413 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
414 COSTS_N_INSNS (1), /* cost of FABS instruction. */
415 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
416 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
6065f444 417
c53c148c 418 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
419 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
420 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
421 COSTS_N_INSNS (3), /* cost of MULSD instruction. */
c53c148c
JH
422 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
423 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
424 COSTS_N_INSNS (39), /* cost of DIVSS instruction. */
425 COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
426 COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
427 COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
64766e8d
JH
428 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
429 pentium_memcpy,
430 pentium_memset,
f6fd8f2b
JH
431 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
432 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
433 "16:8:8", /* Loop alignment. */
434 "16:8:8", /* Jump alignment. */
435 "0:0:8", /* Label alignment. */
436 "16", /* Func alignment. */
64766e8d
JH
437};
438
439static const
440struct processor_costs lakemont_cost = {
72bb85f8 441 {
d321551c
L
442 /* Start of register allocator costs. integer->integer move cost is 2. */
443 6, /* cost for loading QImode using movzbl */
444 {2, 4, 2}, /* cost of loading integer registers
445 in QImode, HImode and SImode.
446 Relative to reg-reg move (2). */
447 {2, 4, 2}, /* cost of storing integer registers */
448 2, /* cost of reg,reg fld/fst */
449 {2, 2, 6}, /* cost of loading fp registers
450 in SFmode, DFmode and XFmode */
451 {4, 4, 6}, /* cost of storing fp registers
452 in SFmode, DFmode and XFmode */
453 8, /* cost of moving MMX register */
454 {8, 8}, /* cost of loading MMX registers
455 in SImode and DImode */
456 {8, 8}, /* cost of storing MMX registers
457 in SImode and DImode */
458 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
459 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
460 in 32,64,128,256 and 512-bit */
461 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
462 in 32,64,128,256 and 512-bit */
463 3, 3, /* SSE->integer and integer->SSE moves */
464 /* End of register allocator costs. */
72bb85f8 465 },
d321551c 466
64766e8d
JH
467 COSTS_N_INSNS (1), /* cost of an add instruction */
468 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
469 COSTS_N_INSNS (1), /* variable shift costs */
470 COSTS_N_INSNS (1), /* constant shift costs */
471 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
472 COSTS_N_INSNS (11), /* HI */
473 COSTS_N_INSNS (11), /* SI */
474 COSTS_N_INSNS (11), /* DI */
475 COSTS_N_INSNS (11)}, /* other */
476 0, /* cost of multiply per each bit set */
477 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
478 COSTS_N_INSNS (25), /* HI */
479 COSTS_N_INSNS (25), /* SI */
480 COSTS_N_INSNS (25), /* DI */
481 COSTS_N_INSNS (25)}, /* other */
482 COSTS_N_INSNS (3), /* cost of movsx */
483 COSTS_N_INSNS (2), /* cost of movzx */
484 8, /* "large" insn */
485 17, /* MOVE_RATIO */
64766e8d
JH
486 {2, 4, 2}, /* cost of loading integer registers
487 in QImode, HImode and SImode.
488 Relative to reg-reg move (2). */
489 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
490 {4, 8, 16, 32, 64}, /* cost of loading SSE register
491 in 32bit, 64bit, 128bit, 256bit and 512bit */
492 {4, 8, 16, 32, 64}, /* cost of storing SSE register
493 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 494 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 495 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
496 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
497 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
498 4, 4, /* Gather load static, per_elt. */
499 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
500 8, /* size of l1 cache. */
501 8, /* size of l2 cache */
502 0, /* size of prefetch block */
503 0, /* number of parallel prefetches */
504 2, /* Branch cost */
505 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
506 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
507 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
508 COSTS_N_INSNS (1), /* cost of FABS instruction. */
509 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
510 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
6065f444 511
c53c148c 512 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
513 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
514 COSTS_N_INSNS (5), /* cost of MULSS instruction. */
515 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
516 COSTS_N_INSNS (10), /* cost of FMA SS instruction. */
517 COSTS_N_INSNS (10), /* cost of FMA SD instruction. */
6065f444
JH
518 COSTS_N_INSNS (31), /* cost of DIVSS instruction. */
519 COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
520 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
521 COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
64766e8d
JH
522 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
523 pentium_memcpy,
524 pentium_memset,
f6fd8f2b
JH
525 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
526 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
527 "16:8:8", /* Loop alignment. */
528 "16:8:8", /* Jump alignment. */
529 "0:0:8", /* Label alignment. */
530 "16", /* Func alignment. */
64766e8d
JH
531};
532
533/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
534 (we ensure the alignment). For small blocks inline loop is still a
535 noticeable win, for bigger blocks either rep movsl or rep movsb is
536 way to go. Rep movsb has apparently more expensive startup time in CPU,
537 but after 4K the difference is down in the noise. */
538static stringop_algs pentiumpro_memcpy[2] = {
539 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
540 {8192, rep_prefix_4_byte, false},
541 {-1, rep_prefix_1_byte, false}}},
542 DUMMY_STRINGOP_ALGS};
543static stringop_algs pentiumpro_memset[2] = {
544 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
545 {8192, rep_prefix_4_byte, false},
546 {-1, libcall, false}}},
547 DUMMY_STRINGOP_ALGS};
548static const
549struct processor_costs pentiumpro_cost = {
72bb85f8 550 {
d321551c
L
551 /* Start of register allocator costs. integer->integer move cost is 2. */
552 2, /* cost for loading QImode using movzbl */
553 {4, 4, 4}, /* cost of loading integer registers
554 in QImode, HImode and SImode.
555 Relative to reg-reg move (2). */
556 {2, 2, 2}, /* cost of storing integer registers */
557 2, /* cost of reg,reg fld/fst */
558 {2, 2, 6}, /* cost of loading fp registers
559 in SFmode, DFmode and XFmode */
560 {4, 4, 6}, /* cost of storing fp registers
561 in SFmode, DFmode and XFmode */
562 2, /* cost of moving MMX register */
563 {2, 2}, /* cost of loading MMX registers
564 in SImode and DImode */
565 {2, 2}, /* cost of storing MMX registers
566 in SImode and DImode */
567 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
568 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
569 in 32,64,128,256 and 512-bit */
570 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
571 in 32,64,128,256 and 512-bit */
572 3, 3, /* SSE->integer and integer->SSE moves */
573 /* End of register allocator costs. */
72bb85f8 574 },
d321551c 575
64766e8d
JH
576 COSTS_N_INSNS (1), /* cost of an add instruction */
577 COSTS_N_INSNS (1), /* cost of a lea instruction */
578 COSTS_N_INSNS (1), /* variable shift costs */
579 COSTS_N_INSNS (1), /* constant shift costs */
580 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
581 COSTS_N_INSNS (4), /* HI */
582 COSTS_N_INSNS (4), /* SI */
583 COSTS_N_INSNS (4), /* DI */
584 COSTS_N_INSNS (4)}, /* other */
585 0, /* cost of multiply per each bit set */
586 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
587 COSTS_N_INSNS (17), /* HI */
588 COSTS_N_INSNS (17), /* SI */
589 COSTS_N_INSNS (17), /* DI */
590 COSTS_N_INSNS (17)}, /* other */
591 COSTS_N_INSNS (1), /* cost of movsx */
592 COSTS_N_INSNS (1), /* cost of movzx */
593 8, /* "large" insn */
594 6, /* MOVE_RATIO */
64766e8d
JH
595 {4, 4, 4}, /* cost of loading integer registers
596 in QImode, HImode and SImode.
597 Relative to reg-reg move (2). */
598 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
599 {4, 8, 16, 32, 64}, /* cost of loading SSE register
600 in 32bit, 64bit, 128bit, 256bit and 512bit */
601 {4, 8, 16, 32, 64}, /* cost of storing SSE register
602 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 603 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 604 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
605 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
606 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
607 4, 4, /* Gather load static, per_elt. */
608 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
609 8, /* size of l1 cache. */
610 256, /* size of l2 cache */
611 32, /* size of prefetch block */
612 6, /* number of parallel prefetches */
613 2, /* Branch cost */
614 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (2), /* cost of FABS instruction. */
618 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
6065f444 620
c53c148c 621 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
622 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
623 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
624 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
625 COSTS_N_INSNS (7), /* cost of FMA SS instruction. */
626 COSTS_N_INSNS (7), /* cost of FMA SD instruction. */
6065f444
JH
627 COSTS_N_INSNS (18), /* cost of DIVSS instruction. */
628 COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
629 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
630 COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
64766e8d
JH
631 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
632 pentiumpro_memcpy,
633 pentiumpro_memset,
f6fd8f2b
JH
634 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
635 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
636 "16", /* Loop alignment. */
637 "16:11:8", /* Jump alignment. */
638 "0:0:8", /* Label alignment. */
639 "16", /* Func alignment. */
64766e8d
JH
640};
641
642static stringop_algs geode_memcpy[2] = {
643 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
644 DUMMY_STRINGOP_ALGS};
645static stringop_algs geode_memset[2] = {
646 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
647 DUMMY_STRINGOP_ALGS};
648static const
649struct processor_costs geode_cost = {
72bb85f8 650 {
d321551c
L
651 /* Start of register allocator costs. integer->integer move cost is 2. */
652 2, /* cost for loading QImode using movzbl */
653 {2, 2, 2}, /* cost of loading integer registers
654 in QImode, HImode and SImode.
655 Relative to reg-reg move (2). */
656 {2, 2, 2}, /* cost of storing integer registers */
657 2, /* cost of reg,reg fld/fst */
658 {2, 2, 2}, /* cost of loading fp registers
659 in SFmode, DFmode and XFmode */
660 {4, 6, 6}, /* cost of storing fp registers
661 in SFmode, DFmode and XFmode */
662 2, /* cost of moving MMX register */
663 {2, 2}, /* cost of loading MMX registers
664 in SImode and DImode */
665 {2, 2}, /* cost of storing MMX registers
666 in SImode and DImode */
667 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
668 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
669 in 32,64,128,256 and 512-bit */
670 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
671 in 32,64,128,256 and 512-bit */
672 6, 6, /* SSE->integer and integer->SSE moves */
673 /* End of register allocator costs. */
72bb85f8 674 },
d321551c 675
64766e8d
JH
676 COSTS_N_INSNS (1), /* cost of an add instruction */
677 COSTS_N_INSNS (1), /* cost of a lea instruction */
678 COSTS_N_INSNS (2), /* variable shift costs */
679 COSTS_N_INSNS (1), /* constant shift costs */
680 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
681 COSTS_N_INSNS (4), /* HI */
682 COSTS_N_INSNS (7), /* SI */
683 COSTS_N_INSNS (7), /* DI */
684 COSTS_N_INSNS (7)}, /* other */
685 0, /* cost of multiply per each bit set */
686 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
687 COSTS_N_INSNS (23), /* HI */
688 COSTS_N_INSNS (39), /* SI */
689 COSTS_N_INSNS (39), /* DI */
690 COSTS_N_INSNS (39)}, /* other */
691 COSTS_N_INSNS (1), /* cost of movsx */
692 COSTS_N_INSNS (1), /* cost of movzx */
693 8, /* "large" insn */
694 4, /* MOVE_RATIO */
df41dbaf 695 {2, 2, 2}, /* cost of loading integer registers
64766e8d
JH
696 in QImode, HImode and SImode.
697 Relative to reg-reg move (2). */
df41dbaf 698 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
699 {2, 2, 8, 16, 32}, /* cost of loading SSE register
700 in 32bit, 64bit, 128bit, 256bit and 512bit */
701 {2, 2, 8, 16, 32}, /* cost of storing SSE register
702 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 703 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
df41dbaf 704 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
705 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
706 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
707 2, 2, /* Gather load static, per_elt. */
708 2, 2, /* Gather store static, per_elt. */
64766e8d
JH
709 64, /* size of l1 cache. */
710 128, /* size of l2 cache. */
711 32, /* size of prefetch block */
712 1, /* number of parallel prefetches */
713 1, /* Branch cost */
714 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
715 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
716 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
717 COSTS_N_INSNS (1), /* cost of FABS instruction. */
718 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
719 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
6065f444 720
c53c148c 721 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
722 COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
723 COSTS_N_INSNS (11), /* cost of MULSS instruction. */
724 COSTS_N_INSNS (11), /* cost of MULSD instruction. */
c53c148c
JH
725 COSTS_N_INSNS (17), /* cost of FMA SS instruction. */
726 COSTS_N_INSNS (17), /* cost of FMA SD instruction. */
6065f444
JH
727 COSTS_N_INSNS (47), /* cost of DIVSS instruction. */
728 COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
729 COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
730 COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
64766e8d
JH
731 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
732 geode_memcpy,
733 geode_memset,
f6fd8f2b
JH
734 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
735 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
736 NULL, /* Loop alignment. */
737 NULL, /* Jump alignment. */
738 NULL, /* Label alignment. */
739 NULL, /* Func alignment. */
64766e8d
JH
740};
741
/* memcpy expansion strategy table for K6 tuning.  The first entry selects
   rep_prefix_4_byte for the 256 size class and libcall for everything else
   (including the leading "unknown size" slot); the second entry is the
   all-libcall DUMMY_STRINGOP_ALGS placeholder.
   NOTE(review): presumably entry [0] is used for 32-bit and entry [1] for
   64-bit code, and 256 is an upper bound on the block size in bytes --
   confirm against the stringop_algs definition.  */
742 static stringop_algs k6_memcpy[2] = {
743 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
744 DUMMY_STRINGOP_ALGS};
/* memset expansion strategy table for K6 tuning; same shape and thresholds
   as k6_memcpy: rep_prefix_4_byte for the 256 size class, libcall otherwise,
   and the all-libcall DUMMY_STRINGOP_ALGS placeholder as second entry.  */
745static stringop_algs k6_memset[2] = {
746 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
747 DUMMY_STRINGOP_ALGS};
/* Cost table used when tuning for K6.
   Layout, as described by the per-entry comments below:
     - the first braced group holds the register allocator costs (loads,
       stores and register moves, relative to an integer reg-reg move of 2);
     - then integer instruction costs expressed with COSTS_N_INSNS;
     - then memory move costs, gather costs, cache and prefetch parameters
       and the branch cost;
     - then x87 and SSE instruction costs;
     - finally the reassociation widths, the string operation strategy
       tables (k6_memcpy / k6_memset), the conditional branch costs and the
       alignment strings.  */
748 static const
749 struct processor_costs k6_cost = {
72bb85f8 750 {
d321551c
L
751 /* Start of register allocator costs. integer->integer move cost is 2. */
752 3, /* cost for loading QImode using movzbl */
753 {4, 5, 4}, /* cost of loading integer registers
754 in QImode, HImode and SImode.
755 Relative to reg-reg move (2). */
756 {2, 3, 2}, /* cost of storing integer registers */
757 4, /* cost of reg,reg fld/fst */
758 {6, 6, 6}, /* cost of loading fp registers
759 in SFmode, DFmode and XFmode */
760 {4, 4, 4}, /* cost of storing fp registers
761 in SFmode, DFmode and XFmode */
762 2, /* cost of moving MMX register */
763 {2, 2}, /* cost of loading MMX registers
764 in SImode and DImode */
765 {2, 2}, /* cost of storing MMX registers
766 in SImode and DImode */
767 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
768 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
769 in 32,64,128,256 and 512-bit */
770 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
771 in 32,64,128,256 and 512-bit */
772 6, 6, /* SSE->integer and integer->SSE moves */
773 /* End of register allocator costs. */
72bb85f8 774 },
d321551c 775
64766e8d
JH
776 COSTS_N_INSNS (1), /* cost of an add instruction */
777 COSTS_N_INSNS (2), /* cost of a lea instruction */
778 COSTS_N_INSNS (1), /* variable shift costs */
779 COSTS_N_INSNS (1), /* constant shift costs */
780 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
781 COSTS_N_INSNS (3), /* HI */
782 COSTS_N_INSNS (3), /* SI */
783 COSTS_N_INSNS (3), /* DI */
784 COSTS_N_INSNS (3)}, /* other */
785 0, /* cost of multiply per each bit set */
786 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
787 COSTS_N_INSNS (18), /* HI */
788 COSTS_N_INSNS (18), /* SI */
789 COSTS_N_INSNS (18), /* DI */
790 COSTS_N_INSNS (18)}, /* other */
791 COSTS_N_INSNS (2), /* cost of movsx */
792 COSTS_N_INSNS (2), /* cost of movzx */
793 8, /* "large" insn */
794 4, /* MOVE_RATIO */
64766e8d
JH
795 {4, 5, 4}, /* cost of loading integer registers
796 in QImode, HImode and SImode.
797 Relative to reg-reg move (2). */
798 {2, 3, 2}, /* cost of storing integer registers */
d321551c
L
799 {2, 2, 8, 16, 32}, /* cost of loading SSE register
800 in 32bit, 64bit, 128bit, 256bit and 512bit */
801 {2, 2, 8, 16, 32}, /* cost of storing SSE register
802 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 803 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
df41dbaf 804 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
805 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
806 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
807 2, 2, /* Gather load static, per_elt. */
808 2, 2, /* Gather store static, per_elt. */
64766e8d
JH
809 32, /* size of l1 cache. */
810 32, /* size of l2 cache. Some models
811 have integrated l2 cache, but
812 optimizing for k6 is not important
813 enough to worry about that. */
814 32, /* size of prefetch block */
815 1, /* number of parallel prefetches */
816 1, /* Branch cost */
817 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
818 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
819 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
820 COSTS_N_INSNS (2), /* cost of FABS instruction. */
821 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
822 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
6065f444 823
c53c148c 824 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
825 COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
826 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
827 COSTS_N_INSNS (2), /* cost of MULSD instruction. */
c53c148c
JH
828 COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
829 COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
6065f444
JH
830 COSTS_N_INSNS (56), /* cost of DIVSS instruction. */
831 COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
832 COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
833 COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
64766e8d
JH
834 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
835 k6_memcpy,
836 k6_memset,
f6fd8f2b
JH
837 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
838 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
839 "32:8:8", /* Loop alignment. */
840 "32:8:8", /* Jump alignment. */
841 "0:0:8", /* Label alignment. */
842 "32", /* Func alignment. */
64766e8d
JH
843};
844
845/* For some reason, Athlon deals better with REP prefix (relative to loops)
846 compared to K8. Alignment becomes important after 8 bytes for memcpy and
847 128 bytes for memset. */
/* memcpy expansion strategy table for Athlon tuning: rep_prefix_4_byte for
   the 2048 size class and libcall otherwise; the second entry is the
   all-libcall DUMMY_STRINGOP_ALGS placeholder.
   NOTE(review): presumably entry [0] is used for 32-bit and entry [1] for
   64-bit code -- confirm against the stringop_algs definition.  */
848static stringop_algs athlon_memcpy[2] = {
849 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
850 DUMMY_STRINGOP_ALGS};
/* memset expansion strategy table for Athlon tuning; same shape and
   thresholds as athlon_memcpy: rep_prefix_4_byte for the 2048 size class,
   libcall otherwise, and the all-libcall DUMMY_STRINGOP_ALGS placeholder as
   second entry.  */
851static stringop_algs athlon_memset[2] = {
852 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
853 DUMMY_STRINGOP_ALGS};
/* Cost table used when tuning for Athlon.
   Layout, as described by the per-entry comments below:
     - the first braced group holds the register allocator costs (loads,
       stores and register moves, relative to an integer reg-reg move of 2);
     - then integer instruction costs expressed with COSTS_N_INSNS;
     - then memory move costs, gather costs, cache and prefetch parameters
       and the branch cost;
     - then x87 and SSE instruction costs;
     - finally the reassociation widths, the string operation strategy
       tables (athlon_memcpy / athlon_memset), the conditional branch costs
       and the alignment strings.  */
854static const
855struct processor_costs athlon_cost = {
72bb85f8 856 {
d321551c
L
857 /* Start of register allocator costs. integer->integer move cost is 2. */
858 4, /* cost for loading QImode using movzbl */
859 {3, 4, 3}, /* cost of loading integer registers
860 in QImode, HImode and SImode.
861 Relative to reg-reg move (2). */
862 {3, 4, 3}, /* cost of storing integer registers */
863 4, /* cost of reg,reg fld/fst */
864 {4, 4, 12}, /* cost of loading fp registers
865 in SFmode, DFmode and XFmode */
866 {6, 6, 8}, /* cost of storing fp registers
867 in SFmode, DFmode and XFmode */
868 2, /* cost of moving MMX register */
869 {4, 4}, /* cost of loading MMX registers
870 in SImode and DImode */
871 {4, 4}, /* cost of storing MMX registers
872 in SImode and DImode */
873 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
874 {4, 4, 12, 12, 24}, /* cost of loading SSE registers
875 in 32,64,128,256 and 512-bit */
876 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
877 in 32,64,128,256 and 512-bit */
878 5, 5, /* SSE->integer and integer->SSE moves */
879 /* End of register allocator costs. */
72bb85f8 880 },
d321551c 881
64766e8d
JH
882 COSTS_N_INSNS (1), /* cost of an add instruction */
883 COSTS_N_INSNS (2), /* cost of a lea instruction */
884 COSTS_N_INSNS (1), /* variable shift costs */
885 COSTS_N_INSNS (1), /* constant shift costs */
886 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
887 COSTS_N_INSNS (5), /* HI */
888 COSTS_N_INSNS (5), /* SI */
889 COSTS_N_INSNS (5), /* DI */
890 COSTS_N_INSNS (5)}, /* other */
891 0, /* cost of multiply per each bit set */
892 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
893 COSTS_N_INSNS (26), /* HI */
894 COSTS_N_INSNS (42), /* SI */
895 COSTS_N_INSNS (74), /* DI */
896 COSTS_N_INSNS (74)}, /* other */
897 COSTS_N_INSNS (1), /* cost of movsx */
898 COSTS_N_INSNS (1), /* cost of movzx */
899 8, /* "large" insn */
900 9, /* MOVE_RATIO */
64766e8d
JH
901 {3, 4, 3}, /* cost of loading integer registers
902 in QImode, HImode and SImode.
903 Relative to reg-reg move (2). */
904 {3, 4, 3}, /* cost of storing integer registers */
d321551c
L
905 {4, 4, 12, 12, 24}, /* cost of loading SSE register
906 in 32bit, 64bit, 128bit, 256bit and 512bit */
907 {4, 4, 10, 10, 20}, /* cost of storing SSE register
908 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 909 {4, 4, 12, 12, 24}, /* cost of unaligned loads. */
b7167993 910 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
d321551c
L
911 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
912 5, /* cost of moving SSE register to integer. */
a4fe6139
JH
913 4, 4, /* Gather load static, per_elt. */
914 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
915 64, /* size of l1 cache. */
916 256, /* size of l2 cache. */
917 64, /* size of prefetch block */
918 6, /* number of parallel prefetches */
919 5, /* Branch cost */
920 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
921 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
922 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
923 COSTS_N_INSNS (2), /* cost of FABS instruction. */
924 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
925 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 926
c53c148c 927 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
928 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
929 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
930 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
931 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
932 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
933 /* 11-16; presumably the latency range of the divide below -- TODO confirm. */
934 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
935 COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
936 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
937 COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
64766e8d
JH
938 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
939 athlon_memcpy,
940 athlon_memset,
f6fd8f2b
JH
941 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
942 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
943 "16:8:8", /* Loop alignment. */
944 "16:8:8", /* Jump alignment. */
945 "0:0:8", /* Label alignment. */
946 "16", /* Func alignment. */
64766e8d
JH
947};
948
949/* K8 has optimized REP instruction for medium sized blocks, but for very
950 small blocks it is better to use loop. For large blocks, libcall can
951 do non-temporal accesses and beat inline considerably. */
/* memcpy expansion strategy table for K8 tuning.  Each entry lists
   {size class, algorithm, flag} triples, with -1 as the catch-all class.
   NOTE(review): presumably entry [0] is used for 32-bit and entry [1] for
   64-bit code -- confirm against the stringop_algs definition.  */
952static stringop_algs k8_memcpy[2] = {
953 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
954 {-1, rep_prefix_4_byte, false}}},
955 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
956 {-1, libcall, false}}}};
/* memset expansion strategy table for K8 tuning: loops for the smallest
   size classes, a REP-prefixed store for the middle classes and libcall for
   everything larger (the -1 catch-all class).  */
957static stringop_algs k8_memset[2] = {
958 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
959 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
960 {libcall, {{48, unrolled_loop, false},
961 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table used when tuning for K8.
   Layout, as described by the per-entry comments below:
     - the first braced group holds the register allocator costs (loads,
       stores and register moves, relative to an integer reg-reg move of 2);
     - then integer instruction costs expressed with COSTS_N_INSNS;
     - then memory move costs, gather costs, cache and prefetch parameters
       and the branch cost;
     - then x87 and SSE instruction costs;
     - finally the reassociation widths, the string operation strategy
       tables (k8_memcpy / k8_memset), the conditional branch costs and the
       alignment strings.  */
962static const
963struct processor_costs k8_cost = {
72bb85f8 964 {
d321551c
L
965 /* Start of register allocator costs. integer->integer move cost is 2. */
966 4, /* cost for loading QImode using movzbl */
967 {3, 4, 3}, /* cost of loading integer registers
968 in QImode, HImode and SImode.
969 Relative to reg-reg move (2). */
970 {3, 4, 3}, /* cost of storing integer registers */
971 4, /* cost of reg,reg fld/fst */
972 {4, 4, 12}, /* cost of loading fp registers
973 in SFmode, DFmode and XFmode */
974 {6, 6, 8}, /* cost of storing fp registers
975 in SFmode, DFmode and XFmode */
976 2, /* cost of moving MMX register */
977 {3, 3}, /* cost of loading MMX registers
978 in SImode and DImode */
979 {4, 4}, /* cost of storing MMX registers
980 in SImode and DImode */
981 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
982 {4, 3, 12, 12, 24}, /* cost of loading SSE registers
983 in 32,64,128,256 and 512-bit */
984 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
985 in 32,64,128,256 and 512-bit */
986 5, 5, /* SSE->integer and integer->SSE moves */
987 /* End of register allocator costs. */
72bb85f8 988 },
d321551c 989
64766e8d
JH
990 COSTS_N_INSNS (1), /* cost of an add instruction */
991 COSTS_N_INSNS (2), /* cost of a lea instruction */
992 COSTS_N_INSNS (1), /* variable shift costs */
993 COSTS_N_INSNS (1), /* constant shift costs */
994 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
995 COSTS_N_INSNS (4), /* HI */
996 COSTS_N_INSNS (3), /* SI */
997 COSTS_N_INSNS (4), /* DI */
998 COSTS_N_INSNS (5)}, /* other */
999 0, /* cost of multiply per each bit set */
1000 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1001 COSTS_N_INSNS (26), /* HI */
1002 COSTS_N_INSNS (42), /* SI */
1003 COSTS_N_INSNS (74), /* DI */
1004 COSTS_N_INSNS (74)}, /* other */
1005 COSTS_N_INSNS (1), /* cost of movsx */
1006 COSTS_N_INSNS (1), /* cost of movzx */
1007 8, /* "large" insn */
1008 9, /* MOVE_RATIO */
64766e8d
JH
1009 {3, 4, 3}, /* cost of loading integer registers
1010 in QImode, HImode and SImode.
1011 Relative to reg-reg move (2). */
1012 {3, 4, 3}, /* cost of storing integer registers */
d321551c
L
1013 {4, 3, 12, 12, 24}, /* cost of loading SSE register
1014 in 32bit, 64bit, 128bit, 256bit and 512bit */
1015 {4, 4, 10, 10, 20}, /* cost of storing SSE register
1016 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1017 {4, 3, 12, 12, 24}, /* cost of unaligned loads. */
b7167993 1018 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
d321551c
L
1019 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1020 5, /* cost of moving SSE register to integer. */
a4fe6139
JH
1021 4, 4, /* Gather load static, per_elt. */
1022 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
1023 64, /* size of l1 cache. */
1024 512, /* size of l2 cache. */
1025 64, /* size of prefetch block */
1026 /* New AMD processors never drop prefetches; if they cannot be performed
1027 immediately, they are queued. We set number of simultaneous prefetches
1028 to a large constant to reflect this (it probably is not a good idea not
1029 to limit number of prefetches at all, as their execution also takes some
1030 time). */
1031 100, /* number of parallel prefetches */
1032 3, /* Branch cost */
1033 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1034 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1035 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1036 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1037 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1038 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1039
c53c148c 1040 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1041 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1042 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1043 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1044 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
1045 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
1046 /* 11-16; presumably the latency range of the divide below -- TODO confirm. */
1047 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
1048 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
1049 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
1050 COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
64766e8d
JH
1051 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1052 k8_memcpy,
1053 k8_memset,
f6fd8f2b
JH
1054 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1055 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1056 "16:8:8", /* Loop alignment. */
1057 "16:8:8", /* Jump alignment. */
1058 "0:0:8", /* Label alignment. */
1059 "16", /* Func alignment. */
64766e8d
JH
1060};
1061
1062/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1063 very small blocks it is better to use loop. For large blocks, libcall can
1064 do non-temporal accesses and beat inline considerably. */
/* memcpy expansion strategy table for AMDFAM10 tuning.  Each entry lists
   {size class, algorithm, flag} triples, with -1 as the catch-all class.
   NOTE(review): presumably entry [0] is used for 32-bit and entry [1] for
   64-bit code -- confirm against the stringop_algs definition.  */
1065static stringop_algs amdfam10_memcpy[2] = {
1066 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1067 {-1, rep_prefix_4_byte, false}}},
1068 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1069 {-1, libcall, false}}}};
/* memset expansion strategy table for AMDFAM10 tuning: loops for the
   smallest size classes, a REP-prefixed store for the middle classes and
   libcall for everything larger (the -1 catch-all class).  */
1070static stringop_algs amdfam10_memset[2] = {
1071 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1072 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1073 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1074 {-1, libcall, false}}}};
/* Cost table used when tuning for AMDFAM10.
   Layout, as described by the per-entry comments below:
     - the first braced group holds the register allocator costs (loads,
       stores and register moves, relative to an integer reg-reg move of 2);
     - then integer instruction costs expressed with COSTS_N_INSNS;
     - then memory move costs, gather costs, cache and prefetch parameters
       and the branch cost;
     - then x87 and SSE instruction costs;
     - finally the reassociation widths, the string operation strategy
       tables (amdfam10_memcpy / amdfam10_memset), the conditional branch
       costs and the alignment strings.
   NOTE(review): unlike the k6/athlon/k8 tables in this file, this object is
   declared neither static nor const; presumably that is intentional (used or
   adjusted from another translation unit) -- confirm before changing.  */
1075struct processor_costs amdfam10_cost = {
72bb85f8 1076 {
d321551c 1077 /* Start of register allocator costs. integer->integer move cost is 2. */
64766e8d
JH
1078 4, /* cost for loading QImode using movzbl */
1079 {3, 4, 3}, /* cost of loading integer registers
1080 in QImode, HImode and SImode.
1081 Relative to reg-reg move (2). */
1082 {3, 4, 3}, /* cost of storing integer registers */
1083 4, /* cost of reg,reg fld/fst */
1084 {4, 4, 12}, /* cost of loading fp registers
1085 in SFmode, DFmode and XFmode */
1086 {6, 6, 8}, /* cost of storing fp registers
1087 in SFmode, DFmode and XFmode */
1088 2, /* cost of moving MMX register */
1089 {3, 3}, /* cost of loading MMX registers
1090 in SImode and DImode */
1091 {4, 4}, /* cost of storing MMX registers
1092 in SImode and DImode */
df41dbaf
JH
1093 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1094 {4, 4, 3, 6, 12}, /* cost of loading SSE registers
1095 in 32,64,128,256 and 512-bit */
df41dbaf
JH
1096 {4, 4, 5, 10, 20}, /* cost of storing SSE registers
1097 in 32,64,128,256 and 512-bit */
df41dbaf 1098 3, 3, /* SSE->integer and integer->SSE moves */
d321551c 1099
64766e8d
JH
1100 /* On K8:
1101 MOVD reg64, xmmreg Double FSTORE 4
1102 MOVD reg32, xmmreg Double FSTORE 4
1103 On AMDFAM10:
1104 MOVD reg64, xmmreg Double FADD 3
1105 1/1 1/1
1106 MOVD reg32, xmmreg Double FADD 3
1107 1/1 1/1 */
d321551c 1108 /* End of register allocator costs. */
72bb85f8 1109 },
d321551c
L
1110
1111 COSTS_N_INSNS (1), /* cost of an add instruction */
1112 COSTS_N_INSNS (2), /* cost of a lea instruction */
1113 COSTS_N_INSNS (1), /* variable shift costs */
1114 COSTS_N_INSNS (1), /* constant shift costs */
1115 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1116 COSTS_N_INSNS (4), /* HI */
1117 COSTS_N_INSNS (3), /* SI */
1118 COSTS_N_INSNS (4), /* DI */
1119 COSTS_N_INSNS (5)}, /* other */
1120 0, /* cost of multiply per each bit set */
1121 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1122 COSTS_N_INSNS (35), /* HI */
1123 COSTS_N_INSNS (51), /* SI */
1124 COSTS_N_INSNS (83), /* DI */
1125 COSTS_N_INSNS (83)}, /* other */
1126 COSTS_N_INSNS (1), /* cost of movsx */
1127 COSTS_N_INSNS (1), /* cost of movzx */
1128 8, /* "large" insn */
1129 9, /* MOVE_RATIO */
1130 {3, 4, 3}, /* cost of loading integer registers
1131 in QImode, HImode and SImode.
1132 Relative to reg-reg move (2). */
1133 {3, 4, 3}, /* cost of storing integer registers */
1134 {4, 4, 3, 6, 12}, /* cost of loading SSE register
1135 in 32bit, 64bit, 128bit, 256bit and 512bit */
1136 {4, 4, 5, 10, 20}, /* cost of storing SSE register
1137 in 32bit, 64bit, 128bit, 256bit and 512bit */
1138 {4, 4, 3, 7, 12}, /* cost of unaligned loads. */
1139 {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
1140 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1141 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
1142 4, 4, /* Gather load static, per_elt. */
1143 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
1144 64, /* size of l1 cache. */
1145 512, /* size of l2 cache. */
1146 64, /* size of prefetch block */
1147 /* New AMD processors never drop prefetches; if they cannot be performed
1148 immediately, they are queued. We set number of simultaneous prefetches
1149 to a large constant to reflect this (it probably is not a good idea not
1150 to limit number of prefetches at all, as their execution also takes some
1151 time). */
1152 100, /* number of parallel prefetches */
1153 2, /* Branch cost */
1154 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1155 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1156 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1157 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1158 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1159 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1160
c53c148c 1161 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1162 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1163 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1164 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1165 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
1167 /* 11-16; presumably the latency range of the divide below -- TODO confirm. */
1168 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
1169 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
1170 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
1171 COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
64766e8d
JH
1172 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1173 amdfam10_memcpy,
1174 amdfam10_memset,
f6fd8f2b
JH
1175 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
1176 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1177 "32:25:8", /* Loop alignment. */
1178 "32:8:8", /* Jump alignment. */
1179 "0:0:8", /* Label alignment. */
1180 "32", /* Func alignment. */
64766e8d
JH
1181};
1182
c727b835 1183/* BDVER has optimized REP instruction for medium sized blocks, but for
64766e8d
JH
1184 very small blocks it is better to use loop. For large blocks, libcall
1185 can do non-temporal accesses and beat inline considerably. */
/* memcpy expansion strategy table for BDVER tuning.  Each entry lists
   {size class, algorithm, flag} triples, with -1 as the catch-all class.
   NOTE(review): presumably entry [0] is used for 32-bit and entry [1] for
   64-bit code -- confirm against the stringop_algs definition.  */
c727b835 1186static stringop_algs bdver_memcpy[2] = {
64766e8d
JH
1187 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1188 {-1, rep_prefix_4_byte, false}}},
1189 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1190 {-1, libcall, false}}}};
/* memset expansion strategy table for BDVER tuning: loops for the smallest
   size classes, a REP-prefixed store for the middle classes and libcall for
   everything larger (the -1 catch-all class).  */
c727b835 1191static stringop_algs bdver_memset[2] = {
64766e8d
JH
1192 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1193 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1194 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1195 {-1, libcall, false}}}};
1196
/* Cost table used when tuning for BDVER.
   Layout, as described by the per-entry comments below:
     - the first braced group holds the register allocator costs (loads,
       stores and register moves, relative to an integer reg-reg move of 2);
     - then integer instruction costs expressed with COSTS_N_INSNS;
     - then memory move costs, gather costs, cache and prefetch parameters
       and the branch cost;
     - then x87 and SSE instruction costs;
     - finally the reassociation widths, the string operation strategy
       tables (bdver_memcpy / bdver_memset), the conditional branch costs
       and the alignment strings.  */
c727b835 1197const struct processor_costs bdver_cost = {
72bb85f8 1198 {
d321551c
L
1199 /* Start of register allocator costs. integer->integer move cost is 2. */
1200 8, /* cost for loading QImode using movzbl */
1201 {8, 8, 8}, /* cost of loading integer registers
1202 in QImode, HImode and SImode.
1203 Relative to reg-reg move (2). */
1204 {8, 8, 8}, /* cost of storing integer registers */
1205 4, /* cost of reg,reg fld/fst */
1206 {12, 12, 28}, /* cost of loading fp registers
1207 in SFmode, DFmode and XFmode */
1208 {10, 10, 18}, /* cost of storing fp registers
1209 in SFmode, DFmode and XFmode */
1210 4, /* cost of moving MMX register */
1211 {12, 12}, /* cost of loading MMX registers
1212 in SImode and DImode */
1213 {10, 10}, /* cost of storing MMX registers
1214 in SImode and DImode */
1215 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1216 {12, 12, 10, 40, 60}, /* cost of loading SSE registers
1217 in 32,64,128,256 and 512-bit */
1218 {10, 10, 10, 40, 60}, /* cost of storing SSE registers
1219 in 32,64,128,256 and 512-bit */
1220 16, 20, /* SSE->integer and integer->SSE moves */
1221 /* End of register allocator costs. */
72bb85f8 1222 },
d321551c 1223
64766e8d
JH
1224 COSTS_N_INSNS (1), /* cost of an add instruction */
1225 COSTS_N_INSNS (1), /* cost of a lea instruction */
1226 COSTS_N_INSNS (1), /* variable shift costs */
1227 COSTS_N_INSNS (1), /* constant shift costs */
1228 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1229 COSTS_N_INSNS (4), /* HI */
1230 COSTS_N_INSNS (4), /* SI */
1231 COSTS_N_INSNS (6), /* DI */
1232 COSTS_N_INSNS (6)}, /* other */
1233 0, /* cost of multiply per each bit set */
1234 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1235 COSTS_N_INSNS (35), /* HI */
1236 COSTS_N_INSNS (51), /* SI */
1237 COSTS_N_INSNS (83), /* DI */
1238 COSTS_N_INSNS (83)}, /* other */
1239 COSTS_N_INSNS (1), /* cost of movsx */
1240 COSTS_N_INSNS (1), /* cost of movzx */
1241 8, /* "large" insn */
1242 9, /* MOVE_RATIO */
df41dbaf 1243 {8, 8, 8}, /* cost of loading integer registers
64766e8d
JH
1244 in QImode, HImode and SImode.
1245 Relative to reg-reg move (2). */
df41dbaf 1246 {8, 8, 8}, /* cost of storing integer registers */
d321551c
L
1247 {12, 12, 10, 40, 60}, /* cost of loading SSE register
1248 in 32bit, 64bit, 128bit, 256bit and 512bit */
1249 {10, 10, 10, 40, 60}, /* cost of storing SSE register
1250 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1251 {12, 12, 10, 40, 60}, /* cost of unaligned loads. */
b7167993 1252 {10, 10, 10, 40, 60}, /* cost of unaligned stores. */
d321551c
L
1253 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1254 16, /* cost of moving SSE register to integer. */
a4fe6139
JH
1255 12, 12, /* Gather load static, per_elt. */
1256 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
1257 16, /* size of l1 cache. */
1258 2048, /* size of l2 cache. */
1259 64, /* size of prefetch block */
1260 /* New AMD processors never drop prefetches; if they cannot be performed
1261 immediately, they are queued. We set number of simultaneous prefetches
1262 to a large constant to reflect this (it probably is not a good idea not
1263 to limit number of prefetches at all, as their execution also takes some
1264 time). */
1265 100, /* number of parallel prefetches */
1266 2, /* Branch cost */
1267 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1268 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1269 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1270 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1271 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1272 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
6065f444 1273
c53c148c 1274 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1275 COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
1276 COSTS_N_INSNS (6), /* cost of MULSS instruction. */
1277 COSTS_N_INSNS (6), /* cost of MULSD instruction. */
c53c148c
JH
1278 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
1279 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
1280 /* 9-24; presumably the DIVSS latency range -- TODO confirm. */
1281 COSTS_N_INSNS (24), /* cost of DIVSS instruction. */
1282 /* 9-27; presumably the DIVSD latency range -- TODO confirm. */
1283 COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
1284 COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
1285 COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
64766e8d 1286 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
c727b835
RB
1287 bdver_memcpy,
1288 bdver_memset,
f6fd8f2b
JH
1289 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1290 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1291 "16:11:8", /* Loop alignment. */
1292 "16:8:8", /* Jump alignment. */
1293 "0:0:8", /* Label alignment. */
1294 "11", /* Func alignment. */
64766e8d
JH
1295};
1296
1297
1298/* ZNVER1 has optimized REP instruction for medium sized blocks, but for
1299 very small blocks it is better to use loop. For large blocks, libcall
1300 can do non-temporal accesses and beat inline considerably. */
/* memcpy expansion strategy table for ZNVER1 tuning.  Each entry lists
   {size class, algorithm, flag} triples, with -1 as the catch-all class.
   NOTE(review): presumably entry [0] is used for 32-bit and entry [1] for
   64-bit code -- confirm against the stringop_algs definition.  */
1301static stringop_algs znver1_memcpy[2] = {
1302 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1303 {-1, rep_prefix_4_byte, false}}},
1304 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1305 {-1, libcall, false}}}};
/* memset expansion strategy table for ZNVER1 tuning: loops for the smallest
   size classes, a REP-prefixed store for the middle classes and libcall for
   everything larger (the -1 catch-all class).  */
1306static stringop_algs znver1_memset[2] = {
1307 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1308 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1309 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1310 {-1, libcall, false}}}};
1311struct processor_costs znver1_cost = {
72bb85f8 1312 {
d321551c
L
1313 /* Start of register allocator costs. integer->integer move cost is 2. */
1314
1315 /* reg-reg moves are done by renaming and thus they are even cheaper than
1316 1 cycle. Because reg-reg move cost is 2 and the following tables correspond
1317 to doubles of latencies, we do not model this correctly. It does not
1318 seem to make practical difference to bump prices up even more. */
1319 6, /* cost for loading QImode using
1320 movzbl. */
1321 {6, 6, 6}, /* cost of loading integer registers
1322 in QImode, HImode and SImode.
1323 Relative to reg-reg move (2). */
1324 {8, 8, 8}, /* cost of storing integer
1325 registers. */
1326 2, /* cost of reg,reg fld/fst. */
1327 {6, 6, 16}, /* cost of loading fp registers
1328 in SFmode, DFmode and XFmode. */
1329 {8, 8, 16}, /* cost of storing fp registers
1330 in SFmode, DFmode and XFmode. */
1331 2, /* cost of moving MMX register. */
1332 {6, 6}, /* cost of loading MMX registers
1333 in SImode and DImode. */
1334 {8, 8}, /* cost of storing MMX registers
1335 in SImode and DImode. */
1336 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
1337 {6, 6, 6, 12, 24}, /* cost of loading SSE registers
1338 in 32,64,128,256 and 512-bit. */
1339 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
1340 in 32,64,128,256 and 512-bit. */
1341 6, 6, /* SSE->integer and integer->SSE moves. */
1342 /* End of register allocator costs. */
72bb85f8 1343 },
d321551c 1344
64766e8d
JH
1345 COSTS_N_INSNS (1), /* cost of an add instruction. */
1346 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1347 COSTS_N_INSNS (1), /* variable shift costs. */
1348 COSTS_N_INSNS (1), /* constant shift costs. */
1349 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1350 COSTS_N_INSNS (3), /* HI. */
1351 COSTS_N_INSNS (3), /* SI. */
6065f444
JH
1352 COSTS_N_INSNS (3), /* DI. */
1353 COSTS_N_INSNS (3)}, /* other. */
64766e8d
JH
1354 0, /* cost of multiply per each bit
1355 set. */
6065f444
JH
1356 /* Depending on parameters, idiv can get faster on ryzen. This is upper
1357 bound. */
1358 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
1359 COSTS_N_INSNS (22), /* HI. */
1360 COSTS_N_INSNS (30), /* SI. */
1361 COSTS_N_INSNS (45), /* DI. */
1362 COSTS_N_INSNS (45)}, /* other. */
64766e8d
JH
1363 COSTS_N_INSNS (1), /* cost of movsx. */
1364 COSTS_N_INSNS (1), /* cost of movzx. */
1365 8, /* "large" insn. */
1366 9, /* MOVE_RATIO. */
01118373 1367 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
1368 in QImode, HImode and SImode.
1369 Relative to reg-reg move (2). */
01118373 1370 {8, 8, 8}, /* cost of storing integer
64766e8d 1371 registers. */
d321551c
L
1372 {6, 6, 6, 12, 24}, /* cost of loading SSE register
1373 in 32bit, 64bit, 128bit, 256bit and 512bit */
1374 {8, 8, 8, 16, 32}, /* cost of storing SSE register
1375 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1376 {6, 6, 6, 12, 24}, /* cost of unaligned loads. */
b7167993 1377 {8, 8, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
1378 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
1379 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
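1380 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
1381 throughput 12. Approx 9 uops do not depend on vector size and every load
1382 is 7 uops.
NOTE(review): VGATHERDPD is named twice with different figures; the
second set presumably belongs to a different gather variant or vector
width -- confirm against the instruction tables. */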
1383 18, 8, /* Gather load static, per_elt. */
1384 18, 10, /* Gather store static, per_elt. */
64766e8d
JH
1385 32, /* size of l1 cache. */
1386 512, /* size of l2 cache. */
1387 64, /* size of prefetch block. */
1388 /* New AMD processors never drop prefetches; if they cannot be performed
1389 immediately, they are queued. We set the number of simultaneous prefetches
1390 to a large constant to reflect this (leaving the number of prefetches
1391 entirely unlimited is probably not a good idea, as their execution also
1392 takes some time). */
1393 100, /* number of parallel prefetches. */
1394 3, /* Branch cost. */
6065f444
JH
1395 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1396 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1397 /* Latency of fdiv is 8-15. */
1398 COSTS_N_INSNS (15), /* cost of FDIV instruction. */
1399 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1400 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1401 /* Latency of fsqrt is 4-10. */
1402 COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
1403
c53c148c 1404 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
1405 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1406 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
1407 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1408 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1409 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
1410 COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
1411 /* 9-13 */
1412 COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
1413 COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
1414 COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
64766e8d
JH
1415 /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
1416 and it can execute 2 integer additions and 2 multiplications thus
1417 reassociation may make sense up to width of 6. SPEC2k6 benchmarks suggest
1418 that 4 works better than 6 probably due to register pressure.
1419
1420 Integer vector operations are taken by FP unit and execute 3 vector
1421 plus/minus operations per cycle but only one multiply. This is adjusted
1422 in ix86_reassociation_width. */
1423 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
1424 znver1_memcpy,
1425 znver1_memset,
f6fd8f2b
JH
1426 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1427 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1428 "16", /* Loop alignment. */
1429 "16", /* Jump alignment. */
1430 "0:0:8", /* Label alignment. */
1431 "16", /* Func alignment. */
64766e8d
JH
1432};
1433
2901f42f
VK
1434/* ZNVER2 has an optimized REP instruction for medium-sized blocks, but for
1435 very small blocks it is better to use a loop. For large blocks, libcall
1436 can do non-temporal accesses and beat inline code considerably. */
1437static stringop_algs znver2_memcpy[2] = {
1438 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1439 {-1, rep_prefix_4_byte, false}}},
187dd65d 1440 {libcall, {{16, loop, false}, {64, rep_prefix_4_byte, false},
2901f42f
VK
1441 {-1, libcall, false}}}};
1442static stringop_algs znver2_memset[2] = {
1443 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1444 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
187dd65d 1445 {libcall, {{24, rep_prefix_4_byte, false}, {128, rep_prefix_8_byte, false},
2901f42f
VK
1446 {-1, libcall, false}}}};
1447
1448struct processor_costs znver2_cost = {
72bb85f8 1449 {
d321551c 1450 /* Start of register allocator costs. integer->integer move cost is 2. */
2901f42f
VK
1451
1452 /* reg-reg moves are done by renaming and thus they are even cheaper than
1453 1 cycle. Because reg-reg move cost is 2 and the following tables correspond
1454 to twice the latencies, we do not model this correctly. It does not
1455 seem to make a practical difference to bump prices up even more. */
1456 6, /* cost for loading QImode using
1457 movzbl. */
1458 {6, 6, 6}, /* cost of loading integer registers
1459 in QImode, HImode and SImode.
1460 Relative to reg-reg move (2). */
1461 {8, 8, 8}, /* cost of storing integer
1462 registers. */
1463 2, /* cost of reg,reg fld/fst. */
1464 {6, 6, 16}, /* cost of loading fp registers
1465 in SFmode, DFmode and XFmode. */
1466 {8, 8, 16}, /* cost of storing fp registers
1467 in SFmode, DFmode and XFmode. */
1468 2, /* cost of moving MMX register. */
1469 {6, 6}, /* cost of loading MMX registers
1470 in SImode and DImode. */
1471 {8, 8}, /* cost of storing MMX registers
1472 in SImode and DImode. */
187dd65d 1473 2, 2, 3, /* cost of moving XMM,YMM,ZMM
2901f42f 1474 register. */
187dd65d 1475 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
2901f42f 1476 in 32,64,128,256 and 512-bit. */
2901f42f
VK
1477 {8, 8, 8, 8, 16}, /* cost of storing SSE registers
1478 in 32,64,128,256 and 512-bit. */
2901f42f
VK
1479 6, 6, /* SSE->integer and integer->SSE
1480 moves. */
d321551c 1481 /* End of register allocator costs. */
72bb85f8 1482 },
d321551c
L
1483
1484 COSTS_N_INSNS (1), /* cost of an add instruction. */
1485 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1486 COSTS_N_INSNS (1), /* variable shift costs. */
1487 COSTS_N_INSNS (1), /* constant shift costs. */
1488 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1489 COSTS_N_INSNS (3), /* HI. */
1490 COSTS_N_INSNS (3), /* SI. */
1491 COSTS_N_INSNS (3), /* DI. */
1492 COSTS_N_INSNS (3)}, /* other. */
1493 0, /* cost of multiply per each bit
1494 set. */
1495 /* Depending on parameters, idiv can get faster on Ryzen. This is an upper
1496 bound. */
1497 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
1498 COSTS_N_INSNS (22), /* HI. */
1499 COSTS_N_INSNS (30), /* SI. */
1500 COSTS_N_INSNS (45), /* DI. */
1501 COSTS_N_INSNS (45)}, /* other. */
1502 COSTS_N_INSNS (1), /* cost of movsx. */
1503 COSTS_N_INSNS (1), /* cost of movzx. */
1504 8, /* "large" insn. */
1505 9, /* MOVE_RATIO. */
1506 {6, 6, 6}, /* cost of loading integer registers
1507 in QImode, HImode and SImode.
1508 Relative to reg-reg move (2). */
1509 {8, 8, 8}, /* cost of storing integer
1510 registers. */
1511 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
1512 in 32bit, 64bit, 128bit, 256bit and 512bit */
1513 {8, 8, 8, 8, 16}, /* cost of storing SSE register
1514 in 32bit, 64bit, 128bit, 256bit and 512bit */
1515 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
1516 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
1517 2, 2, 3, /* cost of moving XMM,YMM,ZMM
1518 register. */
1519 6, /* cost of moving SSE register to integer. */
2901f42f
VK
1520 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
1521 throughput 12. Approx 9 uops do not depend on vector size and every load
1522 is 7 uops. */
1523 18, 8, /* Gather load static, per_elt. */
1524 18, 10, /* Gather store static, per_elt. */
1525 32, /* size of l1 cache. */
1526 512, /* size of l2 cache. */
1527 64, /* size of prefetch block. */
1528 /* New AMD processors never drop prefetches; if they cannot be performed
1529 immediately, they are queued. We set the number of simultaneous prefetches
1530 to a large constant to reflect this (leaving the number of prefetches
1531 entirely unlimited is probably not a good idea, as their execution also
1532 takes some time). */
1533 100, /* number of parallel prefetches. */
1534 3, /* Branch cost. */
1535 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1536 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1537 /* Latency of fdiv is 8-15. */
1538 COSTS_N_INSNS (15), /* cost of FDIV instruction. */
1539 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1540 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1541 /* Latency of fsqrt is 4-10. */
1542 COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
1543
1544 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1545 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1546 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
187dd65d 1547 COSTS_N_INSNS (3), /* cost of MULSD instruction. */
2901f42f
VK
1548 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1549 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
1550 COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
1551 /* 9-13. */
1552 COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
1553 COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
1554 COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
1555 /* Zen can execute 4 integer operations per cycle. FP operations
1556 take 3 cycles and it can execute 2 integer additions and 2
1557 multiplications thus reassociation may make sense up to width of 6.
1558 SPEC2k6 benchmarks suggest
1559 that 4 works better than 6 probably due to register pressure.
1560
1561 Integer vector operations are taken by FP unit and execute 3 vector
1562 plus/minus operations per cycle but only one multiply. This is adjusted
1563 in ix86_reassociation_width. */
1564 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
1565 znver2_memcpy,
1566 znver2_memset,
1567 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1568 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
1569 "16", /* Loop alignment. */
1570 "16", /* Jump alignment. */
1571 "0:0:8", /* Label alignment. */
1572 "16", /* Func alignment. */
1573};
1574
c234d831
UB
1575/* skylake_cost should produce code tuned for the Skylake family of CPUs. */
1576static stringop_algs skylake_memcpy[2] = {
1577 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
6e559c70 1578 {libcall, {{16, loop, false}, {512, unrolled_loop, false},
c234d831
UB
1579 {-1, libcall, false}}}};
1580
1581static stringop_algs skylake_memset[2] = {
1582 {libcall, {{6, loop_1_byte, true},
1583 {24, loop, true},
1584 {8192, rep_prefix_4_byte, true},
1585 {-1, libcall, false}}},
6e559c70 1586 {libcall, {{24, loop, true}, {512, unrolled_loop, false},
c234d831
UB
1587 {-1, libcall, false}}}};
1588
1589static const
1590struct processor_costs skylake_cost = {
72bb85f8 1591 {
d321551c
L
1592 /* Start of register allocator costs. integer->integer move cost is 2. */
1593 6, /* cost for loading QImode using movzbl */
1594 {4, 4, 4}, /* cost of loading integer registers
1595 in QImode, HImode and SImode.
1596 Relative to reg-reg move (2). */
1597 {6, 6, 3}, /* cost of storing integer registers */
1598 2, /* cost of reg,reg fld/fst */
1599 {6, 6, 8}, /* cost of loading fp registers
1600 in SFmode, DFmode and XFmode */
1601 {6, 6, 10}, /* cost of storing fp registers
1602 in SFmode, DFmode and XFmode */
1603 2, /* cost of moving MMX register */
1604 {6, 6}, /* cost of loading MMX registers
1605 in SImode and DImode */
1606 {6, 6}, /* cost of storing MMX registers
1607 in SImode and DImode */
1608 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
1609 {6, 6, 6, 10, 20}, /* cost of loading SSE registers
1610 in 32,64,128,256 and 512-bit */
1611 {8, 8, 8, 12, 24}, /* cost of storing SSE registers
1612 in 32,64,128,256 and 512-bit */
4e9ad7c9 1613 6, 6, /* SSE->integer and integer->SSE moves */
d321551c 1614 /* End of register allocator costs. */
72bb85f8 1615 },
d321551c 1616
c234d831
UB
1617 COSTS_N_INSNS (1), /* cost of an add instruction */
1618 COSTS_N_INSNS (1)+1, /* cost of a lea instruction */
1619 COSTS_N_INSNS (1), /* variable shift costs */
1620 COSTS_N_INSNS (1), /* constant shift costs */
1621 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1622 COSTS_N_INSNS (4), /* HI */
1623 COSTS_N_INSNS (3), /* SI */
a2ef9558
MT
1624 COSTS_N_INSNS (3), /* DI */
1625 COSTS_N_INSNS (3)}, /* other */
c234d831 1626 0, /* cost of multiply per each bit set */
02308bd3
MT
1627 /* Expanding div/mod currently doesn't consider parallelism. So the cost
1628 model is not realistic. We compensate by increasing the latencies a bit. */
1629 {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
1630 COSTS_N_INSNS (11), /* HI */
1631 COSTS_N_INSNS (14), /* SI */
c234d831
UB
1632 COSTS_N_INSNS (76), /* DI */
1633 COSTS_N_INSNS (76)}, /* other */
1634 COSTS_N_INSNS (1), /* cost of movsx */
1635 COSTS_N_INSNS (0), /* cost of movzx */
1636 8, /* "large" insn */
1637 17, /* MOVE_RATIO */
c234d831
UB
1638 {4, 4, 4}, /* cost of loading integer registers
1639 in QImode, HImode and SImode.
1640 Relative to reg-reg move (2). */
101a0841 1641 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
1642 {6, 6, 6, 10, 20}, /* cost of loading SSE register
1643 in 32bit, 64bit, 128bit, 256bit and 512bit */
1644 {8, 8, 8, 12, 24}, /* cost of storing SSE register
1645 in 32bit, 64bit, 128bit, 256bit and 512bit */
c234d831 1646 {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
c234d831 1647 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
d321551c
L
1648 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
1649 2, /* cost of moving SSE register to integer. */
c234d831
UB
1650 20, 8, /* Gather load static, per_elt. */
1651 22, 10, /* Gather store static, per_elt. */
1652 64, /* size of l1 cache. */
1653 512, /* size of l2 cache. */
1654 64, /* size of prefetch block */
1655 6, /* number of parallel prefetches */
1656 3, /* Branch cost */
1657 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1658 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1659 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1660 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1661 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1662 COSTS_N_INSNS (20), /* cost of FSQRT instruction. */
1663
1664 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1665 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1666 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1667 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
1668 COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
1669 COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
1670 COSTS_N_INSNS (11), /* cost of DIVSS instruction. */
1671 COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
1672 COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
1673 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
1674 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
1675 skylake_memcpy,
1676 skylake_memset,
1677 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1678 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1679 "16:11:8", /* Loop alignment. */
1680 "16:11:8", /* Jump alignment. */
1681 "0:0:8", /* Label alignment. */
1682 "16", /* Func alignment. */
c234d831 1683};
64766e8d
JH
1684 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1685 very small blocks it is better to use a loop. For large blocks, libcall can
1686 do non-temporal accesses and beat inline code considerably. */
1687static stringop_algs btver1_memcpy[2] = {
1688 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1689 {-1, rep_prefix_4_byte, false}}},
1690 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1691 {-1, libcall, false}}}};
1692static stringop_algs btver1_memset[2] = {
1693 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1694 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1695 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1696 {-1, libcall, false}}}};
1697const struct processor_costs btver1_cost = {
72bb85f8 1698 {
d321551c
L
1699 /* Start of register allocator costs. integer->integer move cost is 2. */
1700 8, /* cost for loading QImode using movzbl */
1701 {6, 8, 6}, /* cost of loading integer registers
1702 in QImode, HImode and SImode.
1703 Relative to reg-reg move (2). */
1704 {6, 8, 6}, /* cost of storing integer registers */
1705 4, /* cost of reg,reg fld/fst */
1706 {12, 12, 28}, /* cost of loading fp registers
1707 in SFmode, DFmode and XFmode */
1708 {12, 12, 38}, /* cost of storing fp registers
1709 in SFmode, DFmode and XFmode */
1710 4, /* cost of moving MMX register */
1711 {10, 10}, /* cost of loading MMX registers
1712 in SImode and DImode */
1713 {12, 12}, /* cost of storing MMX registers
1714 in SImode and DImode */
1715 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1716 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
1717 in 32,64,128,256 and 512-bit */
1718 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
1719 in 32,64,128,256 and 512-bit */
1720 14, 14, /* SSE->integer and integer->SSE moves */
1721 /* End of register allocator costs. */
72bb85f8 1722 },
d321551c 1723
64766e8d
JH
1724 COSTS_N_INSNS (1), /* cost of an add instruction */
1725 COSTS_N_INSNS (2), /* cost of a lea instruction */
1726 COSTS_N_INSNS (1), /* variable shift costs */
1727 COSTS_N_INSNS (1), /* constant shift costs */
1728 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1729 COSTS_N_INSNS (4), /* HI */
1730 COSTS_N_INSNS (3), /* SI */
1731 COSTS_N_INSNS (4), /* DI */
1732 COSTS_N_INSNS (5)}, /* other */
1733 0, /* cost of multiply per each bit set */
1734 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1735 COSTS_N_INSNS (35), /* HI */
1736 COSTS_N_INSNS (51), /* SI */
1737 COSTS_N_INSNS (83), /* DI */
1738 COSTS_N_INSNS (83)}, /* other */
1739 COSTS_N_INSNS (1), /* cost of movsx */
1740 COSTS_N_INSNS (1), /* cost of movzx */
1741 8, /* "large" insn */
1742 9, /* MOVE_RATIO */
df41dbaf 1743 {6, 8, 6}, /* cost of loading integer registers
64766e8d
JH
1744 in QImode, HImode and SImode.
1745 Relative to reg-reg move (2). */
df41dbaf 1746 {6, 8, 6}, /* cost of storing integer registers */
d321551c
L
1747 {10, 10, 12, 48, 96}, /* cost of loading SSE register
1748 in 32bit, 64bit, 128bit, 256bit and 512bit */
1749 {10, 10, 12, 48, 96}, /* cost of storing SSE register
1750 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1751 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
b7167993 1752 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
d321551c
L
1753 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1754 14, /* cost of moving SSE register to integer. */
a4fe6139
JH
1755 10, 10, /* Gather load static, per_elt. */
1756 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
1757 32, /* size of l1 cache. */
1758 512, /* size of l2 cache. */
1759 64, /* size of prefetch block */
1760 100, /* number of parallel prefetches */
1761 2, /* Branch cost */
1762 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1763 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1764 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1765 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1766 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1767 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1768
c53c148c 1769 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
1770 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1771 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
1772 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1773 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1774 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
1775 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
1776 COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
1777 COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
1778 COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
64766e8d
JH
1779 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1780 btver1_memcpy,
1781 btver1_memset,
f6fd8f2b
JH
1782 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
1783 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1784 "16:11:8", /* Loop alignment. */
1785 "16:8:8", /* Jump alignment. */
1786 "0:0:8", /* Label alignment. */
1787 "11", /* Func alignment. */
64766e8d
JH
1788};
1789
1790static stringop_algs btver2_memcpy[2] = {
1791 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1792 {-1, rep_prefix_4_byte, false}}},
1793 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1794 {-1, libcall, false}}}};
1795static stringop_algs btver2_memset[2] = {
1796 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1797 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1798 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1799 {-1, libcall, false}}}};
1800const struct processor_costs btver2_cost = {
72bb85f8 1801 {
d321551c
L
1802 /* Start of register allocator costs. integer->integer move cost is 2. */
1803 8, /* cost for loading QImode using movzbl */
1804 {8, 8, 6}, /* cost of loading integer registers
1805 in QImode, HImode and SImode.
1806 Relative to reg-reg move (2). */
1807 {8, 8, 6}, /* cost of storing integer registers */
1808 4, /* cost of reg,reg fld/fst */
1809 {12, 12, 28}, /* cost of loading fp registers
1810 in SFmode, DFmode and XFmode */
1811 {12, 12, 38}, /* cost of storing fp registers
1812 in SFmode, DFmode and XFmode */
1813 4, /* cost of moving MMX register */
1814 {10, 10}, /* cost of loading MMX registers
1815 in SImode and DImode */
1816 {12, 12}, /* cost of storing MMX registers
1817 in SImode and DImode */
1818 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1819 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
1820 in 32,64,128,256 and 512-bit */
1821 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
1822 in 32,64,128,256 and 512-bit */
1823 14, 14, /* SSE->integer and integer->SSE moves */
1824 /* End of register allocator costs. */
72bb85f8 1825 },
d321551c 1826
64766e8d
JH
1827 COSTS_N_INSNS (1), /* cost of an add instruction */
1828 COSTS_N_INSNS (2), /* cost of a lea instruction */
1829 COSTS_N_INSNS (1), /* variable shift costs */
1830 COSTS_N_INSNS (1), /* constant shift costs */
1831 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1832 COSTS_N_INSNS (4), /* HI */
1833 COSTS_N_INSNS (3), /* SI */
1834 COSTS_N_INSNS (4), /* DI */
1835 COSTS_N_INSNS (5)}, /* other */
1836 0, /* cost of multiply per each bit set */
1837 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1838 COSTS_N_INSNS (35), /* HI */
1839 COSTS_N_INSNS (51), /* SI */
1840 COSTS_N_INSNS (83), /* DI */
1841 COSTS_N_INSNS (83)}, /* other */
1842 COSTS_N_INSNS (1), /* cost of movsx */
1843 COSTS_N_INSNS (1), /* cost of movzx */
1844 8, /* "large" insn */
1845 9, /* MOVE_RATIO */
df41dbaf 1846 {8, 8, 6}, /* cost of loading integer registers
64766e8d
JH
1847 in QImode, HImode and SImode.
1848 Relative to reg-reg move (2). */
df41dbaf 1849 {8, 8, 6}, /* cost of storing integer registers */
d321551c
L
1850 {10, 10, 12, 48, 96}, /* cost of loading SSE register
1851 in 32bit, 64bit, 128bit, 256bit and 512bit */
1852 {10, 10, 12, 48, 96}, /* cost of storing SSE register
1853 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1854 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
b7167993 1855 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
d321551c
L
1856 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1857 14, /* cost of moving SSE register to integer. */
a4fe6139
JH
1858 10, 10, /* Gather load static, per_elt. */
1859 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
1860 32, /* size of l1 cache. */
1861 2048, /* size of l2 cache. */
1862 64, /* size of prefetch block */
1863 100, /* number of parallel prefetches */
1864 2, /* Branch cost */
1865 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1866 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1867 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1870 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1871
c53c148c 1872 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
1873 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1874 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
1875 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1876 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1877 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
1878 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
1879 COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
1880 COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
1881 COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
64766e8d
JH
1882 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1883 btver2_memcpy,
1884 btver2_memset,
f6fd8f2b
JH
1885 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
1886 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1887 "16:11:8", /* Loop alignment. */
1888 "16:8:8", /* Jump alignment. */
1889 "0:0:8", /* Label alignment. */
1890 "11", /* Func alignment. */
64766e8d
JH
1891};
1892
1893static stringop_algs pentium4_memcpy[2] = {
1894 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1895 DUMMY_STRINGOP_ALGS};
1896static stringop_algs pentium4_memset[2] = {
1897 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1898 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1899 DUMMY_STRINGOP_ALGS};
1900
1901static const
1902struct processor_costs pentium4_cost = {
72bb85f8 1903 {
d321551c 1904 /* Start of register allocator costs. integer->integer move cost is 2. */
df41dbaf 1905 5, /* cost for loading QImode using movzbl */
64766e8d
JH
1906 {4, 5, 4}, /* cost of loading integer registers
1907 in QImode, HImode and SImode.
1908 Relative to reg-reg move (2). */
1909 {2, 3, 2}, /* cost of storing integer registers */
df41dbaf
JH
1910 12, /* cost of reg,reg fld/fst */
1911 {14, 14, 14}, /* cost of loading fp registers
64766e8d 1912 in SFmode, DFmode and XFmode */
df41dbaf 1913 {14, 14, 14}, /* cost of storing fp registers
64766e8d 1914 in SFmode, DFmode and XFmode */
df41dbaf
JH
1915 12, /* cost of moving MMX register */
1916 {16, 16}, /* cost of loading MMX registers
64766e8d 1917 in SImode and DImode */
df41dbaf 1918 {16, 16}, /* cost of storing MMX registers
64766e8d 1919 in SImode and DImode */
df41dbaf
JH
1920 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
1921 {16, 16, 16, 32, 64}, /* cost of loading SSE registers
1922 in 32,64,128,256 and 512-bit */
d321551c
L
1923 {16, 16, 16, 32, 64}, /* cost of storing SSE registers
1924 in 32,64,128,256 and 512-bit */
1925 20, 12, /* SSE->integer and integer->SSE moves */
1926 /* End of register allocator costs. */
72bb85f8 1927 },
d321551c
L
1928
1929 COSTS_N_INSNS (1), /* cost of an add instruction */
1930 COSTS_N_INSNS (3), /* cost of a lea instruction */
1931 COSTS_N_INSNS (4), /* variable shift costs */
1932 COSTS_N_INSNS (4), /* constant shift costs */
1933 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1934 COSTS_N_INSNS (15), /* HI */
1935 COSTS_N_INSNS (15), /* SI */
1936 COSTS_N_INSNS (15), /* DI */
1937 COSTS_N_INSNS (15)}, /* other */
1938 0, /* cost of multiply per each bit set */
1939 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1940 COSTS_N_INSNS (56), /* HI */
1941 COSTS_N_INSNS (56), /* SI */
1942 COSTS_N_INSNS (56), /* DI */
1943 COSTS_N_INSNS (56)}, /* other */
1944 COSTS_N_INSNS (1), /* cost of movsx */
1945 COSTS_N_INSNS (1), /* cost of movzx */
1946 16, /* "large" insn */
1947 6, /* MOVE_RATIO */
1948 {4, 5, 4}, /* cost of loading integer registers
1949 in QImode, HImode and SImode.
1950 Relative to reg-reg move (2). */
1951 {2, 3, 2}, /* cost of storing integer registers */
1952 {16, 16, 16, 32, 64}, /* cost of loading SSE register
1953 in 32bit, 64bit, 128bit, 256bit and 512bit */
1954 {16, 16, 16, 32, 64}, /* cost of storing SSE register
1955 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 1956 {32, 32, 32, 64, 128}, /* cost of unaligned loads. */
df41dbaf 1957 {32, 32, 32, 64, 128}, /* cost of unaligned stores. */
d321551c
L
1958 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
1959 20, /* cost of moving SSE register to integer. */
a4fe6139
JH
1960 16, 16, /* Gather load static, per_elt. */
1961 16, 16, /* Gather store static, per_elt. */
64766e8d
JH
1962 8, /* size of l1 cache. */
1963 256, /* size of l2 cache. */
1964 64, /* size of prefetch block */
1965 6, /* number of parallel prefetches */
1966 2, /* Branch cost */
1967 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1968 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1969 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1970 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1971 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1972 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
6065f444 1973
c53c148c 1974 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1975 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1976 COSTS_N_INSNS (6), /* cost of MULSS instruction. */
1977 COSTS_N_INSNS (6), /* cost of MULSD instruction. */
c53c148c
JH
1978 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
1979 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
1980 COSTS_N_INSNS (23), /* cost of DIVSS instruction. */
1981 COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
1982 COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
1983 COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
64766e8d
JH
1984 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1985 pentium4_memcpy,
1986 pentium4_memset,
f6fd8f2b
JH
1987 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1988 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1989 NULL, /* Loop alignment. */
1990 NULL, /* Jump alignment. */
1991 NULL, /* Label alignment. */
1992 NULL, /* Func alignment. */
64766e8d
JH
1993};
1994
1995static stringop_algs nocona_memcpy[2] = {
1996 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1997 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1998 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1999
2000static stringop_algs nocona_memset[2] = {
2001 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
2002 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2003 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
2004 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2005
2006static const
2007struct processor_costs nocona_cost = {
72bb85f8 2008 {
d321551c
L
2009 /* Start of register allocator costs. integer->integer move cost is 2. */
2010 4, /* cost for loading QImode using movzbl */
2011 {4, 4, 4}, /* cost of loading integer registers
2012 in QImode, HImode and SImode.
2013 Relative to reg-reg move (2). */
2014 {4, 4, 4}, /* cost of storing integer registers */
2015 12, /* cost of reg,reg fld/fst */
2016 {14, 14, 14}, /* cost of loading fp registers
2017 in SFmode, DFmode and XFmode */
2018 {14, 14, 14}, /* cost of storing fp registers
2019 in SFmode, DFmode and XFmode */
2020 14, /* cost of moving MMX register */
2021 {12, 12}, /* cost of loading MMX registers
2022 in SImode and DImode */
2023 {12, 12}, /* cost of storing MMX registers
2024 in SImode and DImode */
2025 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
2026 {12, 12, 12, 24, 48}, /* cost of loading SSE registers
2027 in 32,64,128,256 and 512-bit */
2028 {12, 12, 12, 24, 48}, /* cost of storing SSE registers
2029 in 32,64,128,256 and 512-bit */
2030 20, 12, /* SSE->integer and integer->SSE moves */
2031 /* End of register allocator costs. */
72bb85f8 2032 },
d321551c 2033
64766e8d
JH
2034 COSTS_N_INSNS (1), /* cost of an add instruction */
2035 COSTS_N_INSNS (1), /* cost of a lea instruction */
2036 COSTS_N_INSNS (1), /* variable shift costs */
2037 COSTS_N_INSNS (1), /* constant shift costs */
2038 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
2039 COSTS_N_INSNS (10), /* HI */
2040 COSTS_N_INSNS (10), /* SI */
2041 COSTS_N_INSNS (10), /* DI */
2042 COSTS_N_INSNS (10)}, /* other */
2043 0, /* cost of multiply per each bit set */
2044 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
2045 COSTS_N_INSNS (66), /* HI */
2046 COSTS_N_INSNS (66), /* SI */
2047 COSTS_N_INSNS (66), /* DI */
2048 COSTS_N_INSNS (66)}, /* other */
2049 COSTS_N_INSNS (1), /* cost of movsx */
2050 COSTS_N_INSNS (1), /* cost of movzx */
2051 16, /* "large" insn */
2052 17, /* MOVE_RATIO */
64766e8d
JH
2053 {4, 4, 4}, /* cost of loading integer registers
2054 in QImode, HImode and SImode.
2055 Relative to reg-reg move (2). */
2056 {4, 4, 4}, /* cost of storing integer registers */
d321551c
L
2057 {12, 12, 12, 24, 48}, /* cost of loading SSE register
2058 in 32bit, 64bit, 128bit, 256bit and 512bit */
2059 {12, 12, 12, 24, 48}, /* cost of storing SSE register
2060 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2061 {24, 24, 24, 48, 96}, /* cost of unaligned loads. */
df41dbaf 2062 {24, 24, 24, 48, 96}, /* cost of unaligned stores. */
d321551c
L
2063 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
2064 20, /* cost of moving SSE register to integer. */
a4fe6139
JH
2065 12, 12, /* Gather load static, per_elt. */
2066 12, 12, /* Gather store static, per_elt. */
64766e8d
JH
2067 8, /* size of l1 cache. */
2068 1024, /* size of l2 cache. */
2069 64, /* size of prefetch block */
2070 8, /* number of parallel prefetches */
2071 1, /* Branch cost */
2072 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
2073 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2074 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
2075 COSTS_N_INSNS (3), /* cost of FABS instruction. */
2076 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
2077 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
6065f444 2078
c53c148c 2079 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
2080 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
2081 COSTS_N_INSNS (7), /* cost of MULSS instruction. */
2082 COSTS_N_INSNS (7), /* cost of MULSD instruction. */
c53c148c
JH
2083 COSTS_N_INSNS (7), /* cost of FMA SS instruction. */
2084 COSTS_N_INSNS (7), /* cost of FMA SD instruction. */
6065f444
JH
2085 COSTS_N_INSNS (32), /* cost of DIVSS instruction. */
2086 COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
2087 COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
2088 COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
64766e8d
JH
2089 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2090 nocona_memcpy,
2091 nocona_memset,
f6fd8f2b
JH
2092 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2093 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2094 NULL, /* Loop alignment. */
2095 NULL, /* Jump alignment. */
2096 NULL, /* Label alignment. */
2097 NULL, /* Func alignment. */
64766e8d
JH
2098};
2099
/* memcpy strategy table referenced by atom_cost below.
   NOTE(review): each element appears to be {algorithm, {{max size,
   algorithm, flag}, ...}} with a size of -1 closing the list, and the two
   elements are presumably the 32-bit and 64-bit variants -- confirm against
   the stringop_algs declaration.  */
2100static stringop_algs atom_memcpy[2] = {
2101 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2102 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2103 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset strategy table referenced by atom_cost below.
   NOTE(review): same layout as the memcpy tables; the two elements are
   presumably the 32-bit and 64-bit variants -- confirm against the
   stringop_algs declaration.  */
2104static stringop_algs atom_memset[2] = {
2105 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2106 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2107 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2108 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Processor cost table for Atom tuning (atom_cost).  As stated at the top
   of this file, processor costs are relative to an add.  The leading braced
   group holds the register allocator move costs (between the Start/End
   markers); the remaining fields are instruction costs, cache and prefetch
   parameters, the atom_memcpy/atom_memset string-operation tables, branch
   costs and alignment strings.  The initializer is positional, so the order
   of the entries must not be changed.  */
2109static const
2110struct processor_costs atom_cost = {
72bb85f8 2111 {
d321551c
L
2112 /* Start of register allocator costs. integer->integer move cost is 2. */
2113 6, /* cost for loading QImode using movzbl */
2114 {6, 6, 6}, /* cost of loading integer registers
2115 in QImode, HImode and SImode.
2116 Relative to reg-reg move (2). */
2117 {6, 6, 6}, /* cost of storing integer registers */
2118 4, /* cost of reg,reg fld/fst */
2119 {6, 6, 18}, /* cost of loading fp registers
2120 in SFmode, DFmode and XFmode */
2121 {14, 14, 24}, /* cost of storing fp registers
2122 in SFmode, DFmode and XFmode */
2123 2, /* cost of moving MMX register */
2124 {8, 8}, /* cost of loading MMX registers
2125 in SImode and DImode */
2126 {10, 10}, /* cost of storing MMX registers
2127 in SImode and DImode */
2128 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2129 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
2130 in 32,64,128,256 and 512-bit */
2131 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
2132 in 32,64,128,256 and 512-bit */
2133 8, 6, /* SSE->integer and integer->SSE moves */
2134 /* End of register allocator costs. */
72bb85f8 2135 },
d321551c 2136
64766e8d
JH
2137 COSTS_N_INSNS (1), /* cost of an add instruction */
2138 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2139 COSTS_N_INSNS (1), /* variable shift costs */
2140 COSTS_N_INSNS (1), /* constant shift costs */
2141 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2142 COSTS_N_INSNS (4), /* HI */
2143 COSTS_N_INSNS (3), /* SI */
2144 COSTS_N_INSNS (4), /* DI */
2145 COSTS_N_INSNS (2)}, /* other */
2146 0, /* cost of multiply per each bit set */
2147 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2148 COSTS_N_INSNS (26), /* HI */
2149 COSTS_N_INSNS (42), /* SI */
2150 COSTS_N_INSNS (74), /* DI */
2151 COSTS_N_INSNS (74)}, /* other */
2152 COSTS_N_INSNS (1), /* cost of movsx */
2153 COSTS_N_INSNS (1), /* cost of movzx */
2154 8, /* "large" insn */
2155 17, /* MOVE_RATIO */
df41dbaf 2156 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
2157 in QImode, HImode and SImode.
2158 Relative to reg-reg move (2). */
df41dbaf 2159 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2160 {8, 8, 8, 16, 32}, /* cost of loading SSE register
2161 in 32bit, 64bit, 128bit, 256bit and 512bit */
2162 {8, 8, 8, 16, 32}, /* cost of storing SSE register
2163 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2164 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 2165 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
2166 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2167 8, /* cost of moving SSE register to integer. */
a4fe6139
JH
2168 8, 8, /* Gather load static, per_elt. */
2169 8, 8, /* Gather store static, per_elt. */
64766e8d
JH
2170 32, /* size of l1 cache. */
2171 256, /* size of l2 cache. */
2172 64, /* size of prefetch block */
2173 6, /* number of parallel prefetches */
2174 3, /* Branch cost */
2175 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2176 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2177 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2178 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2179 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2180 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 2181
c53c148c 2182 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2183 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
2184 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2185 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
2186 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2187 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2188 COSTS_N_INSNS (31), /* cost of DIVSS instruction. */
2189 COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
2190 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
2191 COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
64766e8d
JH
2192 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
2193 atom_memcpy,
2194 atom_memset,
f6fd8f2b
JH
2195 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2196 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2197 "16", /* Loop alignment. */
2198 "16:8:8", /* Jump alignment. */
2199 "0:0:8", /* Label alignment. */
2200 "16", /* Func alignment. */
64766e8d
JH
2201};
2202
/* memcpy strategy table referenced by slm_cost below.  The entries are
   identical to those of atom_memcpy.
   NOTE(review): the two elements are presumably the 32-bit and 64-bit
   variants -- confirm against the stringop_algs declaration.  */
2203static stringop_algs slm_memcpy[2] = {
2204 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2205 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2206 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset strategy table referenced by slm_cost below.  The entries are
   identical to those of atom_memset.
   NOTE(review): the two elements are presumably the 32-bit and 64-bit
   variants -- confirm against the stringop_algs declaration.  */
2207static stringop_algs slm_memset[2] = {
2208 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2209 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2210 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2211 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Processor cost table slm_cost (NOTE(review): presumably Silvermont --
   confirm where the table is selected).  As stated at the top of this
   file, processor costs are relative to an add.  The leading braced group
   holds the register allocator move costs (between the Start/End markers);
   the remaining fields are instruction costs, cache and prefetch
   parameters, the slm_memcpy/slm_memset string-operation tables, branch
   costs and alignment strings.  The initializer is positional, so the order
   of the entries must not be changed.  */
2212static const
2213struct processor_costs slm_cost = {
72bb85f8 2214 {
d321551c
L
2215 /* Start of register allocator costs. integer->integer move cost is 2. */
2216 8, /* cost for loading QImode using movzbl */
2217 {8, 8, 8}, /* cost of loading integer registers
2218 in QImode, HImode and SImode.
2219 Relative to reg-reg move (2). */
2220 {6, 6, 6}, /* cost of storing integer registers */
2221 2, /* cost of reg,reg fld/fst */
2222 {8, 8, 18}, /* cost of loading fp registers
2223 in SFmode, DFmode and XFmode */
2224 {6, 6, 18}, /* cost of storing fp registers
2225 in SFmode, DFmode and XFmode */
2226 2, /* cost of moving MMX register */
2227 {8, 8}, /* cost of loading MMX registers
2228 in SImode and DImode */
2229 {6, 6}, /* cost of storing MMX registers
2230 in SImode and DImode */
2231 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2232 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
2233 in 32,64,128,256 and 512-bit */
2234 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
2235 in 32,64,128,256 and 512-bit */
2236 8, 6, /* SSE->integer and integer->SSE moves */
2237 /* End of register allocator costs. */
72bb85f8 2238 },
d321551c 2239
64766e8d
JH
2240 COSTS_N_INSNS (1), /* cost of an add instruction */
2241 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2242 COSTS_N_INSNS (1), /* variable shift costs */
2243 COSTS_N_INSNS (1), /* constant shift costs */
2244 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2245 COSTS_N_INSNS (3), /* HI */
2246 COSTS_N_INSNS (3), /* SI */
2247 COSTS_N_INSNS (4), /* DI */
2248 COSTS_N_INSNS (2)}, /* other */
2249 0, /* cost of multiply per each bit set */
2250 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2251 COSTS_N_INSNS (26), /* HI */
2252 COSTS_N_INSNS (42), /* SI */
2253 COSTS_N_INSNS (74), /* DI */
2254 COSTS_N_INSNS (74)}, /* other */
2255 COSTS_N_INSNS (1), /* cost of movsx */
2256 COSTS_N_INSNS (1), /* cost of movzx */
2257 8, /* "large" insn */
2258 17, /* MOVE_RATIO */
df41dbaf 2259 {8, 8, 8}, /* cost of loading integer registers
64766e8d
JH
2260 in QImode, HImode and SImode.
2261 Relative to reg-reg move (2). */
df41dbaf 2262 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2263 {8, 8, 8, 16, 32}, /* cost of loading SSE register
2264 in 32bit, 64bit, 128bit, 256bit and 512bit */
2265 {8, 8, 8, 16, 32}, /* cost of storing SSE register
2266 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2267 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 2268 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
2269 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2270 8, /* cost of moving SSE register to integer. */
a4fe6139
JH
2271 8, 8, /* Gather load static, per_elt. */
2272 8, 8, /* Gather store static, per_elt. */
64766e8d
JH
2273 32, /* size of l1 cache. */
2274 256, /* size of l2 cache. */
2275 64, /* size of prefetch block */
2276 6, /* number of parallel prefetches */
2277 3, /* Branch cost */
2278 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2279 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2280 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2281 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2282 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2283 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 2284
c53c148c 2285 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2286 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2287 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2288 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
2289 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2290 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2291 COSTS_N_INSNS (39), /* cost of DIVSS instruction. */
2292 COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
2293 COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
2294 COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
64766e8d
JH
2295 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2296 slm_memcpy,
2297 slm_memset,
f6fd8f2b
JH
2298 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2299 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2300 "16", /* Loop alignment. */
2301 "16:8:8", /* Jump alignment. */
2302 "0:0:8", /* Label alignment. */
2303 "16", /* Func alignment. */
64766e8d
JH
2304};
2305
/* memcpy strategy table referenced by intel_cost below.  The entries are
   identical to those of atom_memcpy and slm_memcpy.
   NOTE(review): the two elements are presumably the 32-bit and 64-bit
   variants -- confirm against the stringop_algs declaration.  */
2306static stringop_algs intel_memcpy[2] = {
2307 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2308 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2309 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* memset strategy table referenced by intel_cost below.  The entries are
   identical to those of atom_memset and slm_memset.
   NOTE(review): the two elements are presumably the 32-bit and 64-bit
   variants -- confirm against the stringop_algs declaration.  */
2310static stringop_algs intel_memset[2] = {
2311 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2312 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2313 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2314 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Processor cost table intel_cost (NOTE(review): presumably the table for
   generic Intel tuning -- confirm where it is selected).  As stated at the
   top of this file, processor costs are relative to an add.  The leading
   braced group holds the register allocator move costs (between the
   Start/End markers); the remaining fields are instruction costs, cache and
   prefetch parameters, the intel_memcpy/intel_memset string-operation
   tables, branch costs and alignment strings.  The initializer is
   positional, so the order of the entries must not be changed.  */
2315static const
2316struct processor_costs intel_cost = {
72bb85f8 2317 {
d321551c
L
2318 /* Start of register allocator costs. integer->integer move cost is 2. */
2319 6, /* cost for loading QImode using movzbl */
2320 {4, 4, 4}, /* cost of loading integer registers
2321 in QImode, HImode and SImode.
2322 Relative to reg-reg move (2). */
2323 {6, 6, 6}, /* cost of storing integer registers */
2324 2, /* cost of reg,reg fld/fst */
2325 {6, 6, 8}, /* cost of loading fp registers
2326 in SFmode, DFmode and XFmode */
2327 {6, 6, 10}, /* cost of storing fp registers
2328 in SFmode, DFmode and XFmode */
2329 2, /* cost of moving MMX register */
2330 {6, 6}, /* cost of loading MMX registers
2331 in SImode and DImode */
2332 {6, 6}, /* cost of storing MMX registers
2333 in SImode and DImode */
2334 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
2335 {6, 6, 6, 6, 6}, /* cost of loading SSE registers
2336 in 32,64,128,256 and 512-bit */
2337 {6, 6, 6, 6, 6}, /* cost of storing SSE registers
2338 in 32,64,128,256 and 512-bit */
2339 4, 4, /* SSE->integer and integer->SSE moves */
2340 /* End of register allocator costs. */
72bb85f8 2341 },
d321551c 2342
64766e8d
JH
2343 COSTS_N_INSNS (1), /* cost of an add instruction */
2344 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2345 COSTS_N_INSNS (1), /* variable shift costs */
2346 COSTS_N_INSNS (1), /* constant shift costs */
2347 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2348 COSTS_N_INSNS (3), /* HI */
2349 COSTS_N_INSNS (3), /* SI */
2350 COSTS_N_INSNS (4), /* DI */
2351 COSTS_N_INSNS (2)}, /* other */
2352 0, /* cost of multiply per each bit set */
2353 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2354 COSTS_N_INSNS (26), /* HI */
2355 COSTS_N_INSNS (42), /* SI */
2356 COSTS_N_INSNS (74), /* DI */
2357 COSTS_N_INSNS (74)}, /* other */
2358 COSTS_N_INSNS (1), /* cost of movsx */
2359 COSTS_N_INSNS (1), /* cost of movzx */
2360 8, /* "large" insn */
2361 17, /* MOVE_RATIO */
64766e8d
JH
2362 {4, 4, 4}, /* cost of loading integer registers
2363 in QImode, HImode and SImode.
2364 Relative to reg-reg move (2). */
af863030 2365 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2366 {6, 6, 6, 6, 6}, /* cost of loading SSE register
2367 in 32bit, 64bit, 128bit, 256bit and 512bit */
2368 {6, 6, 6, 6, 6}, /* cost of storing SSE register
2369 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2370 {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
df41dbaf 2371 {10, 10, 10, 10, 10}, /* cost of unaligned stores. */
d321551c
L
2372 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
2373 4, /* cost of moving SSE register to integer. */
a4fe6139
JH
2374 6, 6, /* Gather load static, per_elt. */
2375 6, 6, /* Gather store static, per_elt. */
64766e8d
JH
2376 32, /* size of l1 cache. */
2377 256, /* size of l2 cache. */
2378 64, /* size of prefetch block */
2379 6, /* number of parallel prefetches */
2380 3, /* Branch cost */
2381 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2382 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2383 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2384 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2385 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2386 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 2387
3ff59baa 2388 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2389 COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
2390 COSTS_N_INSNS (8), /* cost of MULSS instruction. */
2391 COSTS_N_INSNS (8), /* cost of MULSD instruction. */
c53c148c
JH
2392 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2393 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2394 COSTS_N_INSNS (20), /* cost of DIVSS instruction. */
2395 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
2396 COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
2397 COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
64766e8d
JH
2398 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2399 intel_memcpy,
2400 intel_memset,
f6fd8f2b
JH
2401 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2402 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2403 "16", /* Loop alignment. */
2404 "16:8:8", /* Jump alignment. */
2405 "0:0:8", /* Label alignment. */
2406 "16", /* Func alignment. */
64766e8d
JH
2407};
2408
2409/* Generic should produce code tuned for Core-i7 (and newer chips)
2410 and btver1 (and newer chips). */
2411
/* memcpy strategy table referenced by generic_cost below.  The two
   elements differ only in using rep_prefix_4_byte versus rep_prefix_8_byte.
   NOTE(review): they are presumably the 32-bit and 64-bit variants --
   confirm against the stringop_algs declaration.  */
2412static stringop_algs generic_memcpy[2] = {
2413 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
2414 {-1, libcall, false}}},
2415 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
2416 {-1, libcall, false}}}};
/* memset strategy table referenced by generic_cost below.  The entries
   are identical to those of generic_memcpy.
   NOTE(review): the two elements are presumably the 32-bit and 64-bit
   variants -- confirm against the stringop_algs declaration.  */
2417static stringop_algs generic_memset[2] = {
2418 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
2419 {-1, libcall, false}}},
2420 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
2421 {-1, libcall, false}}}};
/* Processor cost table for generic tuning (generic_cost).  As stated at
   the top of this file, processor costs are relative to an add.  The
   leading braced group holds the register allocator move costs (between
   the Start/End markers); the remaining fields are instruction costs,
   cache and prefetch parameters, the generic_memcpy/generic_memset
   string-operation tables, branch costs and alignment strings.  The
   initializer is positional, so the order of the entries must not be
   changed.  */
2422static const
2423struct processor_costs generic_cost = {
72bb85f8 2424 {
d321551c
L
2425 /* Start of register allocator costs. integer->integer move cost is 2. */
2426 6, /* cost for loading QImode using movzbl */
2427 {6, 6, 6}, /* cost of loading integer registers
2428 in QImode, HImode and SImode.
2429 Relative to reg-reg move (2). */
2430 {6, 6, 6}, /* cost of storing integer registers */
2431 4, /* cost of reg,reg fld/fst */
2432 {6, 6, 12}, /* cost of loading fp registers
2433 in SFmode, DFmode and XFmode */
2434 {6, 6, 12}, /* cost of storing fp registers
2435 in SFmode, DFmode and XFmode */
2436 2, /* cost of moving MMX register */
2437 {6, 6}, /* cost of loading MMX registers
2438 in SImode and DImode */
2439 {6, 6}, /* cost of storing MMX registers
2440 in SImode and DImode */
2441 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2442 {6, 6, 6, 10, 15}, /* cost of loading SSE registers
2443 in 32,64,128,256 and 512-bit */
2444 {6, 6, 6, 10, 15}, /* cost of storing SSE registers
2445 in 32,64,128,256 and 512-bit */
2446 6, 6, /* SSE->integer and integer->SSE moves */
2447 /* End of register allocator costs. */
72bb85f8 2448 },
d321551c 2449
64766e8d 2450 COSTS_N_INSNS (1), /* cost of an add instruction */
ef9eec0b 2451 /* Setting cost to 2 makes our current implementation of synth_mult result in
64766e8d
JH
2452 use of unnecessary temporary registers causing regression on several
2453 SPECfp benchmarks. */
2454 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2455 COSTS_N_INSNS (1), /* variable shift costs */
2456 COSTS_N_INSNS (1), /* constant shift costs */
2457 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2458 COSTS_N_INSNS (4), /* HI */
2459 COSTS_N_INSNS (3), /* SI */
2460 COSTS_N_INSNS (4), /* DI */
7c080ade 2461 COSTS_N_INSNS (4)}, /* other */
64766e8d 2462 0, /* cost of multiply per each bit set */
7c080ade
JH
2463 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI */
2464 COSTS_N_INSNS (22), /* HI */
2465 COSTS_N_INSNS (30), /* SI */
64766e8d
JH
2466 COSTS_N_INSNS (74), /* DI */
2467 COSTS_N_INSNS (74)}, /* other */
2468 COSTS_N_INSNS (1), /* cost of movsx */
2469 COSTS_N_INSNS (1), /* cost of movzx */
2470 8, /* "large" insn */
2471 17, /* MOVE_RATIO */
d555138e 2472 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
2473 in QImode, HImode and SImode.
2474 Relative to reg-reg move (2). */
af863030 2475 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2476 {6, 6, 6, 10, 15}, /* cost of loading SSE register
2477 in 32bit, 64bit, 128bit, 256bit and 512bit */
2478 {6, 6, 6, 10, 15}, /* cost of storing SSE register
2479 in 32bit, 64bit, 128bit, 256bit and 512bit */
7c080ade 2480 {6, 6, 6, 10, 15}, /* cost of unaligned loads. */
7c080ade 2481 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
d321551c
L
2482 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2483 6, /* cost of moving SSE register to integer. */
7c080ade
JH
2484 18, 6, /* Gather load static, per_elt. */
2485 18, 6, /* Gather store static, per_elt. */
64766e8d
JH
2486 32, /* size of l1 cache. */
2487 512, /* size of l2 cache. */
2488 64, /* size of prefetch block */
2489 6, /* number of parallel prefetches */
2490 /* Benchmarks show large regressions on K8 sixtrack benchmark when this
2491 value is increased to perhaps more appropriate value of 5. */
2492 3, /* Branch cost */
ef9eec0b 2493 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
7c080ade 2494 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
e8e3054e 2495 COSTS_N_INSNS (17), /* cost of FDIV instruction. */
ef9eec0b
JH
2496 COSTS_N_INSNS (1), /* cost of FABS instruction. */
2497 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
e8e3054e 2498 COSTS_N_INSNS (14), /* cost of FSQRT instruction. */
6065f444 2499
ef9eec0b
JH
2500 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
2501 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2502 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2503 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
2504 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
2505 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
e8e3054e
JH
2506 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
2507 COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
2508 COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
2509 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
7c080ade 2510 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
64766e8d
JH
2511 generic_memcpy,
2512 generic_memset,
e8e3054e
JH
2513 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
2514 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2515 "16:11:8", /* Loop alignment. */
2516 "16:11:8", /* Jump alignment. */
2517 "0:0:8", /* Label alignment. */
2518 "16", /* Func alignment. */
64766e8d
JH
2519};
2520
2521/* core_cost should produce code tuned for the Core family of CPUs. */
/* memcpy strategy table referenced by core_cost below.  Unlike the other
   tables in this part of the file, the size-bounded entries here pass true
   as their third member.
   NOTE(review): the meaning of that flag and of the two array elements
   (presumably 32-bit and 64-bit variants) is not visible here -- confirm
   against the stringop_algs declaration.  */
2522static stringop_algs core_memcpy[2] = {
2523 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2524 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2525 {-1, libcall, false}}}};
/* memset strategy table referenced by core_cost below.  The size-bounded
   entries pass true as their third member, as in core_memcpy.
   NOTE(review): the meaning of that flag and of the two array elements
   (presumably 32-bit and 64-bit variants) is not visible here -- confirm
   against the stringop_algs declaration.  */
2526static stringop_algs core_memset[2] = {
2527 {libcall, {{6, loop_1_byte, true},
2528 {24, loop, true},
2529 {8192, rep_prefix_4_byte, true},
2530 {-1, libcall, false}}},
2531 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2532 {-1, libcall, false}}}};
2533
/* Processor cost table for the Core family of CPUs (core_cost).  As stated
   at the top of this file, processor costs are relative to an add.  The
   leading braced group holds the register allocator move costs (between
   the Start/End markers); the remaining fields are instruction costs,
   cache and prefetch parameters, the core_memcpy/core_memset
   string-operation tables, branch costs and alignment strings.  The
   initializer is positional, so the order of the entries must not be
   changed.  */
2534static const
2535struct processor_costs core_cost = {
72bb85f8 2536 {
d321551c
L
2537 /* Start of register allocator costs. integer->integer move cost is 2. */
2538 6, /* cost for loading QImode using movzbl */
2539 {4, 4, 4}, /* cost of loading integer registers
2540 in QImode, HImode and SImode.
2541 Relative to reg-reg move (2). */
2542 {6, 6, 6}, /* cost of storing integer registers */
2543 2, /* cost of reg,reg fld/fst */
2544 {6, 6, 8}, /* cost of loading fp registers
2545 in SFmode, DFmode and XFmode */
2546 {6, 6, 10}, /* cost of storing fp registers
2547 in SFmode, DFmode and XFmode */
2548 2, /* cost of moving MMX register */
2549 {6, 6}, /* cost of loading MMX registers
2550 in SImode and DImode */
2551 {6, 6}, /* cost of storing MMX registers
2552 in SImode and DImode */
2553 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2554 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
2555 in 32,64,128,256 and 512-bit */
2556 {6, 6, 6, 6, 12}, /* cost of storing SSE registers
2557 in 32,64,128,256 and 512-bit */
4e9ad7c9 2558 6, 6, /* SSE->integer and integer->SSE moves */
d321551c 2559 /* End of register allocator costs. */
72bb85f8 2560 },
d321551c 2561
64766e8d
JH
2562 COSTS_N_INSNS (1), /* cost of an add instruction */
2563 /* On all chips taken into consideration lea is 2 cycles and more. With
2564 this cost however our current implementation of synth_mult results in
2565 use of unnecessary temporary registers causing regression on several
2566 SPECfp benchmarks. */
2567 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2568 COSTS_N_INSNS (1), /* variable shift costs */
2569 COSTS_N_INSNS (1), /* constant shift costs */
2570 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2571 COSTS_N_INSNS (4), /* HI */
2572 COSTS_N_INSNS (3), /* SI */
a2ef9558
MT
2573 /* Here we tune for Sandybridge or newer. */
2574 COSTS_N_INSNS (3), /* DI */
2575 COSTS_N_INSNS (3)}, /* other */
64766e8d 2576 0, /* cost of multiply per each bit set */
02308bd3
MT
2577 /* Expanding div/mod currently doesn't consider parallelism. So the cost
2578 model is not realistic. We compensate by increasing the latencies a bit. */
2579 {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
2580 COSTS_N_INSNS (11), /* HI */
2581 COSTS_N_INSNS (14), /* SI */
ffa3ce53
JH
2582 COSTS_N_INSNS (81), /* DI */
2583 COSTS_N_INSNS (81)}, /* other */
64766e8d
JH
2584 COSTS_N_INSNS (1), /* cost of movsx */
2585 COSTS_N_INSNS (1), /* cost of movzx */
2586 8, /* "large" insn */
2587 17, /* MOVE_RATIO */
64766e8d
JH
2588 {4, 4, 4}, /* cost of loading integer registers
2589 in QImode, HImode and SImode.
2590 Relative to reg-reg move (2). */
ffa3ce53 2591 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2592 {6, 6, 6, 6, 12}, /* cost of loading SSE register
2593 in 32bit, 64bit, 128bit, 256bit and 512bit */
2594 {6, 6, 6, 6, 12}, /* cost of storing SSE register
2595 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2596 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
df41dbaf 2597 {6, 6, 6, 6, 12}, /* cost of unaligned stores. */
d321551c
L
2598 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2599 2, /* cost of moving SSE register to integer. */
a4fe6139
JH
2600 /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
2601 rec. throughput 6. NOTE(review): the same mnemonic is named twice; the second figure presumably describes a different gather form -- confirm.
2602 So 5 uops statically and one uop per load. */
2603 10, 6, /* Gather load static, per_elt. */
2604 10, 6, /* Gather store static, per_elt. */
64766e8d
JH
2605 64, /* size of l1 cache. */
2606 512, /* size of l2 cache. */
2607 64, /* size of prefetch block */
2608 6, /* number of parallel prefetches */
2609 /* FIXME perhaps more appropriate value is 5. */
2610 3, /* Branch cost */
ef9eec0b
JH
2611 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
2612 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
ffa3ce53 2613 /* 10-24 */
ef9eec0b
JH
2614 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
2615 COSTS_N_INSNS (1), /* cost of FABS instruction. */
2616 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
ffa3ce53 2617 COSTS_N_INSNS (23), /* cost of FSQRT instruction. */
6065f444 2618
c53c148c 2619 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2620 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2621 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2622 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
2623 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
2624 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
2625 COSTS_N_INSNS (18), /* cost of DIVSS instruction. */
2626 COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
2627 COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
2628 COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
64766e8d
JH
2629 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
2630 core_memcpy,
2631 core_memset,
f6fd8f2b
JH
2632 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2633 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2634 "16:11:8", /* Loop alignment. */
2635 "16:11:8", /* Jump alignment. */
2636 "0:0:8", /* Label alignment. */
2637 "16", /* Func alignment. */
64766e8d
JH
2638};
2639