]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/i386/x86-tune-costs.h
Update copyright years.
[thirdparty/gcc.git] / gcc / config / i386 / x86-tune-costs.h
CommitLineData
df41dbaf 1/* Costs of operations of individual x86 CPUs.
8d9254fc 2 Copyright (C) 1988-2020 Free Software Foundation, Inc.
64766e8d 3
df41dbaf
JH
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 3, or (at your option)
9any later version.
10
11GCC is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16Under Section 7 of GPL version 3, you are granted additional
17permissions described in the GCC Runtime Library Exception, version
183.1, as published by the Free Software Foundation.
19
20You should have received a copy of the GNU General Public License and
21a copy of the GCC Runtime Library Exception along with this program;
22see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23<http://www.gnu.org/licenses/>. */
64766e8d
JH
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   Under that assumption COSTS_N_BYTES (2), the size of an add, equals
   COSTS_N_INSNS (1).  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop_algs initializer that names libcall both as the
   leading algorithm and in its single size entry.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
29
30static stringop_algs ix86_size_memcpy[2] = {
31 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
32 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
33static stringop_algs ix86_size_memset[2] = {
34 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
35 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
36
37const
38struct processor_costs ix86_size_cost = {/* costs for tuning for size */
72bb85f8 39 {
d321551c
L
40 /* Start of register allocator costs. integer->integer move cost is 2. */
41 2, /* cost for loading QImode using movzbl */
42 {2, 2, 2}, /* cost of loading integer registers
43 in QImode, HImode and SImode.
44 Relative to reg-reg move (2). */
45 {2, 2, 2}, /* cost of storing integer registers */
46 2, /* cost of reg,reg fld/fst */
47 {2, 2, 2}, /* cost of loading fp registers
48 in SFmode, DFmode and XFmode */
49 {2, 2, 2}, /* cost of storing fp registers
50 in SFmode, DFmode and XFmode */
51 3, /* cost of moving MMX register */
52 {3, 3}, /* cost of loading MMX registers
53 in SImode and DImode */
54 {3, 3}, /* cost of storing MMX registers
55 in SImode and DImode */
56 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
57 {3, 3, 3, 3, 3}, /* cost of loading SSE registers
58 in 32,64,128,256 and 512-bit */
59 {3, 3, 3, 3, 3}, /* cost of storing SSE registers
60 in 32,64,128,256 and 512-bit */
61 3, 3, /* SSE->integer and integer->SSE moves */
62 /* End of register allocator costs. */
72bb85f8 63 },
d321551c 64
64766e8d
JH
65 COSTS_N_BYTES (2), /* cost of an add instruction */
66 COSTS_N_BYTES (3), /* cost of a lea instruction */
67 COSTS_N_BYTES (2), /* variable shift costs */
68 COSTS_N_BYTES (3), /* constant shift costs */
69 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
70 COSTS_N_BYTES (3), /* HI */
71 COSTS_N_BYTES (3), /* SI */
72 COSTS_N_BYTES (3), /* DI */
73 COSTS_N_BYTES (5)}, /* other */
74 0, /* cost of multiply per each bit set */
75 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
76 COSTS_N_BYTES (3), /* HI */
77 COSTS_N_BYTES (3), /* SI */
78 COSTS_N_BYTES (3), /* DI */
79 COSTS_N_BYTES (5)}, /* other */
80 COSTS_N_BYTES (3), /* cost of movsx */
81 COSTS_N_BYTES (3), /* cost of movzx */
82 0, /* "large" insn */
83 2, /* MOVE_RATIO */
25e22b19 84 2, /* CLEAR_RATIO */
64766e8d
JH
85 {2, 2, 2}, /* cost of loading integer registers
86 in QImode, HImode and SImode.
87 Relative to reg-reg move (2). */
88 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
89 {3, 3, 3, 3, 3}, /* cost of loading SSE register
90 in 32bit, 64bit, 128bit, 256bit and 512bit */
91 {3, 3, 3, 3, 3}, /* cost of storing SSE register
92 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf
JH
93 {3, 3, 3, 3, 3}, /* cost of unaligned SSE load
94 in 128bit, 256bit and 512bit */
d321551c 95 {3, 3, 3, 3, 3}, /* cost of unaligned SSE store
df41dbaf 96 in 128bit, 256bit and 512bit */
d321551c
L
97 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */
98 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
99 5, 0, /* Gather load static, per_elt. */
100 5, 0, /* Gather store static, per_elt. */
64766e8d
JH
101 0, /* size of l1 cache */
102 0, /* size of l2 cache */
103 0, /* size of prefetch block */
104 0, /* number of parallel prefetches */
105 2, /* Branch cost */
106 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
107 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
108 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
109 COSTS_N_BYTES (2), /* cost of FABS instruction. */
110 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
111 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
6065f444 112
c53c148c 113 COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
6065f444
JH
114 COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
115 COSTS_N_BYTES (2), /* cost of MULSS instruction. */
116 COSTS_N_BYTES (2), /* cost of MULSD instruction. */
c53c148c
JH
117 COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
118 COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
6065f444
JH
119 COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
120 COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
121 COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
122 COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
64766e8d
JH
123 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
124 ix86_size_memcpy,
125 ix86_size_memset,
f6fd8f2b
JH
126 COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
127 COSTS_N_BYTES (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
128 NULL, /* Loop alignment. */
129 NULL, /* Jump alignment. */
130 NULL, /* Label alignment. */
131 NULL, /* Func alignment. */
64766e8d
JH
132};
133
134/* Processor costs (relative to an add) */
135static stringop_algs i386_memcpy[2] = {
136 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
137 DUMMY_STRINGOP_ALGS};
138static stringop_algs i386_memset[2] = {
139 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
140 DUMMY_STRINGOP_ALGS};
141
142static const
143struct processor_costs i386_cost = { /* 386 specific costs */
72bb85f8 144 {
d321551c
L
145 /* Start of register allocator costs. integer->integer move cost is 2. */
146 4, /* cost for loading QImode using movzbl */
147 {2, 4, 2}, /* cost of loading integer registers
148 in QImode, HImode and SImode.
149 Relative to reg-reg move (2). */
150 {2, 4, 2}, /* cost of storing integer registers */
151 2, /* cost of reg,reg fld/fst */
152 {8, 8, 8}, /* cost of loading fp registers
153 in SFmode, DFmode and XFmode */
154 {8, 8, 8}, /* cost of storing fp registers
155 in SFmode, DFmode and XFmode */
156 2, /* cost of moving MMX register */
157 {4, 8}, /* cost of loading MMX registers
158 in SImode and DImode */
159 {4, 8}, /* cost of storing MMX registers
160 in SImode and DImode */
161 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
162 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
163 in 32,64,128,256 and 512-bit */
164 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
165 in 32,64,128,256 and 512-bit */
166 3, 3, /* SSE->integer and integer->SSE moves */
167 /* End of register allocator costs. */
72bb85f8 168 },
d321551c 169
64766e8d
JH
170 COSTS_N_INSNS (1), /* cost of an add instruction */
171 COSTS_N_INSNS (1), /* cost of a lea instruction */
172 COSTS_N_INSNS (3), /* variable shift costs */
173 COSTS_N_INSNS (2), /* constant shift costs */
174 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
175 COSTS_N_INSNS (6), /* HI */
176 COSTS_N_INSNS (6), /* SI */
177 COSTS_N_INSNS (6), /* DI */
178 COSTS_N_INSNS (6)}, /* other */
179 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
180 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
181 COSTS_N_INSNS (23), /* HI */
182 COSTS_N_INSNS (23), /* SI */
183 COSTS_N_INSNS (23), /* DI */
184 COSTS_N_INSNS (23)}, /* other */
185 COSTS_N_INSNS (3), /* cost of movsx */
186 COSTS_N_INSNS (2), /* cost of movzx */
187 15, /* "large" insn */
188 3, /* MOVE_RATIO */
25e22b19 189 3, /* CLEAR_RATIO */
64766e8d
JH
190 {2, 4, 2}, /* cost of loading integer registers
191 in QImode, HImode and SImode.
192 Relative to reg-reg move (2). */
193 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
194 {4, 8, 16, 32, 64}, /* cost of loading SSE register
195 in 32bit, 64bit, 128bit, 256bit and 512bit */
196 {4, 8, 16, 32, 64}, /* cost of storing SSE register
197 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 198 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 199 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
200 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
201 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
202 4, 4, /* Gather load static, per_elt. */
203 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
204 0, /* size of l1 cache */
205 0, /* size of l2 cache */
206 0, /* size of prefetch block */
207 0, /* number of parallel prefetches */
208 1, /* Branch cost */
209 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
210 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
211 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
212 COSTS_N_INSNS (22), /* cost of FABS instruction. */
213 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
214 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
6065f444 215
c53c148c 216 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
217 COSTS_N_INSNS (23), /* cost of ADDSS/SD SUBSS/SD insns. */
218 COSTS_N_INSNS (27), /* cost of MULSS instruction. */
219 COSTS_N_INSNS (27), /* cost of MULSD instruction. */
c53c148c
JH
220 COSTS_N_INSNS (27), /* cost of FMA SS instruction. */
221 COSTS_N_INSNS (27), /* cost of FMA SD instruction. */
6065f444
JH
222 COSTS_N_INSNS (88), /* cost of DIVSS instruction. */
223 COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
224 COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
225 COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
64766e8d
JH
226 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
227 i386_memcpy,
228 i386_memset,
f6fd8f2b
JH
229 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
230 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
231 "4", /* Loop alignment. */
232 "4", /* Jump alignment. */
233 NULL, /* Label alignment. */
234 "4", /* Func alignment. */
64766e8d
JH
235};
236
237static stringop_algs i486_memcpy[2] = {
238 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
239 DUMMY_STRINGOP_ALGS};
240static stringop_algs i486_memset[2] = {
241 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
242 DUMMY_STRINGOP_ALGS};
243
244static const
245struct processor_costs i486_cost = { /* 486 specific costs */
72bb85f8 246 {
d321551c
L
247 /* Start of register allocator costs. integer->integer move cost is 2. */
248 4, /* cost for loading QImode using movzbl */
249 {2, 4, 2}, /* cost of loading integer registers
250 in QImode, HImode and SImode.
251 Relative to reg-reg move (2). */
252 {2, 4, 2}, /* cost of storing integer registers */
253 2, /* cost of reg,reg fld/fst */
254 {8, 8, 8}, /* cost of loading fp registers
255 in SFmode, DFmode and XFmode */
256 {8, 8, 8}, /* cost of storing fp registers
257 in SFmode, DFmode and XFmode */
258 2, /* cost of moving MMX register */
259 {4, 8}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {4, 8}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
264 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
265 in 32,64,128,256 and 512-bit */
266 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
267 in 32,64,128,256 and 512-bit */
268 3, 3, /* SSE->integer and integer->SSE moves */
269 /* End of register allocator costs. */
72bb85f8 270 },
d321551c 271
64766e8d
JH
272 COSTS_N_INSNS (1), /* cost of an add instruction */
273 COSTS_N_INSNS (1), /* cost of a lea instruction */
274 COSTS_N_INSNS (3), /* variable shift costs */
275 COSTS_N_INSNS (2), /* constant shift costs */
276 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
277 COSTS_N_INSNS (12), /* HI */
278 COSTS_N_INSNS (12), /* SI */
279 COSTS_N_INSNS (12), /* DI */
280 COSTS_N_INSNS (12)}, /* other */
281 1, /* cost of multiply per each bit set */
282 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
283 COSTS_N_INSNS (40), /* HI */
284 COSTS_N_INSNS (40), /* SI */
285 COSTS_N_INSNS (40), /* DI */
286 COSTS_N_INSNS (40)}, /* other */
287 COSTS_N_INSNS (3), /* cost of movsx */
288 COSTS_N_INSNS (2), /* cost of movzx */
289 15, /* "large" insn */
290 3, /* MOVE_RATIO */
25e22b19 291 3, /* CLEAR_RATIO */
64766e8d
JH
292 {2, 4, 2}, /* cost of loading integer registers
293 in QImode, HImode and SImode.
294 Relative to reg-reg move (2). */
295 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
296 {4, 8, 16, 32, 64}, /* cost of loading SSE register
297 in 32bit, 64bit, 128bit, 256bit and 512bit */
298 {4, 8, 16, 32, 64}, /* cost of storing SSE register
299 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 300 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 301 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
302 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
303 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
304 4, 4, /* Gather load static, per_elt. */
305 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
306 4, /* size of l1 cache. 486 has 8kB cache
307 shared for code and data, so 4kB is
308 not really precise. */
309 4, /* size of l2 cache */
310 0, /* size of prefetch block */
311 0, /* number of parallel prefetches */
312 1, /* Branch cost */
313 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
314 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
315 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
316 COSTS_N_INSNS (3), /* cost of FABS instruction. */
317 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
318 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
6065f444 319
c53c148c 320 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
321 COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
322 COSTS_N_INSNS (16), /* cost of MULSS instruction. */
323 COSTS_N_INSNS (16), /* cost of MULSD instruction. */
c53c148c
JH
324 COSTS_N_INSNS (16), /* cost of FMA SS instruction. */
325 COSTS_N_INSNS (16), /* cost of FMA SD instruction. */
6065f444
JH
326 COSTS_N_INSNS (73), /* cost of DIVSS instruction. */
327 COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
328 COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
329 COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
64766e8d
JH
330 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
331 i486_memcpy,
332 i486_memset,
f6fd8f2b
JH
333 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
334 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
335 "16", /* Loop alignment. */
336 "16", /* Jump alignment. */
337 "0:0:8", /* Label alignment. */
338 "16", /* Func alignment. */
64766e8d
JH
339};
340
341static stringop_algs pentium_memcpy[2] = {
342 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
343 DUMMY_STRINGOP_ALGS};
344static stringop_algs pentium_memset[2] = {
345 {libcall, {{-1, rep_prefix_4_byte, false}}},
346 DUMMY_STRINGOP_ALGS};
347
348static const
349struct processor_costs pentium_cost = {
72bb85f8 350 {
d321551c
L
351 /* Start of register allocator costs. integer->integer move cost is 2. */
352 6, /* cost for loading QImode using movzbl */
353 {2, 4, 2}, /* cost of loading integer registers
354 in QImode, HImode and SImode.
355 Relative to reg-reg move (2). */
356 {2, 4, 2}, /* cost of storing integer registers */
357 2, /* cost of reg,reg fld/fst */
358 {2, 2, 6}, /* cost of loading fp registers
359 in SFmode, DFmode and XFmode */
360 {4, 4, 6}, /* cost of storing fp registers
361 in SFmode, DFmode and XFmode */
362 8, /* cost of moving MMX register */
363 {8, 8}, /* cost of loading MMX registers
364 in SImode and DImode */
365 {8, 8}, /* cost of storing MMX registers
366 in SImode and DImode */
367 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
368 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
369 in 32,64,128,256 and 512-bit */
370 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
371 in 32,64,128,256 and 512-bit */
372 3, 3, /* SSE->integer and integer->SSE moves */
373 /* End of register allocator costs. */
72bb85f8 374 },
d321551c 375
64766e8d
JH
376 COSTS_N_INSNS (1), /* cost of an add instruction */
377 COSTS_N_INSNS (1), /* cost of a lea instruction */
378 COSTS_N_INSNS (4), /* variable shift costs */
379 COSTS_N_INSNS (1), /* constant shift costs */
380 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
381 COSTS_N_INSNS (11), /* HI */
382 COSTS_N_INSNS (11), /* SI */
383 COSTS_N_INSNS (11), /* DI */
384 COSTS_N_INSNS (11)}, /* other */
385 0, /* cost of multiply per each bit set */
386 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
387 COSTS_N_INSNS (25), /* HI */
388 COSTS_N_INSNS (25), /* SI */
389 COSTS_N_INSNS (25), /* DI */
390 COSTS_N_INSNS (25)}, /* other */
391 COSTS_N_INSNS (3), /* cost of movsx */
392 COSTS_N_INSNS (2), /* cost of movzx */
393 8, /* "large" insn */
394 6, /* MOVE_RATIO */
25e22b19 395 6, /* CLEAR_RATIO */
64766e8d
JH
396 {2, 4, 2}, /* cost of loading integer registers
397 in QImode, HImode and SImode.
398 Relative to reg-reg move (2). */
399 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
400 {4, 8, 16, 32, 64}, /* cost of loading SSE register
401 in 32bit, 64bit, 128bit, 256bit and 512bit */
402 {4, 8, 16, 32, 64}, /* cost of storing SSE register
403 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 404 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 405 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
406 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
407 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
408 4, 4, /* Gather load static, per_elt. */
409 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
410 8, /* size of l1 cache. */
411 8, /* size of l2 cache */
412 0, /* size of prefetch block */
413 0, /* number of parallel prefetches */
414 2, /* Branch cost */
415 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
416 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
417 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
418 COSTS_N_INSNS (1), /* cost of FABS instruction. */
419 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
420 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
6065f444 421
c53c148c 422 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
423 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
424 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
425 COSTS_N_INSNS (3), /* cost of MULSD instruction. */
c53c148c
JH
426 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
427 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
428 COSTS_N_INSNS (39), /* cost of DIVSS instruction. */
429 COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
430 COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
431 COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
64766e8d
JH
432 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
433 pentium_memcpy,
434 pentium_memset,
f6fd8f2b
JH
435 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
436 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
437 "16:8:8", /* Loop alignment. */
438 "16:8:8", /* Jump alignment. */
439 "0:0:8", /* Label alignment. */
440 "16", /* Func alignment. */
64766e8d
JH
441};
442
443static const
444struct processor_costs lakemont_cost = {
72bb85f8 445 {
d321551c
L
446 /* Start of register allocator costs. integer->integer move cost is 2. */
447 6, /* cost for loading QImode using movzbl */
448 {2, 4, 2}, /* cost of loading integer registers
449 in QImode, HImode and SImode.
450 Relative to reg-reg move (2). */
451 {2, 4, 2}, /* cost of storing integer registers */
452 2, /* cost of reg,reg fld/fst */
453 {2, 2, 6}, /* cost of loading fp registers
454 in SFmode, DFmode and XFmode */
455 {4, 4, 6}, /* cost of storing fp registers
456 in SFmode, DFmode and XFmode */
457 8, /* cost of moving MMX register */
458 {8, 8}, /* cost of loading MMX registers
459 in SImode and DImode */
460 {8, 8}, /* cost of storing MMX registers
461 in SImode and DImode */
462 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
463 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
464 in 32,64,128,256 and 512-bit */
465 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
466 in 32,64,128,256 and 512-bit */
467 3, 3, /* SSE->integer and integer->SSE moves */
468 /* End of register allocator costs. */
72bb85f8 469 },
d321551c 470
64766e8d
JH
471 COSTS_N_INSNS (1), /* cost of an add instruction */
472 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
473 COSTS_N_INSNS (1), /* variable shift costs */
474 COSTS_N_INSNS (1), /* constant shift costs */
475 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
476 COSTS_N_INSNS (11), /* HI */
477 COSTS_N_INSNS (11), /* SI */
478 COSTS_N_INSNS (11), /* DI */
479 COSTS_N_INSNS (11)}, /* other */
480 0, /* cost of multiply per each bit set */
481 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
482 COSTS_N_INSNS (25), /* HI */
483 COSTS_N_INSNS (25), /* SI */
484 COSTS_N_INSNS (25), /* DI */
485 COSTS_N_INSNS (25)}, /* other */
486 COSTS_N_INSNS (3), /* cost of movsx */
487 COSTS_N_INSNS (2), /* cost of movzx */
488 8, /* "large" insn */
489 17, /* MOVE_RATIO */
25e22b19 490 6, /* CLEAR_RATIO */
64766e8d
JH
491 {2, 4, 2}, /* cost of loading integer registers
492 in QImode, HImode and SImode.
493 Relative to reg-reg move (2). */
494 {2, 4, 2}, /* cost of storing integer registers */
d321551c
L
495 {4, 8, 16, 32, 64}, /* cost of loading SSE register
496 in 32bit, 64bit, 128bit, 256bit and 512bit */
497 {4, 8, 16, 32, 64}, /* cost of storing SSE register
498 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 499 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 500 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
501 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
502 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
503 4, 4, /* Gather load static, per_elt. */
504 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
505 8, /* size of l1 cache. */
506 8, /* size of l2 cache */
507 0, /* size of prefetch block */
508 0, /* number of parallel prefetches */
509 2, /* Branch cost */
510 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
511 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
512 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
513 COSTS_N_INSNS (1), /* cost of FABS instruction. */
514 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
515 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
6065f444 516
c53c148c 517 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
518 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
519 COSTS_N_INSNS (5), /* cost of MULSS instruction. */
520 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
521 COSTS_N_INSNS (10), /* cost of FMA SS instruction. */
522 COSTS_N_INSNS (10), /* cost of FMA SD instruction. */
6065f444
JH
523 COSTS_N_INSNS (31), /* cost of DIVSS instruction. */
524 COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
525 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
526 COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
64766e8d
JH
527 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
528 pentium_memcpy,
529 pentium_memset,
f6fd8f2b
JH
530 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
531 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
532 "16:8:8", /* Loop alignment. */
533 "16:8:8", /* Jump alignment. */
534 "0:0:8", /* Label alignment. */
535 "16", /* Func alignment. */
64766e8d
JH
536};
537
538/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
539 (we ensure the alignment). For small blocks inline loop is still a
540 noticeable win, for bigger blocks either rep movsl or rep movsb is
541 way to go. Rep movsb has apparently more expensive startup time in CPU,
542 but after 4K the difference is down in the noise. */
543static stringop_algs pentiumpro_memcpy[2] = {
544 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
545 {8192, rep_prefix_4_byte, false},
546 {-1, rep_prefix_1_byte, false}}},
547 DUMMY_STRINGOP_ALGS};
548static stringop_algs pentiumpro_memset[2] = {
549 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
550 {8192, rep_prefix_4_byte, false},
551 {-1, libcall, false}}},
552 DUMMY_STRINGOP_ALGS};
553static const
554struct processor_costs pentiumpro_cost = {
72bb85f8 555 {
d321551c
L
556 /* Start of register allocator costs. integer->integer move cost is 2. */
557 2, /* cost for loading QImode using movzbl */
558 {4, 4, 4}, /* cost of loading integer registers
559 in QImode, HImode and SImode.
560 Relative to reg-reg move (2). */
561 {2, 2, 2}, /* cost of storing integer registers */
562 2, /* cost of reg,reg fld/fst */
563 {2, 2, 6}, /* cost of loading fp registers
564 in SFmode, DFmode and XFmode */
565 {4, 4, 6}, /* cost of storing fp registers
566 in SFmode, DFmode and XFmode */
567 2, /* cost of moving MMX register */
568 {2, 2}, /* cost of loading MMX registers
569 in SImode and DImode */
570 {2, 2}, /* cost of storing MMX registers
571 in SImode and DImode */
572 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
573 {4, 8, 16, 32, 64}, /* cost of loading SSE registers
574 in 32,64,128,256 and 512-bit */
575 {4, 8, 16, 32, 64}, /* cost of storing SSE registers
576 in 32,64,128,256 and 512-bit */
577 3, 3, /* SSE->integer and integer->SSE moves */
578 /* End of register allocator costs. */
72bb85f8 579 },
d321551c 580
64766e8d
JH
581 COSTS_N_INSNS (1), /* cost of an add instruction */
582 COSTS_N_INSNS (1), /* cost of a lea instruction */
583 COSTS_N_INSNS (1), /* variable shift costs */
584 COSTS_N_INSNS (1), /* constant shift costs */
585 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
586 COSTS_N_INSNS (4), /* HI */
587 COSTS_N_INSNS (4), /* SI */
588 COSTS_N_INSNS (4), /* DI */
589 COSTS_N_INSNS (4)}, /* other */
590 0, /* cost of multiply per each bit set */
591 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
592 COSTS_N_INSNS (17), /* HI */
593 COSTS_N_INSNS (17), /* SI */
594 COSTS_N_INSNS (17), /* DI */
595 COSTS_N_INSNS (17)}, /* other */
596 COSTS_N_INSNS (1), /* cost of movsx */
597 COSTS_N_INSNS (1), /* cost of movzx */
598 8, /* "large" insn */
599 6, /* MOVE_RATIO */
25e22b19 600 6, /* CLEAR_RATIO */
64766e8d
JH
601 {4, 4, 4}, /* cost of loading integer registers
602 in QImode, HImode and SImode.
603 Relative to reg-reg move (2). */
604 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
605 {4, 8, 16, 32, 64}, /* cost of loading SSE register
606 in 32bit, 64bit, 128bit, 256bit and 512bit */
607 {4, 8, 16, 32, 64}, /* cost of storing SSE register
608 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 609 {4, 8, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 610 {4, 8, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
611 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
612 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
613 4, 4, /* Gather load static, per_elt. */
614 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
615 8, /* size of l1 cache. */
616 256, /* size of l2 cache */
617 32, /* size of prefetch block */
618 6, /* number of parallel prefetches */
619 2, /* Branch cost */
620 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
621 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
622 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
623 COSTS_N_INSNS (2), /* cost of FABS instruction. */
624 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
625 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
6065f444 626
c53c148c 627 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
628 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
629 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
630 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
631 COSTS_N_INSNS (7), /* cost of FMA SS instruction. */
632 COSTS_N_INSNS (7), /* cost of FMA SD instruction. */
6065f444
JH
633 COSTS_N_INSNS (18), /* cost of DIVSS instruction. */
634 COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
635 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
636 COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
64766e8d
JH
637 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
638 pentiumpro_memcpy,
639 pentiumpro_memset,
f6fd8f2b
JH
640 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
641 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
642 "16", /* Loop alignment. */
643 "16:11:8", /* Jump alignment. */
644 "0:0:8", /* Label alignment. */
645 "16", /* Func alignment. */
64766e8d
JH
646};
647
648static stringop_algs geode_memcpy[2] = {
649 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651static stringop_algs geode_memset[2] = {
652 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654static const
655struct processor_costs geode_cost = {
72bb85f8 656 {
d321551c
L
657 /* Start of register allocator costs. integer->integer move cost is 2. */
658 2, /* cost for loading QImode using movzbl */
659 {2, 2, 2}, /* cost of loading integer registers
660 in QImode, HImode and SImode.
661 Relative to reg-reg move (2). */
662 {2, 2, 2}, /* cost of storing integer registers */
663 2, /* cost of reg,reg fld/fst */
664 {2, 2, 2}, /* cost of loading fp registers
665 in SFmode, DFmode and XFmode */
666 {4, 6, 6}, /* cost of storing fp registers
667 in SFmode, DFmode and XFmode */
668 2, /* cost of moving MMX register */
669 {2, 2}, /* cost of loading MMX registers
670 in SImode and DImode */
671 {2, 2}, /* cost of storing MMX registers
672 in SImode and DImode */
673 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
674 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
675 in 32,64,128,256 and 512-bit */
676 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
677 in 32,64,128,256 and 512-bit */
678 6, 6, /* SSE->integer and integer->SSE moves */
679 /* End of register allocator costs. */
72bb85f8 680 },
d321551c 681
64766e8d
JH
682 COSTS_N_INSNS (1), /* cost of an add instruction */
683 COSTS_N_INSNS (1), /* cost of a lea instruction */
684 COSTS_N_INSNS (2), /* variable shift costs */
685 COSTS_N_INSNS (1), /* constant shift costs */
686 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
687 COSTS_N_INSNS (4), /* HI */
688 COSTS_N_INSNS (7), /* SI */
689 COSTS_N_INSNS (7), /* DI */
690 COSTS_N_INSNS (7)}, /* other */
691 0, /* cost of multiply per each bit set */
692 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
693 COSTS_N_INSNS (23), /* HI */
694 COSTS_N_INSNS (39), /* SI */
695 COSTS_N_INSNS (39), /* DI */
696 COSTS_N_INSNS (39)}, /* other */
697 COSTS_N_INSNS (1), /* cost of movsx */
698 COSTS_N_INSNS (1), /* cost of movzx */
699 8, /* "large" insn */
700 4, /* MOVE_RATIO */
25e22b19 701 4, /* CLEAR_RATIO */
df41dbaf 702 {2, 2, 2}, /* cost of loading integer registers
64766e8d
JH
703 in QImode, HImode and SImode.
704 Relative to reg-reg move (2). */
df41dbaf 705 {2, 2, 2}, /* cost of storing integer registers */
d321551c
L
706 {2, 2, 8, 16, 32}, /* cost of loading SSE register
707 in 32bit, 64bit, 128bit, 256bit and 512bit */
708 {2, 2, 8, 16, 32}, /* cost of storing SSE register
709 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 710 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
df41dbaf 711 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
712 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
713 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
714 2, 2, /* Gather load static, per_elt. */
715 2, 2, /* Gather store static, per_elt. */
64766e8d
JH
716 64, /* size of l1 cache. */
717 128, /* size of l2 cache. */
718 32, /* size of prefetch block */
719 1, /* number of parallel prefetches */
720 1, /* Branch cost */
721 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
722 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
723 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
724 COSTS_N_INSNS (1), /* cost of FABS instruction. */
725 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
726 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
6065f444 727
c53c148c 728 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
729 COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
730 COSTS_N_INSNS (11), /* cost of MULSS instruction. */
731 COSTS_N_INSNS (11), /* cost of MULSD instruction. */
c53c148c
JH
732 COSTS_N_INSNS (17), /* cost of FMA SS instruction. */
733 COSTS_N_INSNS (17), /* cost of FMA SD instruction. */
6065f444
JH
734 COSTS_N_INSNS (47), /* cost of DIVSS instruction. */
735 COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
736 COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
737 COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
64766e8d
JH
738 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
739 geode_memcpy,
740 geode_memset,
f6fd8f2b
JH
741 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
742 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
743 NULL, /* Loop alignment. */
744 NULL, /* Jump alignment. */
745 NULL, /* Label alignment. */
746 NULL, /* Func alignment. */
64766e8d
JH
747};
748
/* memcpy strategy table referenced by k6_cost.  The first element selects
   rep_prefix_4_byte for the entry tagged 256 and libcall for the catch-all
   (-1) entry; the second element is DUMMY_STRINGOP_ALGS, i.e. libcall only.
   NOTE(review): presumably the numbers are block-size thresholds in bytes and
   the two elements are the 32-bit and 64-bit variants -- confirm against the
   declaration of stringop_algs.  */
749static stringop_algs k6_memcpy[2] = {
750 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
751 DUMMY_STRINGOP_ALGS};
/* memset strategy table referenced by k6_cost.  The entries are the same as
   in the K6 memcpy table: rep_prefix_4_byte for the entry tagged 256, libcall
   for the catch-all (-1) entry, and a libcall-only DUMMY_STRINGOP_ALGS as the
   second element.
   NOTE(review): presumably the numbers are block-size thresholds in bytes --
   confirm against the declaration of stringop_algs.  */
752static stringop_algs k6_memset[2] = {
753 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
754 DUMMY_STRINGOP_ALGS};
/* Cost table for K6.  The leading brace-enclosed group holds the register
   allocator costs; it is followed by instruction costs expressed with
   COSTS_N_INSNS, move/clear ratios, load/store costs, cache and prefetch
   parameters, x87 and SSE instruction costs, reassociation widths, the
   memcpy/memset strategy tables, branch costs and alignment strings.  */
755static const
756struct processor_costs k6_cost = {
72bb85f8 757 {
d321551c
L
758 /* Start of register allocator costs. integer->integer move cost is 2. */
759 3, /* cost for loading QImode using movzbl */
760 {4, 5, 4}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {2, 3, 2}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {6, 6, 6}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {4, 4, 4}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {2, 2}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {2, 2}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
775 {2, 2, 8, 16, 32}, /* cost of loading SSE registers
776 in 32,64,128,256 and 512-bit */
777 {2, 2, 8, 16, 32}, /* cost of storing SSE registers
778 in 32,64,128,256 and 512-bit */
779 6, 6, /* SSE->integer and integer->SSE moves */
780 /* End of register allocator costs. */
72bb85f8 781 },
d321551c 782
64766e8d
JH
783 COSTS_N_INSNS (1), /* cost of an add instruction */
784 COSTS_N_INSNS (2), /* cost of a lea instruction */
785 COSTS_N_INSNS (1), /* variable shift costs */
786 COSTS_N_INSNS (1), /* constant shift costs */
787 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
788 COSTS_N_INSNS (3), /* HI */
789 COSTS_N_INSNS (3), /* SI */
790 COSTS_N_INSNS (3), /* DI */
791 COSTS_N_INSNS (3)}, /* other */
792 0, /* cost of multiply per each bit set */
793 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
794 COSTS_N_INSNS (18), /* HI */
795 COSTS_N_INSNS (18), /* SI */
796 COSTS_N_INSNS (18), /* DI */
797 COSTS_N_INSNS (18)}, /* other */
798 COSTS_N_INSNS (2), /* cost of movsx */
799 COSTS_N_INSNS (2), /* cost of movzx */
800 8, /* "large" insn */
801 4, /* MOVE_RATIO */
25e22b19 802 4, /* CLEAR_RATIO */
64766e8d
JH
803 {4, 5, 4}, /* cost of loading integer registers
804 in QImode, HImode and SImode.
805 Relative to reg-reg move (2). */
806 {2, 3, 2}, /* cost of storing integer registers */
d321551c
L
807 {2, 2, 8, 16, 32}, /* cost of loading SSE register
808 in 32bit, 64bit, 128bit, 256bit and 512bit */
809 {2, 2, 8, 16, 32}, /* cost of storing SSE register
810 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 811 {2, 2, 8, 16, 32}, /* cost of unaligned loads. */
df41dbaf 812 {2, 2, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
813 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
814 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
815 2, 2, /* Gather load static, per_elt. */
816 2, 2, /* Gather store static, per_elt. */
64766e8d
JH
817 32, /* size of l1 cache. */
818 32, /* size of l2 cache. Some models
819 have integrated l2 cache, but
820 optimizing for k6 is not important
821 enough to worry about that. */
822 32, /* size of prefetch block */
823 1, /* number of parallel prefetches */
824 1, /* Branch cost */
825 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
826 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
827 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
828 COSTS_N_INSNS (2), /* cost of FABS instruction. */
829 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
830 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
6065f444 831
c53c148c 832 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
833 COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
834 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
835 COSTS_N_INSNS (2), /* cost of MULSD instruction. */
c53c148c
JH
836 COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
837 COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
6065f444
JH
838 COSTS_N_INSNS (56), /* cost of DIVSS instruction. */
839 COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
840 COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
841 COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
64766e8d
JH
842 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
843 k6_memcpy, /* stringop_algs table used for memcpy. */
844 k6_memset, /* stringop_algs table used for memset. */
f6fd8f2b
JH
845 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
846 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
847 "32:8:8", /* Loop alignment. */
848 "32:8:8", /* Jump alignment. */
849 "0:0:8", /* Label alignment. */
850 "32", /* Func alignment. */
64766e8d
JH
851};
852
853/* For some reason, Athlon deals better with REP prefix (relative to loops)
854 compared to K8. Alignment becomes important after 8 bytes for memcpy and
855 128 bytes for memset. */
/* memcpy strategy table referenced by athlon_cost: rep_prefix_4_byte for the
   entry tagged 2048, libcall for the catch-all (-1) entry; the second element
   is the libcall-only DUMMY_STRINGOP_ALGS.
   NOTE(review): presumably 2048 is a block-size threshold in bytes -- confirm
   against the declaration of stringop_algs.  */
856static stringop_algs athlon_memcpy[2] = {
857 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
858 DUMMY_STRINGOP_ALGS};
/* memset strategy table referenced by athlon_cost; the entries mirror the
   Athlon memcpy table (rep_prefix_4_byte for the entry tagged 2048, libcall
   otherwise, libcall-only DUMMY_STRINGOP_ALGS as the second element).
   NOTE(review): presumably 2048 is a block-size threshold in bytes -- confirm
   against the declaration of stringop_algs.  */
859static stringop_algs athlon_memset[2] = {
860 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
861 DUMMY_STRINGOP_ALGS};
/* Cost table for Athlon.  The leading brace-enclosed group holds the register
   allocator costs; it is followed by instruction costs expressed with
   COSTS_N_INSNS, move/clear ratios, load/store costs, cache and prefetch
   parameters, x87 and SSE instruction costs, reassociation widths, the
   memcpy/memset strategy tables, branch costs and alignment strings.  */
862static const
863struct processor_costs athlon_cost = {
72bb85f8 864 {
d321551c
L
865 /* Start of register allocator costs. integer->integer move cost is 2. */
866 4, /* cost for loading QImode using movzbl */
867 {3, 4, 3}, /* cost of loading integer registers
868 in QImode, HImode and SImode.
869 Relative to reg-reg move (2). */
870 {3, 4, 3}, /* cost of storing integer registers */
871 4, /* cost of reg,reg fld/fst */
872 {4, 4, 12}, /* cost of loading fp registers
873 in SFmode, DFmode and XFmode */
874 {6, 6, 8}, /* cost of storing fp registers
875 in SFmode, DFmode and XFmode */
876 2, /* cost of moving MMX register */
877 {4, 4}, /* cost of loading MMX registers
878 in SImode and DImode */
879 {4, 4}, /* cost of storing MMX registers
880 in SImode and DImode */
881 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
882 {4, 4, 12, 12, 24}, /* cost of loading SSE registers
883 in 32,64,128,256 and 512-bit */
884 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
885 in 32,64,128,256 and 512-bit */
886 5, 5, /* SSE->integer and integer->SSE moves */
887 /* End of register allocator costs. */
72bb85f8 888 },
d321551c 889
64766e8d
JH
890 COSTS_N_INSNS (1), /* cost of an add instruction */
891 COSTS_N_INSNS (2), /* cost of a lea instruction */
892 COSTS_N_INSNS (1), /* variable shift costs */
893 COSTS_N_INSNS (1), /* constant shift costs */
894 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
895 COSTS_N_INSNS (5), /* HI */
896 COSTS_N_INSNS (5), /* SI */
897 COSTS_N_INSNS (5), /* DI */
898 COSTS_N_INSNS (5)}, /* other */
899 0, /* cost of multiply per each bit set */
900 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
901 COSTS_N_INSNS (26), /* HI */
902 COSTS_N_INSNS (42), /* SI */
903 COSTS_N_INSNS (74), /* DI */
904 COSTS_N_INSNS (74)}, /* other */
905 COSTS_N_INSNS (1), /* cost of movsx */
906 COSTS_N_INSNS (1), /* cost of movzx */
907 8, /* "large" insn */
908 9, /* MOVE_RATIO */
25e22b19 909 6, /* CLEAR_RATIO */
64766e8d
JH
910 {3, 4, 3}, /* cost of loading integer registers
911 in QImode, HImode and SImode.
912 Relative to reg-reg move (2). */
913 {3, 4, 3}, /* cost of storing integer registers */
d321551c
L
914 {4, 4, 12, 12, 24}, /* cost of loading SSE register
915 in 32bit, 64bit, 128bit, 256bit and 512bit */
916 {4, 4, 10, 10, 20}, /* cost of storing SSE register
917 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 918 {4, 4, 12, 12, 24}, /* cost of unaligned loads. */
b7167993 919 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
d321551c
L
920 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
921 5, /* cost of moving SSE register to integer. */
a4fe6139
JH
922 4, 4, /* Gather load static, per_elt. */
923 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
924 64, /* size of l1 cache. */
925 256, /* size of l2 cache. */
926 64, /* size of prefetch block */
927 6, /* number of parallel prefetches */
928 5, /* Branch cost */
929 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
930 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
931 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
932 COSTS_N_INSNS (2), /* cost of FABS instruction. */
933 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
934 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 935
c53c148c 936 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
937 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
938 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
939 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
940 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
941 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
942 /* 11-16 (NOTE(review): presumably the DIVSS latency range, with the
   upper bound used below -- confirm). */
943 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
944 COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
945 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
946 COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
64766e8d
JH
947 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
948 athlon_memcpy, /* stringop_algs table used for memcpy. */
949 athlon_memset, /* stringop_algs table used for memset. */
f6fd8f2b
JH
950 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
951 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
952 "16:8:8", /* Loop alignment. */
953 "16:8:8", /* Jump alignment. */
954 "0:0:8", /* Label alignment. */
955 "16", /* Func alignment. */
64766e8d
JH
956};
957
958/* K8 has an optimized REP instruction for medium-sized blocks, but for very
959 small blocks it is better to use a loop. For large blocks, libcall can
960 do non-temporal accesses and beat inline code considerably. */
/* memcpy strategy table referenced by k8_cost.  Each element lists
   {tag, algorithm, flag} entries, ending with a catch-all (-1) entry: the
   first element goes loop -> unrolled_loop -> rep_prefix_4_byte, the second
   loop -> rep_prefix_8_byte -> libcall.
   NOTE(review): presumably the tags are upper block-size bounds in bytes and
   the two elements are the 32-bit and 64-bit variants -- confirm against the
   declaration of stringop_algs.  */
961static stringop_algs k8_memcpy[2] = {
962 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
963 {-1, rep_prefix_4_byte, false}}},
964 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
965 {-1, libcall, false}}}};
/* memset strategy table referenced by k8_cost.  The first element goes
   loop -> unrolled_loop -> rep_prefix_4_byte -> libcall, the second
   unrolled_loop -> rep_prefix_8_byte -> libcall; -1 marks the catch-all
   entry.
   NOTE(review): presumably the leading numbers are upper block-size bounds
   in bytes -- confirm against the declaration of stringop_algs.  */
966static stringop_algs k8_memset[2] = {
967 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
968 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
969 {libcall, {{48, unrolled_loop, false},
970 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
/* Cost table for K8.  The leading brace-enclosed group holds the register
   allocator costs; it is followed by instruction costs expressed with
   COSTS_N_INSNS, move/clear ratios, load/store costs, cache and prefetch
   parameters, x87 and SSE instruction costs, reassociation widths, the
   memcpy/memset strategy tables, branch costs and alignment strings.  */
971static const
972struct processor_costs k8_cost = {
72bb85f8 973 {
d321551c
L
974 /* Start of register allocator costs. integer->integer move cost is 2. */
975 4, /* cost for loading QImode using movzbl */
976 {3, 4, 3}, /* cost of loading integer registers
977 in QImode, HImode and SImode.
978 Relative to reg-reg move (2). */
979 {3, 4, 3}, /* cost of storing integer registers */
980 4, /* cost of reg,reg fld/fst */
981 {4, 4, 12}, /* cost of loading fp registers
982 in SFmode, DFmode and XFmode */
983 {6, 6, 8}, /* cost of storing fp registers
984 in SFmode, DFmode and XFmode */
985 2, /* cost of moving MMX register */
986 {3, 3}, /* cost of loading MMX registers
987 in SImode and DImode */
988 {4, 4}, /* cost of storing MMX registers
989 in SImode and DImode */
990 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
991 {4, 3, 12, 12, 24}, /* cost of loading SSE registers
992 in 32,64,128,256 and 512-bit */
993 {4, 4, 10, 10, 20}, /* cost of storing SSE registers
994 in 32,64,128,256 and 512-bit */
995 5, 5, /* SSE->integer and integer->SSE moves */
996 /* End of register allocator costs. */
72bb85f8 997 },
d321551c 998
64766e8d
JH
999 COSTS_N_INSNS (1), /* cost of an add instruction */
1000 COSTS_N_INSNS (2), /* cost of a lea instruction */
1001 COSTS_N_INSNS (1), /* variable shift costs */
1002 COSTS_N_INSNS (1), /* constant shift costs */
1003 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1004 COSTS_N_INSNS (4), /* HI */
1005 COSTS_N_INSNS (3), /* SI */
1006 COSTS_N_INSNS (4), /* DI */
1007 COSTS_N_INSNS (5)}, /* other */
1008 0, /* cost of multiply per each bit set */
1009 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1010 COSTS_N_INSNS (26), /* HI */
1011 COSTS_N_INSNS (42), /* SI */
1012 COSTS_N_INSNS (74), /* DI */
1013 COSTS_N_INSNS (74)}, /* other */
1014 COSTS_N_INSNS (1), /* cost of movsx */
1015 COSTS_N_INSNS (1), /* cost of movzx */
1016 8, /* "large" insn */
1017 9, /* MOVE_RATIO */
25e22b19 1018 6, /* CLEAR_RATIO */
64766e8d
JH
1019 {3, 4, 3}, /* cost of loading integer registers
1020 in QImode, HImode and SImode.
1021 Relative to reg-reg move (2). */
1022 {3, 4, 3}, /* cost of storing integer registers */
d321551c
L
1023 {4, 3, 12, 12, 24}, /* cost of loading SSE register
1024 in 32bit, 64bit, 128bit, 256bit and 512bit */
1025 {4, 4, 10, 10, 20}, /* cost of storing SSE register
1026 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1027 {4, 3, 12, 12, 24}, /* cost of unaligned loads. */
b7167993 1028 {4, 4, 10, 10, 20}, /* cost of unaligned stores. */
d321551c
L
1029 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1030 5, /* cost of moving SSE register to integer. */
a4fe6139
JH
1031 4, 4, /* Gather load static, per_elt. */
1032 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
1033 64, /* size of l1 cache. */
1034 512, /* size of l2 cache. */
1035 64, /* size of prefetch block */
1036 /* New AMD processors never drop prefetches; if they cannot be performed
1037 immediately, they are queued. We set number of simultaneous prefetches
1038 to a large constant to reflect this (it probably is not a good idea not
1039 to limit number of prefetches at all, as their execution also takes some
1040 time). */
1041 100, /* number of parallel prefetches */
1042 3, /* Branch cost */
1043 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1044 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1045 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1046 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1047 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1048 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1049
c53c148c 1050 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1051 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1052 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1053 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1054 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
1055 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
1056 /* 11-16 (NOTE(review): presumably the DIVSS latency range, with the
   upper bound used below -- confirm). */
1057 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
1058 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
1059 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
1060 COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
64766e8d
JH
1061 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1062 k8_memcpy, /* stringop_algs table used for memcpy. */
1063 k8_memset, /* stringop_algs table used for memset. */
f6fd8f2b
JH
1064 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1065 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1066 "16:8:8", /* Loop alignment. */
1067 "16:8:8", /* Jump alignment. */
1068 "0:0:8", /* Label alignment. */
1069 "16", /* Func alignment. */
64766e8d
JH
1070};
1071
1072/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
1073 very small blocks it is better to use a loop. For large blocks, libcall can
1074 do non-temporal accesses and beat inline code considerably. */
/* memcpy strategy table referenced by amdfam10_cost.  The first element goes
   loop -> unrolled_loop -> rep_prefix_4_byte, the second
   loop -> rep_prefix_8_byte -> libcall; -1 marks the catch-all entry.
   NOTE(review): presumably the leading numbers are upper block-size bounds
   in bytes and the two elements are the 32-bit and 64-bit variants --
   confirm against the declaration of stringop_algs.  */
1075static stringop_algs amdfam10_memcpy[2] = {
1076 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1077 {-1, rep_prefix_4_byte, false}}},
1078 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1079 {-1, libcall, false}}}};
/* memset strategy table referenced by amdfam10_cost.  The first element goes
   loop -> unrolled_loop -> rep_prefix_4_byte -> libcall, the second
   unrolled_loop -> rep_prefix_8_byte -> libcall; -1 marks the catch-all
   entry.
   NOTE(review): presumably the leading numbers are upper block-size bounds
   in bytes -- confirm against the declaration of stringop_algs.  */
1080static stringop_algs amdfam10_memset[2] = {
1081 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1082 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1083 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1084 {-1, libcall, false}}}};
/* Cost table for AMDFAM10.  The leading brace-enclosed group holds the
   register allocator costs; it is followed by instruction costs expressed
   with COSTS_N_INSNS, move/clear ratios, load/store costs, cache and prefetch
   parameters, x87 and SSE instruction costs, reassociation widths, the
   memcpy/memset strategy tables, branch costs and alignment strings.
   NOTE(review): this definition is neither static nor const, whereas the
   k6, athlon and k8 tables are "static const" -- confirm whether that is
   intentional.  */
1085struct processor_costs amdfam10_cost = {
72bb85f8 1086 {
d321551c 1087 /* Start of register allocator costs. integer->integer move cost is 2. */
64766e8d
JH
1088 4, /* cost for loading QImode using movzbl */
1089 {3, 4, 3}, /* cost of loading integer registers
1090 in QImode, HImode and SImode.
1091 Relative to reg-reg move (2). */
1092 {3, 4, 3}, /* cost of storing integer registers */
1093 4, /* cost of reg,reg fld/fst */
1094 {4, 4, 12}, /* cost of loading fp registers
1095 in SFmode, DFmode and XFmode */
1096 {6, 6, 8}, /* cost of storing fp registers
1097 in SFmode, DFmode and XFmode */
1098 2, /* cost of moving MMX register */
1099 {3, 3}, /* cost of loading MMX registers
1100 in SImode and DImode */
1101 {4, 4}, /* cost of storing MMX registers
1102 in SImode and DImode */
df41dbaf
JH
1103 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1104 {4, 4, 3, 6, 12}, /* cost of loading SSE registers
1105 in 32,64,128,256 and 512-bit */
df41dbaf
JH
1106 {4, 4, 5, 10, 20}, /* cost of storing SSE registers
1107 in 32,64,128,256 and 512-bit */
df41dbaf 1108 3, 3, /* SSE->integer and integer->SSE moves */
d321551c 1109
64766e8d
JH
1110 /* On K8:
1111 MOVD reg64, xmmreg Double FSTORE 4
1112 MOVD reg32, xmmreg Double FSTORE 4
1113 On AMDFAM10:
1114 MOVD reg64, xmmreg Double FADD 3
1115 1/1 1/1
1116 MOVD reg32, xmmreg Double FADD 3
1117 1/1 1/1 */
d321551c 1118 /* End of register allocator costs. */
72bb85f8 1119 },
d321551c
L
1120
1121 COSTS_N_INSNS (1), /* cost of an add instruction */
1122 COSTS_N_INSNS (2), /* cost of a lea instruction */
1123 COSTS_N_INSNS (1), /* variable shift costs */
1124 COSTS_N_INSNS (1), /* constant shift costs */
1125 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1126 COSTS_N_INSNS (4), /* HI */
1127 COSTS_N_INSNS (3), /* SI */
1128 COSTS_N_INSNS (4), /* DI */
1129 COSTS_N_INSNS (5)}, /* other */
1130 0, /* cost of multiply per each bit set */
1131 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1132 COSTS_N_INSNS (35), /* HI */
1133 COSTS_N_INSNS (51), /* SI */
1134 COSTS_N_INSNS (83), /* DI */
1135 COSTS_N_INSNS (83)}, /* other */
1136 COSTS_N_INSNS (1), /* cost of movsx */
1137 COSTS_N_INSNS (1), /* cost of movzx */
1138 8, /* "large" insn */
1139 9, /* MOVE_RATIO */
25e22b19 1140 6, /* CLEAR_RATIO */
d321551c
L
1141 {3, 4, 3}, /* cost of loading integer registers
1142 in QImode, HImode and SImode.
1143 Relative to reg-reg move (2). */
1144 {3, 4, 3}, /* cost of storing integer registers */
1145 {4, 4, 3, 6, 12}, /* cost of loading SSE register
1146 in 32bit, 64bit, 128bit, 256bit and 512bit */
1147 {4, 4, 5, 10, 20}, /* cost of storing SSE register
1148 in 32bit, 64bit, 128bit, 256bit and 512bit */
1149 {4, 4, 3, 7, 12}, /* cost of unaligned loads.
   NOTE(review): the 256-bit entry (7) differs from
   the aligned load cost (6) above -- confirm. */
1150 {4, 4, 5, 10, 20}, /* cost of unaligned stores. */
1151 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1152 3, /* cost of moving SSE register to integer. */
a4fe6139
JH
1153 4, 4, /* Gather load static, per_elt. */
1154 4, 4, /* Gather store static, per_elt. */
64766e8d
JH
1155 64, /* size of l1 cache. */
1156 512, /* size of l2 cache. */
1157 64, /* size of prefetch block */
1158 /* New AMD processors never drop prefetches; if they cannot be performed
1159 immediately, they are queued. We set number of simultaneous prefetches
1160 to a large constant to reflect this (it probably is not a good idea not
1161 to limit number of prefetches at all, as their execution also takes some
1162 time). */
1163 100, /* number of parallel prefetches */
1164 2, /* Branch cost */
1165 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1171
c53c148c 1172 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1173 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1174 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1175 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1176 COSTS_N_INSNS (8), /* cost of FMA SS instruction. */
1177 COSTS_N_INSNS (8), /* cost of FMA SD instruction. */
6065f444
JH
1178 /* 11-16 (NOTE(review): presumably the DIVSS latency range, with the
   upper bound used below -- confirm). */
1179 COSTS_N_INSNS (16), /* cost of DIVSS instruction. */
1180 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
1181 COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
1182 COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
64766e8d
JH
1183 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1184 amdfam10_memcpy, /* stringop_algs table used for memcpy. */
1185 amdfam10_memset, /* stringop_algs table used for memset. */
f6fd8f2b
JH
1186 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
1187 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1188 "32:25:8", /* Loop alignment. */
1189 "32:8:8", /* Jump alignment. */
1190 "0:0:8", /* Label alignment. */
1191 "32", /* Func alignment. */
64766e8d
JH
1192};
1193
c727b835 1194/* BDVER has optimized REP instruction for medium sized blocks, but for
64766e8d
JH
1195 very small blocks it is better to use a loop. For large blocks, libcall
1196 can do non-temporal accesses and beat inline code considerably. */
/* memcpy strategy table referenced by bdver_cost.  The first element goes
   loop -> unrolled_loop -> rep_prefix_4_byte, the second
   loop -> rep_prefix_8_byte -> libcall; -1 marks the catch-all entry.
   NOTE(review): presumably the leading numbers are upper block-size bounds
   in bytes and the two elements are the 32-bit and 64-bit variants --
   confirm against the declaration of stringop_algs.  */
c727b835 1197static stringop_algs bdver_memcpy[2] = {
64766e8d
JH
1198 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1199 {-1, rep_prefix_4_byte, false}}},
1200 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1201 {-1, libcall, false}}}};
/* memset strategy table referenced by bdver_cost.  The first element goes
   loop -> unrolled_loop -> rep_prefix_4_byte -> libcall, the second
   unrolled_loop -> rep_prefix_8_byte -> libcall; -1 marks the catch-all
   entry.
   NOTE(review): presumably the leading numbers are upper block-size bounds
   in bytes -- confirm against the declaration of stringop_algs.  */
c727b835 1202static stringop_algs bdver_memset[2] = {
64766e8d
JH
1203 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1204 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1205 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1206 {-1, libcall, false}}}};
1207
/* Cost table for BDVER.  The leading brace-enclosed group holds the register
   allocator costs; it is followed by instruction costs expressed with
   COSTS_N_INSNS, move/clear ratios, load/store costs, cache and prefetch
   parameters, x87 and SSE instruction costs, reassociation widths, the
   memcpy/memset strategy tables, branch costs and alignment strings.  */
c727b835 1208const struct processor_costs bdver_cost = {
72bb85f8 1209 {
d321551c
L
1210 /* Start of register allocator costs. integer->integer move cost is 2. */
1211 8, /* cost for loading QImode using movzbl */
1212 {8, 8, 8}, /* cost of loading integer registers
1213 in QImode, HImode and SImode.
1214 Relative to reg-reg move (2). */
1215 {8, 8, 8}, /* cost of storing integer registers */
1216 4, /* cost of reg,reg fld/fst */
1217 {12, 12, 28}, /* cost of loading fp registers
1218 in SFmode, DFmode and XFmode */
1219 {10, 10, 18}, /* cost of storing fp registers
1220 in SFmode, DFmode and XFmode */
1221 4, /* cost of moving MMX register */
1222 {12, 12}, /* cost of loading MMX registers
1223 in SImode and DImode */
1224 {10, 10}, /* cost of storing MMX registers
1225 in SImode and DImode */
1226 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1227 {12, 12, 10, 40, 60}, /* cost of loading SSE registers
1228 in 32,64,128,256 and 512-bit */
1229 {10, 10, 10, 40, 60}, /* cost of storing SSE registers
1230 in 32,64,128,256 and 512-bit */
1231 16, 20, /* SSE->integer and integer->SSE moves */
1232 /* End of register allocator costs. */
72bb85f8 1233 },
d321551c 1234
64766e8d
JH
1235 COSTS_N_INSNS (1), /* cost of an add instruction */
1236 COSTS_N_INSNS (1), /* cost of a lea instruction */
1237 COSTS_N_INSNS (1), /* variable shift costs */
1238 COSTS_N_INSNS (1), /* constant shift costs */
1239 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1240 COSTS_N_INSNS (4), /* HI */
1241 COSTS_N_INSNS (4), /* SI */
1242 COSTS_N_INSNS (6), /* DI */
1243 COSTS_N_INSNS (6)}, /* other */
1244 0, /* cost of multiply per each bit set */
1245 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1246 COSTS_N_INSNS (35), /* HI */
1247 COSTS_N_INSNS (51), /* SI */
1248 COSTS_N_INSNS (83), /* DI */
1249 COSTS_N_INSNS (83)}, /* other */
1250 COSTS_N_INSNS (1), /* cost of movsx */
1251 COSTS_N_INSNS (1), /* cost of movzx */
1252 8, /* "large" insn */
1253 9, /* MOVE_RATIO */
25e22b19 1254 6, /* CLEAR_RATIO */
df41dbaf 1255 {8, 8, 8}, /* cost of loading integer registers
64766e8d
JH
1256 in QImode, HImode and SImode.
1257 Relative to reg-reg move (2). */
df41dbaf 1258 {8, 8, 8}, /* cost of storing integer registers */
d321551c
L
1259 {12, 12, 10, 40, 60}, /* cost of loading SSE register
1260 in 32bit, 64bit, 128bit, 256bit and 512bit */
1261 {10, 10, 10, 40, 60}, /* cost of storing SSE register
1262 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1263 {12, 12, 10, 40, 60}, /* cost of unaligned loads. */
b7167993 1264 {10, 10, 10, 40, 60}, /* cost of unaligned stores. */
d321551c
L
1265 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1266 16, /* cost of moving SSE register to integer. */
a4fe6139
JH
1267 12, 12, /* Gather load static, per_elt. */
1268 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
1269 16, /* size of l1 cache. */
1270 2048, /* size of l2 cache. */
1271 64, /* size of prefetch block */
1272 /* New AMD processors never drop prefetches; if they cannot be performed
1273 immediately, they are queued. We set number of simultaneous prefetches
1274 to a large constant to reflect this (it probably is not a good idea not
1275 to limit number of prefetches at all, as their execution also takes some
1276 time). */
1277 100, /* number of parallel prefetches */
1278 2, /* Branch cost */
1279 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1280 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1281 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1282 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1283 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1284 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
6065f444 1285
c53c148c 1286 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1287 COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */
1288 COSTS_N_INSNS (6), /* cost of MULSS instruction. */
1289 COSTS_N_INSNS (6), /* cost of MULSD instruction. */
c53c148c
JH
1290 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
1291 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
1292 /* 9-24 (NOTE(review): presumably the DIVSS latency range, with the
   upper bound used below -- confirm). */
1293 COSTS_N_INSNS (24), /* cost of DIVSS instruction. */
1294 /* 9-27 (NOTE(review): presumably the DIVSD latency range, with the
   upper bound used below -- confirm). */
1295 COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
1296 COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
1297 COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
64766e8d 1298 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
c727b835
RB
1299 bdver_memcpy, /* stringop_algs table used for memcpy. */
1300 bdver_memset, /* stringop_algs table used for memset. */
f6fd8f2b
JH
1301 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1302 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1303 "16:11:8", /* Loop alignment. */
1304 "16:8:8", /* Jump alignment. */
1305 "0:0:8", /* Label alignment. */
1306 "11", /* Func alignment.
   NOTE(review): the other tables nearby use "16" or
   "32" here -- confirm that "11" is intended. */
64766e8d
JH
1307};
1308
1309
1310/* ZNVER1 has an optimized REP instruction for medium-sized blocks, but for
1311 very small blocks it is better to use a loop. For large blocks, libcall
1312 can do non-temporal accesses and beat inline code considerably. */
/* memcpy strategy table referenced by znver1_cost.  The first element goes
   loop -> unrolled_loop -> rep_prefix_4_byte, the second
   loop -> rep_prefix_8_byte -> libcall; -1 marks the catch-all entry.
   NOTE(review): presumably the leading numbers are upper block-size bounds
   in bytes and the two elements are the 32-bit and 64-bit variants --
   confirm against the declaration of stringop_algs.  */
1313static stringop_algs znver1_memcpy[2] = {
1314 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1315 {-1, rep_prefix_4_byte, false}}},
1316 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1317 {-1, libcall, false}}}};
/* memset strategy table referenced by znver1_cost.  The first element goes
   loop -> unrolled_loop -> rep_prefix_4_byte -> libcall, the second
   unrolled_loop -> rep_prefix_8_byte -> libcall; -1 marks the catch-all
   entry.
   NOTE(review): presumably the leading numbers are upper block-size bounds
   in bytes -- confirm against the declaration of stringop_algs.  */
1318static stringop_algs znver1_memset[2] = {
1319 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1320 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1321 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1322 {-1, libcall, false}}}};
1323struct processor_costs znver1_cost = {
72bb85f8 1324 {
d321551c
L
1325 /* Start of register allocator costs. integer->integer move cost is 2. */
1326
1327 /* reg-reg moves are done by renaming and thus they are even cheaper than
1328 1 cycle. Because reg-reg move cost is 2 and the following tables correspond
1329 to doubles of latencies, we do not model this correctly. It does not
1330 seem to make practical difference to bump prices up even more. */
1331 6, /* cost for loading QImode using
1332 movzbl. */
1333 {6, 6, 6}, /* cost of loading integer registers
1334 in QImode, HImode and SImode.
1335 Relative to reg-reg move (2). */
1336 {8, 8, 8}, /* cost of storing integer
1337 registers. */
1338 2, /* cost of reg,reg fld/fst. */
1339 {6, 6, 16}, /* cost of loading fp registers
1340 in SFmode, DFmode and XFmode. */
1341 {8, 8, 16}, /* cost of storing fp registers
1342 in SFmode, DFmode and XFmode. */
1343 2, /* cost of moving MMX register. */
1344 {6, 6}, /* cost of loading MMX registers
1345 in SImode and DImode. */
1346 {8, 8}, /* cost of storing MMX registers
1347 in SImode and DImode. */
1348 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
1349 {6, 6, 6, 12, 24}, /* cost of loading SSE registers
1350 in 32,64,128,256 and 512-bit. */
1351 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
1352 in 32,64,128,256 and 512-bit. */
1353 6, 6, /* SSE->integer and integer->SSE moves. */
1354 /* End of register allocator costs. */
72bb85f8 1355 },
d321551c 1356
64766e8d
JH
1357 COSTS_N_INSNS (1), /* cost of an add instruction. */
1358 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1359 COSTS_N_INSNS (1), /* variable shift costs. */
1360 COSTS_N_INSNS (1), /* constant shift costs. */
1361 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1362 COSTS_N_INSNS (3), /* HI. */
1363 COSTS_N_INSNS (3), /* SI. */
6065f444
JH
1364 COSTS_N_INSNS (3), /* DI. */
1365 COSTS_N_INSNS (3)}, /* other. */
64766e8d
JH
1366 0, /* cost of multiply per each bit
1367 set. */
6065f444
JH
1368 /* Depending on parameters, idiv can get faster on ryzen. This is upper
1369 bound. */
1370 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
1371 COSTS_N_INSNS (22), /* HI. */
1372 COSTS_N_INSNS (30), /* SI. */
1373 COSTS_N_INSNS (45), /* DI. */
1374 COSTS_N_INSNS (45)}, /* other. */
64766e8d
JH
1375 COSTS_N_INSNS (1), /* cost of movsx. */
1376 COSTS_N_INSNS (1), /* cost of movzx. */
1377 8, /* "large" insn. */
1378 9, /* MOVE_RATIO. */
25e22b19 1379 6, /* CLEAR_RATIO */
01118373 1380 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
1381 in QImode, HImode and SImode.
1382 Relative to reg-reg move (2). */
01118373 1383 {8, 8, 8}, /* cost of storing integer
64766e8d 1384 registers. */
d321551c
L
1385 {6, 6, 6, 12, 24}, /* cost of loading SSE register
1386 in 32bit, 64bit, 128bit, 256bit and 512bit */
1387 {8, 8, 8, 16, 32}, /* cost of storing SSE register
1388 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1389 {6, 6, 6, 12, 24}, /* cost of unaligned loads. */
b7167993 1390 {8, 8, 8, 16, 32}, /* cost of unaligned stores. */
d321551c
L
1391 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
1392 6, /* cost of moving SSE register to integer. */
a4fe6139
JH
1393 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
1394 throughput 12. Approx 9 uops do not depend on vector size and every load
1395 is 7 uops. */
1396 18, 8, /* Gather load static, per_elt. */
1397 18, 10, /* Gather store static, per_elt. */
64766e8d
JH
1398 32, /* size of l1 cache. */
1399 512, /* size of l2 cache. */
1400 64, /* size of prefetch block. */
1401 /* New AMD processors never drop prefetches; if they cannot be performed
1402 immediately, they are queued. We set number of simultaneous prefetches
1403 to a large constant to reflect this (it probably is not a good idea not
1404 to limit number of prefetches at all, as their execution also takes some
1405 time). */
1406 100, /* number of parallel prefetches. */
1407 3, /* Branch cost. */
6065f444
JH
1408 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1410 /* Latency of fdiv is 8-15. */
1411 COSTS_N_INSNS (15), /* cost of FDIV instruction. */
1412 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1413 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1414 /* Latency of fsqrt is 4-10. */
1415 COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
1416
c53c148c 1417 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
1418 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1419 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
1420 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1421 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1422 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
1423 COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
1424 /* 9-13 */
1425 COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
1426 COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
1427 COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
64766e8d
JH
1428 /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
1429 and it can execute 2 integer additions and 2 multiplications thus
1430 reassociation may make sense up to a width of 6. SPEC2k6 benchmarks suggest
1431 that 4 works better than 6 probably due to register pressure.
1432
1433 Integer vector operations are taken by FP unit and execute 3 vector
1434 plus/minus operations per cycle but only one multiply. This is adjusted
1435 in ix86_reassociation_width. */
1436 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
1437 znver1_memcpy,
1438 znver1_memset,
f6fd8f2b
JH
1439 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1440 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1441 "16", /* Loop alignment. */
1442 "16", /* Jump alignment. */
1443 "0:0:8", /* Label alignment. */
1444 "16", /* Func alignment. */
64766e8d
JH
1445};
1446
2901f42f
VK
1447/* ZNVER2 has an optimized REP instruction for medium-sized blocks, but for
1448 very small blocks it is better to use a loop. For large blocks, libcall
1449 can do non-temporal accesses and beat inline code considerably.

 Each of the two entries starts with an algorithm name (libcall here)
 followed by a list of {limit, algorithm, flag} triples; the last triple
 of every list uses a limit of -1.
 NOTE(review): presumably the limits are block sizes in bytes and entry
 [0] is for 32-bit, entry [1] for 64-bit code -- confirm against the
 declaration of stringop_algs. */
1450static stringop_algs znver2_memcpy[2] = {
1451 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1452 {-1, rep_prefix_4_byte, false}}},
187dd65d 1453 {libcall, {{16, loop, false}, {64, rep_prefix_4_byte, false},
2901f42f
VK
1454 {-1, libcall, false}}}};
/* Table referenced by znver2_cost right after znver2_memcpy. It has two
   entries, each an algorithm name followed by {limit, algorithm, flag}
   triples, the last of which uses a limit of -1.
   NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit variant
   -- confirm against the declaration of stringop_algs. */
1455static stringop_algs znver2_memset[2] = {
1456 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1457 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
187dd65d 1458 {libcall, {{24, rep_prefix_4_byte, false}, {128, rep_prefix_8_byte, false},
2901f42f
VK
1459 {-1, libcall, false}}}};
1460
/* Processor cost table for znver2. The initializers are positional; the
   comment on each line names the field it fills. The first brace group
   holds the register allocator costs, the remainder the instruction,
   memory, cache, branch and alignment parameters.
   NOTE(review): the field order must match struct processor_costs, which
   is declared outside this file -- keep both in sync when editing. */
1461struct processor_costs znver2_cost = {
72bb85f8 1462 {
d321551c 1463 /* Start of register allocator costs. integer->integer move cost is 2. */
2901f42f
VK
1464
1465 /* reg-reg moves are done by renaming and thus they are even cheaper than
1466 1 cycle. Because reg-reg move cost is 2 and following tables correspond
1467 to doubles of latencies, we do not model this correctly. It does not
1468 seem to make practical difference to bump prices up even more. */
1469 6, /* cost for loading QImode using
1470 movzbl. */
1471 {6, 6, 6}, /* cost of loading integer registers
1472 in QImode, HImode and SImode.
1473 Relative to reg-reg move (2). */
1474 {8, 8, 8}, /* cost of storing integer
1475 registers. */
1476 2, /* cost of reg,reg fld/fst. */
1477 {6, 6, 16}, /* cost of loading fp registers
1478 in SFmode, DFmode and XFmode. */
1479 {8, 8, 16}, /* cost of storing fp registers
1480 in SFmode, DFmode and XFmode. */
1481 2, /* cost of moving MMX register. */
1482 {6, 6}, /* cost of loading MMX registers
1483 in SImode and DImode. */
1484 {8, 8}, /* cost of storing MMX registers
1485 in SImode and DImode. */
187dd65d 1486 2, 2, 3, /* cost of moving XMM,YMM,ZMM
2901f42f 1487 register. */
187dd65d 1488 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
2901f42f 1489 in 32,64,128,256 and 512-bit. */
2901f42f
VK
1490 {8, 8, 8, 8, 16}, /* cost of storing SSE registers
1491 in 32,64,128,256 and 512-bit. */
2901f42f
VK
1492 6, 6, /* SSE->integer and integer->SSE
1493 moves. */
d321551c 1494 /* End of register allocator costs. */
72bb85f8 1495 },
d321551c
L
1496
1497 COSTS_N_INSNS (1), /* cost of an add instruction. */
1498 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1499 COSTS_N_INSNS (1), /* variable shift costs. */
1500 COSTS_N_INSNS (1), /* constant shift costs. */
1501 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1502 COSTS_N_INSNS (3), /* HI. */
1503 COSTS_N_INSNS (3), /* SI. */
1504 COSTS_N_INSNS (3), /* DI. */
1505 COSTS_N_INSNS (3)}, /* other. */
1506 0, /* cost of multiply per each bit
1507 set. */
1508 /* Depending on parameters, idiv can get faster on ryzen. This is upper
1509 bound. */
1510 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */
1511 COSTS_N_INSNS (22), /* HI. */
1512 COSTS_N_INSNS (30), /* SI. */
1513 COSTS_N_INSNS (45), /* DI. */
1514 COSTS_N_INSNS (45)}, /* other. */
1515 COSTS_N_INSNS (1), /* cost of movsx. */
1516 COSTS_N_INSNS (1), /* cost of movzx. */
1517 8, /* "large" insn. */
1518 9, /* MOVE_RATIO. */
25e22b19 1519 6, /* CLEAR_RATIO */
d321551c
L
1520 {6, 6, 6}, /* cost of loading integer registers
1521 in QImode, HImode and SImode.
1522 Relative to reg-reg move (2). */
1523 {8, 8, 8}, /* cost of storing integer
1524 registers. */
1525 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
1526 in 32bit, 64bit, 128bit, 256bit and 512bit */
1527 {8, 8, 8, 8, 16}, /* cost of storing SSE register
1528 in 32bit, 64bit, 128bit, 256bit and 512bit */
1529 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
1530 {8, 8, 8, 8, 16}, /* cost of unaligned stores. */
1531 2, 2, 3, /* cost of moving XMM,YMM,ZMM
1532 register. */
1533 6, /* cost of moving SSE register to integer. */
2901f42f
VK
1534 /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops,
1535 throughput 12. Approx 9 uops do not depend on vector size and every load
1536 is 7 uops.
 NOTE(review): both figures above name VGATHERDPD; presumably they refer
 to two different vector widths -- confirm and name them explicitly. */
1537 18, 8, /* Gather load static, per_elt. */
1538 18, 10, /* Gather store static, per_elt. */
1539 32, /* size of l1 cache. */
1540 512, /* size of l2 cache. */
1541 64, /* size of prefetch block. */
1542 /* New AMD processors never drop prefetches; if they cannot be performed
1543 immediately, they are queued. We set number of simultaneous prefetches
1544 to a large constant to reflect this (it probably is not a good idea not
1545 to limit number of prefetches at all, as their execution also takes some
1546 time). */
1547 100, /* number of parallel prefetches. */
1548 3, /* Branch cost. */
1549 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1550 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1551 /* Latency of fdiv is 8-15. */
1552 COSTS_N_INSNS (15), /* cost of FDIV instruction. */
1553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1555 /* Latency of fsqrt is 4-10. */
1556 COSTS_N_INSNS (10), /* cost of FSQRT instruction. */
1557
1558 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1559 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1560 COSTS_N_INSNS (3), /* cost of MULSS instruction. */
187dd65d 1561 COSTS_N_INSNS (3), /* cost of MULSD instruction. */
2901f42f
VK
1562 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1563 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
1564 COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
1565 /* 9-13. */
1566 COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
1567 COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
1568 COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
1569 /* Zen can execute 4 integer operations per cycle. FP operations
1570 take 3 cycles and it can execute 2 integer additions and 2
1571 multiplications thus reassociation may make sense up to a width of 6.
1572 SPEC2k6 benchmarks suggest
1573 that 4 works better than 6 probably due to register pressure.
1574
1575 Integer vector operations are taken by FP unit and execute 3 vector
1576 plus/minus operations per cycle but only one multiply. This is adjusted
1577 in ix86_reassociation_width. */
1578 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
1579 znver2_memcpy,
1580 znver2_memset,
1581 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
1582 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
1583 "16", /* Loop alignment. */
1584 "16", /* Jump alignment. */
1585 "0:0:8", /* Label alignment. */
1586 "16", /* Func alignment. */
1587};
1588
c234d831
UB
1589/* skylake_cost should produce code tuned for the Skylake family of CPUs.

 skylake_memcpy has two entries, each an algorithm name followed by
 {limit, algorithm, flag} triples; the last triple of each list uses a
 limit of -1.
 NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit variant
 -- confirm against the declaration of stringop_algs. */
1590static stringop_algs skylake_memcpy[2] = {
1591 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
6e559c70 1592 {libcall, {{16, loop, false}, {512, unrolled_loop, false},
c234d831
UB
1593 {-1, libcall, false}}}};
1594
/* Table referenced by skylake_cost right after skylake_memcpy. It has two
   entries, each an algorithm name followed by {limit, algorithm, flag}
   triples, the last of which uses a limit of -1.
   NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit variant
   -- confirm against the declaration of stringop_algs. */
1595static stringop_algs skylake_memset[2] = {
1596 {libcall, {{6, loop_1_byte, true},
1597 {24, loop, true},
1598 {8192, rep_prefix_4_byte, true},
1599 {-1, libcall, false}}},
6e559c70 1600 {libcall, {{24, loop, true}, {512, unrolled_loop, false},
c234d831
UB
1601 {-1, libcall, false}}}};
1602
/* Processor cost table for Skylake. The initializers are positional; the
   comment on each line names the field it fills. The first brace group
   holds the register allocator costs, the remainder the instruction,
   memory, cache, branch and alignment parameters.
   NOTE(review): the field order must match struct processor_costs, which
   is declared outside this file -- keep both in sync when editing. */
1603static const
1604struct processor_costs skylake_cost = {
72bb85f8 1605 {
d321551c
L
1606 /* Start of register allocator costs. integer->integer move cost is 2. */
1607 6, /* cost for loading QImode using movzbl */
1608 {4, 4, 4}, /* cost of loading integer registers
1609 in QImode, HImode and SImode.
1610 Relative to reg-reg move (2). */
7706f2f3 1611 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
1612 2, /* cost of reg,reg fld/fst */
1613 {6, 6, 8}, /* cost of loading fp registers
1614 in SFmode, DFmode and XFmode */
1615 {6, 6, 10}, /* cost of storing fp registers
1616 in SFmode, DFmode and XFmode */
1617 2, /* cost of moving MMX register */
1618 {6, 6}, /* cost of loading MMX registers
1619 in SImode and DImode */
1620 {6, 6}, /* cost of storing MMX registers
1621 in SImode and DImode */
1622 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
1623 {6, 6, 6, 10, 20}, /* cost of loading SSE registers
1624 in 32,64,128,256 and 512-bit */
1625 {8, 8, 8, 12, 24}, /* cost of storing SSE registers
1626 in 32,64,128,256 and 512-bit */
4e9ad7c9 1627 6, 6, /* SSE->integer and integer->SSE moves */
d321551c 1628 /* End of register allocator costs. */
72bb85f8 1629 },
d321551c 1630
c234d831
UB
1631 COSTS_N_INSNS (1), /* cost of an add instruction */
1632 COSTS_N_INSNS (1)+1, /* cost of a lea instruction */
1633 COSTS_N_INSNS (1), /* variable shift costs */
1634 COSTS_N_INSNS (1), /* constant shift costs */
1635 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1636 COSTS_N_INSNS (4), /* HI */
1637 COSTS_N_INSNS (3), /* SI */
a2ef9558
MT
1638 COSTS_N_INSNS (3), /* DI */
1639 COSTS_N_INSNS (3)}, /* other */
c234d831 1640 0, /* cost of multiply per each bit set */
02308bd3
MT
1641 /* Expanding div/mod currently doesn't consider parallelism. So the cost
1642 model is not realistic. We compensate by increasing the latencies a bit. */
1643 {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
1644 COSTS_N_INSNS (11), /* HI */
1645 COSTS_N_INSNS (14), /* SI */
c234d831
UB
1646 COSTS_N_INSNS (76), /* DI */
1647 COSTS_N_INSNS (76)}, /* other */
1648 COSTS_N_INSNS (1), /* cost of movsx */
1649 COSTS_N_INSNS (0), /* cost of movzx */
1650 8, /* "large" insn */
1651 17, /* MOVE_RATIO */
25e22b19 1652 6, /* CLEAR_RATIO */
c234d831
UB
1653 {4, 4, 4}, /* cost of loading integer registers
1654 in QImode, HImode and SImode.
1655 Relative to reg-reg move (2). */
101a0841 1656 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
1657 {6, 6, 6, 10, 20}, /* cost of loading SSE register
1658 in 32bit, 64bit, 128bit, 256bit and 512bit */
1659 {8, 8, 8, 12, 24}, /* cost of storing SSE register
1660 in 32bit, 64bit, 128bit, 256bit and 512bit */
c234d831 1661 {6, 6, 6, 10, 20}, /* cost of unaligned loads. */
c234d831 1662 {8, 8, 8, 8, 16}, /* cost of unaligned stores.
 NOTE(review): the 256-bit and 512-bit values (8, 16) are lower than
 the aligned store costs above (12, 24) -- confirm this is intended. */
d321551c
L
1663 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
1664 2, /* cost of moving SSE register to integer.
 NOTE(review): differs from the SSE->integer value of 6 in the
 register allocator costs above -- confirm this is intended. */
c234d831
UB
1665 20, 8, /* Gather load static, per_elt. */
1666 22, 10, /* Gather store static, per_elt. */
1667 64, /* size of l1 cache. */
1668 512, /* size of l2 cache. */
1669 64, /* size of prefetch block */
1670 6, /* number of parallel prefetches */
1671 3, /* Branch cost */
1672 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1673 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1674 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1675 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1676 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1677 COSTS_N_INSNS (20), /* cost of FSQRT instruction. */
1678
1679 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
1680 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1681 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
1682 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
1683 COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
1684 COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
1685 COSTS_N_INSNS (11), /* cost of DIVSS instruction. */
1686 COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
1687 COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
1688 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
1689 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
1690 skylake_memcpy,
1691 skylake_memset,
1692 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1693 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1694 "16:11:8", /* Loop alignment. */
1695 "16:11:8", /* Jump alignment. */
1696 "0:0:8", /* Label alignment. */
1697 "16", /* Func alignment. */
c234d831 1698};
64766e8d
JH
1699 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1700 very small blocks it is better to use a loop. For large blocks, libcall can
1701 do non-temporal accesses and beat inline code considerably.

 Each of the two entries starts with an algorithm name (libcall here)
 followed by a list of {limit, algorithm, flag} triples; the last triple
 of every list uses a limit of -1.
 NOTE(review): presumably the limits are block sizes in bytes and entry
 [0] is for 32-bit, entry [1] for 64-bit code -- confirm against the
 declaration of stringop_algs. */
1702static stringop_algs btver1_memcpy[2] = {
1703 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1704 {-1, rep_prefix_4_byte, false}}},
1705 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1706 {-1, libcall, false}}}};
/* Table referenced by btver1_cost right after btver1_memcpy. It has two
   entries, each an algorithm name followed by {limit, algorithm, flag}
   triples, the last of which uses a limit of -1.
   NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit variant
   -- confirm against the declaration of stringop_algs. */
1707static stringop_algs btver1_memset[2] = {
1708 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1709 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1710 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1711 {-1, libcall, false}}}};
/* Processor cost table for btver1. The initializers are positional; the
   comment on each line names the field it fills. The first brace group
   holds the register allocator costs, the remainder the instruction,
   memory, cache, branch and alignment parameters.
   NOTE(review): the field order must match struct processor_costs, which
   is declared outside this file -- keep both in sync when editing. */
1712const struct processor_costs btver1_cost = {
72bb85f8 1713 {
d321551c
L
1714 /* Start of register allocator costs. integer->integer move cost is 2. */
1715 8, /* cost for loading QImode using movzbl */
1716 {6, 8, 6}, /* cost of loading integer registers
1717 in QImode, HImode and SImode.
1718 Relative to reg-reg move (2). */
1719 {6, 8, 6}, /* cost of storing integer registers */
1720 4, /* cost of reg,reg fld/fst */
1721 {12, 12, 28}, /* cost of loading fp registers
1722 in SFmode, DFmode and XFmode */
1723 {12, 12, 38}, /* cost of storing fp registers
1724 in SFmode, DFmode and XFmode */
1725 4, /* cost of moving MMX register */
1726 {10, 10}, /* cost of loading MMX registers
1727 in SImode and DImode */
1728 {12, 12}, /* cost of storing MMX registers
1729 in SImode and DImode */
1730 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1731 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
1732 in 32,64,128,256 and 512-bit */
1733 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
1734 in 32,64,128,256 and 512-bit */
1735 14, 14, /* SSE->integer and integer->SSE moves */
1736 /* End of register allocator costs. */
72bb85f8 1737 },
d321551c 1738
64766e8d
JH
1739 COSTS_N_INSNS (1), /* cost of an add instruction */
1740 COSTS_N_INSNS (2), /* cost of a lea instruction */
1741 COSTS_N_INSNS (1), /* variable shift costs */
1742 COSTS_N_INSNS (1), /* constant shift costs */
1743 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1744 COSTS_N_INSNS (4), /* HI */
1745 COSTS_N_INSNS (3), /* SI */
1746 COSTS_N_INSNS (4), /* DI */
1747 COSTS_N_INSNS (5)}, /* other */
1748 0, /* cost of multiply per each bit set */
1749 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1750 COSTS_N_INSNS (35), /* HI */
1751 COSTS_N_INSNS (51), /* SI */
1752 COSTS_N_INSNS (83), /* DI */
1753 COSTS_N_INSNS (83)}, /* other */
1754 COSTS_N_INSNS (1), /* cost of movsx */
1755 COSTS_N_INSNS (1), /* cost of movzx */
1756 8, /* "large" insn */
1757 9, /* MOVE_RATIO */
25e22b19 1758 6, /* CLEAR_RATIO */
df41dbaf 1759 {6, 8, 6}, /* cost of loading integer registers
64766e8d
JH
1760 in QImode, HImode and SImode.
1761 Relative to reg-reg move (2). */
df41dbaf 1762 {6, 8, 6}, /* cost of storing integer registers */
d321551c
L
1763 {10, 10, 12, 48, 96}, /* cost of loading SSE register
1764 in 32bit, 64bit, 128bit, 256bit and 512bit */
1765 {10, 10, 12, 48, 96}, /* cost of storing SSE register
1766 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1767 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
b7167993 1768 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
d321551c
L
1769 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1770 14, /* cost of moving SSE register to integer. */
a4fe6139
JH
1771 10, 10, /* Gather load static, per_elt. */
1772 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
1773 32, /* size of l1 cache. */
1774 512, /* size of l2 cache. */
1775 64, /* size of prefetch block */
1776 100, /* number of parallel prefetches */
1777 2, /* Branch cost */
1778 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1779 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1780 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1781 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1782 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1783 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1784
c53c148c 1785 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
1786 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1787 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
1788 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1789 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1790 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
1791 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
1792 COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
1793 COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
1794 COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
64766e8d
JH
1795 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1796 btver1_memcpy,
1797 btver1_memset,
f6fd8f2b
JH
1798 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
1799 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1800 "16:11:8", /* Loop alignment. */
1801 "16:8:8", /* Jump alignment. */
1802 "0:0:8", /* Label alignment. */
1803 "11", /* Func alignment. */
64766e8d
JH
1804};
1805
/* Table referenced by btver2_cost; its values are identical to those of
   btver1_memcpy. It has two entries, each an algorithm name followed by
   {limit, algorithm, flag} triples, the last of which uses a limit of -1.
   NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit variant
   -- confirm against the declaration of stringop_algs. */
1806static stringop_algs btver2_memcpy[2] = {
1807 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1808 {-1, rep_prefix_4_byte, false}}},
1809 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1810 {-1, libcall, false}}}};
/* Table referenced by btver2_cost right after btver2_memcpy; its values
   are identical to those of btver1_memset. Same two-entry layout: an
   algorithm name followed by {limit, algorithm, flag} triples, the last
   of which uses a limit of -1. */
1811static stringop_algs btver2_memset[2] = {
1812 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1813 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1814 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1815 {-1, libcall, false}}}};
/* Processor cost table for btver2. The initializers are positional; the
   comment on each line names the field it fills. The first brace group
   holds the register allocator costs, the remainder the instruction,
   memory, cache, branch and alignment parameters.
   NOTE(review): the field order must match struct processor_costs, which
   is declared outside this file -- keep both in sync when editing. */
1816const struct processor_costs btver2_cost = {
72bb85f8 1817 {
d321551c
L
1818 /* Start of register allocator costs. integer->integer move cost is 2. */
1819 8, /* cost for loading QImode using movzbl */
1820 {8, 8, 6}, /* cost of loading integer registers
1821 in QImode, HImode and SImode.
1822 Relative to reg-reg move (2). */
1823 {8, 8, 6}, /* cost of storing integer registers */
1824 4, /* cost of reg,reg fld/fst */
1825 {12, 12, 28}, /* cost of loading fp registers
1826 in SFmode, DFmode and XFmode */
1827 {12, 12, 38}, /* cost of storing fp registers
1828 in SFmode, DFmode and XFmode */
1829 4, /* cost of moving MMX register */
1830 {10, 10}, /* cost of loading MMX registers
1831 in SImode and DImode */
1832 {12, 12}, /* cost of storing MMX registers
1833 in SImode and DImode */
1834 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1835 {10, 10, 12, 48, 96}, /* cost of loading SSE registers
1836 in 32,64,128,256 and 512-bit */
1837 {10, 10, 12, 48, 96}, /* cost of storing SSE registers
1838 in 32,64,128,256 and 512-bit */
1839 14, 14, /* SSE->integer and integer->SSE moves */
1840 /* End of register allocator costs. */
72bb85f8 1841 },
d321551c 1842
64766e8d
JH
1843 COSTS_N_INSNS (1), /* cost of an add instruction */
1844 COSTS_N_INSNS (2), /* cost of a lea instruction */
1845 COSTS_N_INSNS (1), /* variable shift costs */
1846 COSTS_N_INSNS (1), /* constant shift costs */
1847 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1848 COSTS_N_INSNS (4), /* HI */
1849 COSTS_N_INSNS (3), /* SI */
1850 COSTS_N_INSNS (4), /* DI */
1851 COSTS_N_INSNS (5)}, /* other */
1852 0, /* cost of multiply per each bit set */
1853 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1854 COSTS_N_INSNS (35), /* HI */
1855 COSTS_N_INSNS (51), /* SI */
1856 COSTS_N_INSNS (83), /* DI */
1857 COSTS_N_INSNS (83)}, /* other */
1858 COSTS_N_INSNS (1), /* cost of movsx */
1859 COSTS_N_INSNS (1), /* cost of movzx */
1860 8, /* "large" insn */
1861 9, /* MOVE_RATIO */
25e22b19 1862 6, /* CLEAR_RATIO */
df41dbaf 1863 {8, 8, 6}, /* cost of loading integer registers
64766e8d
JH
1864 in QImode, HImode and SImode.
1865 Relative to reg-reg move (2). */
df41dbaf 1866 {8, 8, 6}, /* cost of storing integer registers */
d321551c
L
1867 {10, 10, 12, 48, 96}, /* cost of loading SSE register
1868 in 32bit, 64bit, 128bit, 256bit and 512bit */
1869 {10, 10, 12, 48, 96}, /* cost of storing SSE register
1870 in 32bit, 64bit, 128bit, 256bit and 512bit */
b7167993 1871 {10, 10, 12, 48, 96}, /* cost of unaligned loads. */
b7167993 1872 {10, 10, 12, 48, 96}, /* cost of unaligned stores. */
d321551c
L
1873 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
1874 14, /* cost of moving SSE register to integer. */
a4fe6139
JH
1875 10, 10, /* Gather load static, per_elt. */
1876 10, 10, /* Gather store static, per_elt. */
64766e8d
JH
1877 32, /* size of l1 cache. */
1878 2048, /* size of l2 cache. */
1879 64, /* size of prefetch block */
1880 100, /* number of parallel prefetches */
1881 2, /* Branch cost */
1882 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1883 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1884 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1885 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1886 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1887 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
6065f444 1888
c53c148c 1889 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
1890 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
1891 COSTS_N_INSNS (2), /* cost of MULSS instruction. */
1892 COSTS_N_INSNS (4), /* cost of MULSD instruction. */
c53c148c
JH
1893 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
1894 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
1895 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
1896 COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
1897 COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
1898 COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
64766e8d
JH
1899 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
1900 btver2_memcpy,
1901 btver2_memset,
f6fd8f2b
JH
1902 COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
1903 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
1904 "16:11:8", /* Loop alignment. */
1905 "16:8:8", /* Jump alignment. */
1906 "0:0:8", /* Label alignment. */
1907 "11", /* Func alignment. */
64766e8d
JH
1908};
1909
/* Table referenced by pentium4_cost. The first entry is an algorithm name
   followed by {limit, algorithm, flag} triples, the last of which uses a
   limit of -1. The second entry is DUMMY_STRINGOP_ALGS, which expands to
   {libcall, {{-1, libcall, false}}}, i.e. libcall only.
   NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit variant
   -- confirm against the declaration of stringop_algs. */
1910static stringop_algs pentium4_memcpy[2] = {
1911 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1912 DUMMY_STRINGOP_ALGS};
/* Table referenced by pentium4_cost right after pentium4_memcpy. The first
   entry is an algorithm name followed by {limit, algorithm, flag} triples,
   the last of which uses a limit of -1. The second entry is
   DUMMY_STRINGOP_ALGS, which expands to {libcall, {{-1, libcall, false}}}. */
1913static stringop_algs pentium4_memset[2] = {
1914 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1915 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1916 DUMMY_STRINGOP_ALGS};
1917
/* Processor cost table for Pentium 4. The initializers are positional; the
   comment on each line names the field it fills. The first brace group
   holds the register allocator costs, the remainder the instruction,
   memory, cache, branch and alignment parameters. All four alignment
   entries at the end are NULL.
   NOTE(review): the field order must match struct processor_costs, which
   is declared outside this file -- keep both in sync when editing. */
1918static const
1919struct processor_costs pentium4_cost = {
72bb85f8 1920 {
d321551c 1921 /* Start of register allocator costs. integer->integer move cost is 2. */
df41dbaf 1922 5, /* cost for loading QImode using movzbl */
64766e8d
JH
1923 {4, 5, 4}, /* cost of loading integer registers
1924 in QImode, HImode and SImode.
1925 Relative to reg-reg move (2). */
1926 {2, 3, 2}, /* cost of storing integer registers */
df41dbaf
JH
1927 12, /* cost of reg,reg fld/fst */
1928 {14, 14, 14}, /* cost of loading fp registers
64766e8d 1929 in SFmode, DFmode and XFmode */
df41dbaf 1930 {14, 14, 14}, /* cost of storing fp registers
64766e8d 1931 in SFmode, DFmode and XFmode */
df41dbaf
JH
1932 12, /* cost of moving MMX register */
1933 {16, 16}, /* cost of loading MMX registers
64766e8d 1934 in SImode and DImode */
df41dbaf 1935 {16, 16}, /* cost of storing MMX registers
64766e8d 1936 in SImode and DImode */
df41dbaf
JH
1937 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
1938 {16, 16, 16, 32, 64}, /* cost of loading SSE registers
1939 in 32,64,128,256 and 512-bit */
d321551c
L
1940 {16, 16, 16, 32, 64}, /* cost of storing SSE registers
1941 in 32,64,128,256 and 512-bit */
1942 20, 12, /* SSE->integer and integer->SSE moves */
1943 /* End of register allocator costs. */
72bb85f8 1944 },
d321551c
L
1945
1946 COSTS_N_INSNS (1), /* cost of an add instruction */
1947 COSTS_N_INSNS (3), /* cost of a lea instruction */
1948 COSTS_N_INSNS (4), /* variable shift costs */
1949 COSTS_N_INSNS (4), /* constant shift costs */
1950 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1951 COSTS_N_INSNS (15), /* HI */
1952 COSTS_N_INSNS (15), /* SI */
1953 COSTS_N_INSNS (15), /* DI */
1954 COSTS_N_INSNS (15)}, /* other */
1955 0, /* cost of multiply per each bit set */
1956 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1957 COSTS_N_INSNS (56), /* HI */
1958 COSTS_N_INSNS (56), /* SI */
1959 COSTS_N_INSNS (56), /* DI */
1960 COSTS_N_INSNS (56)}, /* other */
1961 COSTS_N_INSNS (1), /* cost of movsx */
1962 COSTS_N_INSNS (1), /* cost of movzx */
1963 16, /* "large" insn */
1964 6, /* MOVE_RATIO */
25e22b19 1965 6, /* CLEAR_RATIO */
d321551c
L
1966 {4, 5, 4}, /* cost of loading integer registers
1967 in QImode, HImode and SImode.
1968 Relative to reg-reg move (2). */
1969 {2, 3, 2}, /* cost of storing integer registers */
1970 {16, 16, 16, 32, 64}, /* cost of loading SSE register
1971 in 32bit, 64bit, 128bit, 256bit and 512bit */
1972 {16, 16, 16, 32, 64}, /* cost of storing SSE register
1973 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 1974 {32, 32, 32, 64, 128}, /* cost of unaligned loads. */
df41dbaf 1975 {32, 32, 32, 64, 128}, /* cost of unaligned stores. */
d321551c
L
1976 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
1977 20, /* cost of moving SSE register to integer. */
a4fe6139
JH
1978 16, 16, /* Gather load static, per_elt. */
1979 16, 16, /* Gather store static, per_elt. */
64766e8d
JH
1980 8, /* size of l1 cache. */
1981 256, /* size of l2 cache. */
1982 64, /* size of prefetch block */
1983 6, /* number of parallel prefetches */
1984 2, /* Branch cost */
1985 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1986 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1987 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1988 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1989 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1990 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
6065f444 1991
c53c148c 1992 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
1993 COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */
1994 COSTS_N_INSNS (6), /* cost of MULSS instruction. */
1995 COSTS_N_INSNS (6), /* cost of MULSD instruction. */
c53c148c
JH
1996 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
1997 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
1998 COSTS_N_INSNS (23), /* cost of DIVSS instruction. */
1999 COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
2000 COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
2001 COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
64766e8d
JH
2002 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2003 pentium4_memcpy,
2004 pentium4_memset,
f6fd8f2b
JH
2005 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2006 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2007 NULL, /* Loop alignment. */
2008 NULL, /* Jump alignment. */
2009 NULL, /* Label alignment. */
2010 NULL, /* Func alignment. */
64766e8d
JH
2011};
2012
/* Two-entry stringop table for nocona memcpy. Each entry is an algorithm
   name followed by {limit, algorithm, flag} triples, the last of which
   uses a limit of -1.
   NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit variant
   -- confirm against the declaration of stringop_algs. */
2013static stringop_algs nocona_memcpy[2] = {
2014 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
2015 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
2016 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
2017
/* Two-entry stringop table for nocona memset. Each entry is an algorithm
   name followed by {limit, algorithm, flag} triples, the last of which
   uses a limit of -1.
   NOTE(review): presumably [0] is the 32-bit and [1] the 64-bit variant
   -- confirm against the declaration of stringop_algs. */
2018static stringop_algs nocona_memset[2] = {
2019 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
2020 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2021 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
2022 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2023
2024static const
2025struct processor_costs nocona_cost = {
72bb85f8 2026 {
d321551c
L
2027 /* Start of register allocator costs. integer->integer move cost is 2. */
2028 4, /* cost for loading QImode using movzbl */
2029 {4, 4, 4}, /* cost of loading integer registers
2030 in QImode, HImode and SImode.
2031 Relative to reg-reg move (2). */
2032 {4, 4, 4}, /* cost of storing integer registers */
2033 12, /* cost of reg,reg fld/fst */
2034 {14, 14, 14}, /* cost of loading fp registers
2035 in SFmode, DFmode and XFmode */
2036 {14, 14, 14}, /* cost of storing fp registers
2037 in SFmode, DFmode and XFmode */
2038 14, /* cost of moving MMX register */
2039 {12, 12}, /* cost of loading MMX registers
2040 in SImode and DImode */
2041 {12, 12}, /* cost of storing MMX registers
2042 in SImode and DImode */
2043 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
2044 {12, 12, 12, 24, 48}, /* cost of loading SSE registers
2045 in 32,64,128,256 and 512-bit */
2046 {12, 12, 12, 24, 48}, /* cost of storing SSE registers
2047 in 32,64,128,256 and 512-bit */
2048 20, 12, /* SSE->integer and integer->SSE moves */
2049 /* End of register allocator costs. */
72bb85f8 2050 },
d321551c 2051
64766e8d
JH
2052 COSTS_N_INSNS (1), /* cost of an add instruction */
2053 COSTS_N_INSNS (1), /* cost of a lea instruction */
2054 COSTS_N_INSNS (1), /* variable shift costs */
2055 COSTS_N_INSNS (1), /* constant shift costs */
2056 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
2057 COSTS_N_INSNS (10), /* HI */
2058 COSTS_N_INSNS (10), /* SI */
2059 COSTS_N_INSNS (10), /* DI */
2060 COSTS_N_INSNS (10)}, /* other */
2061 0, /* cost of multiply per each bit set */
2062 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
2063 COSTS_N_INSNS (66), /* HI */
2064 COSTS_N_INSNS (66), /* SI */
2065 COSTS_N_INSNS (66), /* DI */
2066 COSTS_N_INSNS (66)}, /* other */
2067 COSTS_N_INSNS (1), /* cost of movsx */
2068 COSTS_N_INSNS (1), /* cost of movzx */
2069 16, /* "large" insn */
2070 17, /* MOVE_RATIO */
25e22b19 2071 6, /* CLEAR_RATIO */
64766e8d
JH
2072 {4, 4, 4}, /* cost of loading integer registers
2073 in QImode, HImode and SImode.
2074 Relative to reg-reg move (2). */
2075 {4, 4, 4}, /* cost of storing integer registers */
d321551c
L
2076 {12, 12, 12, 24, 48}, /* cost of loading SSE register
2077 in 32bit, 64bit, 128bit, 256bit and 512bit */
2078 {12, 12, 12, 24, 48}, /* cost of storing SSE register
2079 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2080 {24, 24, 24, 48, 96}, /* cost of unaligned loads. */
df41dbaf 2081 {24, 24, 24, 48, 96}, /* cost of unaligned stores. */
d321551c
L
2082 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
2083 20, /* cost of moving SSE register to integer. */
a4fe6139
JH
2084 12, 12, /* Gather load static, per_elt. */
2085 12, 12, /* Gather store static, per_elt. */
64766e8d
JH
2086 8, /* size of l1 cache. */
2087 1024, /* size of l2 cache. */
2088 64, /* size of prefetch block */
2089 8, /* number of parallel prefetches */
2090 1, /* Branch cost */
2091 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
2092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2093 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
2094 COSTS_N_INSNS (3), /* cost of FABS instruction. */
2095 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
2096 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
6065f444 2097
c53c148c 2098 COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */
6065f444
JH
2099 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
2100 COSTS_N_INSNS (7), /* cost of MULSS instruction. */
2101 COSTS_N_INSNS (7), /* cost of MULSD instruction. */
c53c148c
JH
2102 COSTS_N_INSNS (7), /* cost of FMA SS instruction. */
2103 COSTS_N_INSNS (7), /* cost of FMA SD instruction. */
6065f444
JH
2104 COSTS_N_INSNS (32), /* cost of DIVSS instruction. */
2105 COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
2106 COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
2107 COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
64766e8d
JH
2108 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2109 nocona_memcpy,
2110 nocona_memset,
f6fd8f2b
JH
2111 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2112 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2113 NULL, /* Loop alignment. */
2114 NULL, /* Jump alignment. */
2115 NULL, /* Label alignment. */
2116 NULL, /* Func alignment. */
64766e8d
JH
2117};
2118
2119static stringop_algs atom_memcpy[2] = {
2120 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2121 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2122 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2123static stringop_algs atom_memset[2] = {
2124 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2125 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2126 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2127 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2128static const
2129struct processor_costs atom_cost = {
72bb85f8 2130 {
d321551c
L
2131 /* Start of register allocator costs. integer->integer move cost is 2. */
2132 6, /* cost for loading QImode using movzbl */
2133 {6, 6, 6}, /* cost of loading integer registers
2134 in QImode, HImode and SImode.
2135 Relative to reg-reg move (2). */
2136 {6, 6, 6}, /* cost of storing integer registers */
2137 4, /* cost of reg,reg fld/fst */
2138 {6, 6, 18}, /* cost of loading fp registers
2139 in SFmode, DFmode and XFmode */
2140 {14, 14, 24}, /* cost of storing fp registers
2141 in SFmode, DFmode and XFmode */
2142 2, /* cost of moving MMX register */
2143 {8, 8}, /* cost of loading MMX registers
2144 in SImode and DImode */
2145 {10, 10}, /* cost of storing MMX registers
2146 in SImode and DImode */
2147 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2148 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
2149 in 32,64,128,256 and 512-bit */
2150 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
2151 in 32,64,128,256 and 512-bit */
2152 8, 6, /* SSE->integer and integer->SSE moves */
2153 /* End of register allocator costs. */
72bb85f8 2154 },
d321551c 2155
64766e8d
JH
2156 COSTS_N_INSNS (1), /* cost of an add instruction */
2157 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2158 COSTS_N_INSNS (1), /* variable shift costs */
2159 COSTS_N_INSNS (1), /* constant shift costs */
2160 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2161 COSTS_N_INSNS (4), /* HI */
2162 COSTS_N_INSNS (3), /* SI */
2163 COSTS_N_INSNS (4), /* DI */
2164 COSTS_N_INSNS (2)}, /* other */
2165 0, /* cost of multiply per each bit set */
2166 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2167 COSTS_N_INSNS (26), /* HI */
2168 COSTS_N_INSNS (42), /* SI */
2169 COSTS_N_INSNS (74), /* DI */
2170 COSTS_N_INSNS (74)}, /* other */
2171 COSTS_N_INSNS (1), /* cost of movsx */
2172 COSTS_N_INSNS (1), /* cost of movzx */
2173 8, /* "large" insn */
2174 17, /* MOVE_RATIO */
25e22b19 2175 6, /* CLEAR_RATIO */
df41dbaf 2176 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
2177 in QImode, HImode and SImode.
2178 Relative to reg-reg move (2). */
df41dbaf 2179 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2180 {8, 8, 8, 16, 32}, /* cost of loading SSE register
2181 in 32bit, 64bit, 128bit, 256bit and 512bit */
2182 {8, 8, 8, 16, 32}, /* cost of storing SSE register
2183 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2184 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 2185 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
2186 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2187 8, /* cost of moving SSE register to integer. */
a4fe6139
JH
2188 8, 8, /* Gather load static, per_elt. */
2189 8, 8, /* Gather store static, per_elt. */
64766e8d
JH
2190 32, /* size of l1 cache. */
2191 256, /* size of l2 cache. */
2192 64, /* size of prefetch block */
2193 6, /* number of parallel prefetches */
2194 3, /* Branch cost */
2195 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2196 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2197 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2198 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2199 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2200 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 2201
c53c148c 2202 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2203 COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */
2204 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2205 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
2206 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2207 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2208 COSTS_N_INSNS (31), /* cost of DIVSS instruction. */
2209 COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
2210 COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
2211 COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
64766e8d
JH
2212 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
2213 atom_memcpy,
2214 atom_memset,
f6fd8f2b
JH
2215 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2216 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2217 "16", /* Loop alignment. */
2218 "16:8:8", /* Jump alignment. */
2219 "0:0:8", /* Label alignment. */
2220 "16", /* Func alignment. */
64766e8d
JH
2221};
2222
2223static stringop_algs slm_memcpy[2] = {
2224 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2225 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2226 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2227static stringop_algs slm_memset[2] = {
2228 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2229 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2230 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2231 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2232static const
2233struct processor_costs slm_cost = {
72bb85f8 2234 {
d321551c
L
2235 /* Start of register allocator costs. integer->integer move cost is 2. */
2236 8, /* cost for loading QImode using movzbl */
2237 {8, 8, 8}, /* cost of loading integer registers
2238 in QImode, HImode and SImode.
2239 Relative to reg-reg move (2). */
2240 {6, 6, 6}, /* cost of storing integer registers */
2241 2, /* cost of reg,reg fld/fst */
2242 {8, 8, 18}, /* cost of loading fp registers
2243 in SFmode, DFmode and XFmode */
2244 {6, 6, 18}, /* cost of storing fp registers
2245 in SFmode, DFmode and XFmode */
2246 2, /* cost of moving MMX register */
2247 {8, 8}, /* cost of loading MMX registers
2248 in SImode and DImode */
2249 {6, 6}, /* cost of storing MMX registers
2250 in SImode and DImode */
2251 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2252 {8, 8, 8, 16, 32}, /* cost of loading SSE registers
2253 in 32,64,128,256 and 512-bit */
2254 {8, 8, 8, 16, 32}, /* cost of storing SSE registers
2255 in 32,64,128,256 and 512-bit */
2256 8, 6, /* SSE->integer and integer->SSE moves */
2257 /* End of register allocator costs. */
72bb85f8 2258 },
d321551c 2259
64766e8d
JH
2260 COSTS_N_INSNS (1), /* cost of an add instruction */
2261 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2262 COSTS_N_INSNS (1), /* variable shift costs */
2263 COSTS_N_INSNS (1), /* constant shift costs */
2264 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2265 COSTS_N_INSNS (3), /* HI */
2266 COSTS_N_INSNS (3), /* SI */
2267 COSTS_N_INSNS (4), /* DI */
2268 COSTS_N_INSNS (2)}, /* other */
2269 0, /* cost of multiply per each bit set */
2270 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2271 COSTS_N_INSNS (26), /* HI */
2272 COSTS_N_INSNS (42), /* SI */
2273 COSTS_N_INSNS (74), /* DI */
2274 COSTS_N_INSNS (74)}, /* other */
2275 COSTS_N_INSNS (1), /* cost of movsx */
2276 COSTS_N_INSNS (1), /* cost of movzx */
2277 8, /* "large" insn */
2278 17, /* MOVE_RATIO */
25e22b19 2279 6, /* CLEAR_RATIO */
df41dbaf 2280 {8, 8, 8}, /* cost of loading integer registers
64766e8d
JH
2281 in QImode, HImode and SImode.
2282 Relative to reg-reg move (2). */
df41dbaf 2283 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2284 {8, 8, 8, 16, 32}, /* cost of loading SSE register
2285 in 32bit, 64bit, 128bit, 256bit and 512bit */
2286 {8, 8, 8, 16, 32}, /* cost of storing SSE register
2287 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2288 {16, 16, 16, 32, 64}, /* cost of unaligned loads. */
df41dbaf 2289 {16, 16, 16, 32, 64}, /* cost of unaligned stores. */
d321551c
L
2290 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
2291 8, /* cost of moving SSE register to integer. */
a4fe6139
JH
2292 8, 8, /* Gather load static, per_elt. */
2293 8, 8, /* Gather store static, per_elt. */
64766e8d
JH
2294 32, /* size of l1 cache. */
2295 256, /* size of l2 cache. */
2296 64, /* size of prefetch block */
2297 6, /* number of parallel prefetches */
2298 3, /* Branch cost */
2299 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2300 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2301 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2302 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2303 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2304 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 2305
c53c148c 2306 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2307 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2308 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2309 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
2310 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2311 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2312 COSTS_N_INSNS (39), /* cost of DIVSS instruction. */
2313 COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
2314 COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
2315 COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
64766e8d
JH
2316 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2317 slm_memcpy,
2318 slm_memset,
f6fd8f2b
JH
2319 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2320 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2321 "16", /* Loop alignment. */
2322 "16:8:8", /* Jump alignment. */
2323 "0:0:8", /* Label alignment. */
2324 "16", /* Func alignment. */
64766e8d
JH
2325};
2326
2327static stringop_algs intel_memcpy[2] = {
2328 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
2329 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
2330 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2331static stringop_algs intel_memset[2] = {
2332 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
2333 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
2334 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
2335 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
2336static const
2337struct processor_costs intel_cost = {
72bb85f8 2338 {
d321551c
L
2339 /* Start of register allocator costs. integer->integer move cost is 2. */
2340 6, /* cost for loading QImode using movzbl */
2341 {4, 4, 4}, /* cost of loading integer registers
2342 in QImode, HImode and SImode.
2343 Relative to reg-reg move (2). */
2344 {6, 6, 6}, /* cost of storing integer registers */
2345 2, /* cost of reg,reg fld/fst */
2346 {6, 6, 8}, /* cost of loading fp registers
2347 in SFmode, DFmode and XFmode */
2348 {6, 6, 10}, /* cost of storing fp registers
2349 in SFmode, DFmode and XFmode */
2350 2, /* cost of moving MMX register */
2351 {6, 6}, /* cost of loading MMX registers
2352 in SImode and DImode */
2353 {6, 6}, /* cost of storing MMX registers
2354 in SImode and DImode */
2355 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
2356 {6, 6, 6, 6, 6}, /* cost of loading SSE registers
2357 in 32,64,128,256 and 512-bit */
2358 {6, 6, 6, 6, 6}, /* cost of storing SSE registers
2359 in 32,64,128,256 and 512-bit */
2360 4, 4, /* SSE->integer and integer->SSE moves */
2361 /* End of register allocator costs. */
72bb85f8 2362 },
d321551c 2363
64766e8d
JH
2364 COSTS_N_INSNS (1), /* cost of an add instruction */
2365 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2366 COSTS_N_INSNS (1), /* variable shift costs */
2367 COSTS_N_INSNS (1), /* constant shift costs */
2368 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2369 COSTS_N_INSNS (3), /* HI */
2370 COSTS_N_INSNS (3), /* SI */
2371 COSTS_N_INSNS (4), /* DI */
2372 COSTS_N_INSNS (2)}, /* other */
2373 0, /* cost of multiply per each bit set */
2374 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2375 COSTS_N_INSNS (26), /* HI */
2376 COSTS_N_INSNS (42), /* SI */
2377 COSTS_N_INSNS (74), /* DI */
2378 COSTS_N_INSNS (74)}, /* other */
2379 COSTS_N_INSNS (1), /* cost of movsx */
2380 COSTS_N_INSNS (1), /* cost of movzx */
2381 8, /* "large" insn */
2382 17, /* MOVE_RATIO */
25e22b19 2383 6, /* CLEAR_RATIO */
64766e8d
JH
2384 {4, 4, 4}, /* cost of loading integer registers
2385 in QImode, HImode and SImode.
2386 Relative to reg-reg move (2). */
af863030 2387 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2388 {6, 6, 6, 6, 6}, /* cost of loading SSE register
2389 in 32bit, 64bit, 128bit, 256bit and 512bit */
2390 {6, 6, 6, 6, 6}, /* cost of storing SSE register
2391 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2392 {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
df41dbaf 2393 {10, 10, 10, 10, 10}, /* cost of unaligned stores. */
d321551c
L
2394 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
2395 4, /* cost of moving SSE register to integer. */
a4fe6139
JH
2396 6, 6, /* Gather load static, per_elt. */
2397 6, 6, /* Gather store static, per_elt. */
64766e8d
JH
2398 32, /* size of l1 cache. */
2399 256, /* size of l2 cache. */
2400 64, /* size of prefetch block */
2401 6, /* number of parallel prefetches */
2402 3, /* Branch cost */
2403 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2404 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2405 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2406 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2407 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2408 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
6065f444 2409
3ff59baa 2410 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2411 COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
2412 COSTS_N_INSNS (8), /* cost of MULSS instruction. */
2413 COSTS_N_INSNS (8), /* cost of MULSD instruction. */
c53c148c
JH
2414 COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
2415 COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
6065f444
JH
2416 COSTS_N_INSNS (20), /* cost of DIVSS instruction. */
2417 COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
2418 COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
2419 COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
64766e8d
JH
2420 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
2421 intel_memcpy,
2422 intel_memset,
f6fd8f2b
JH
2423 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2424 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2425 "16", /* Loop alignment. */
2426 "16:8:8", /* Jump alignment. */
2427 "0:0:8", /* Label alignment. */
2428 "16", /* Func alignment. */
64766e8d
JH
2429};
2430
2431/* Generic should produce code tuned for Core-i7 (and newer chips)
2432 and btver1 (and newer chips). */
2433
2434static stringop_algs generic_memcpy[2] = {
2435 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
2436 {-1, libcall, false}}},
2437 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
2438 {-1, libcall, false}}}};
2439static stringop_algs generic_memset[2] = {
2440 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
2441 {-1, libcall, false}}},
2442 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
2443 {-1, libcall, false}}}};
2444static const
2445struct processor_costs generic_cost = {
72bb85f8 2446 {
d321551c
L
2447 /* Start of register allocator costs. integer->integer move cost is 2. */
2448 6, /* cost for loading QImode using movzbl */
2449 {6, 6, 6}, /* cost of loading integer registers
2450 in QImode, HImode and SImode.
2451 Relative to reg-reg move (2). */
2452 {6, 6, 6}, /* cost of storing integer registers */
2453 4, /* cost of reg,reg fld/fst */
2454 {6, 6, 12}, /* cost of loading fp registers
2455 in SFmode, DFmode and XFmode */
2456 {6, 6, 12}, /* cost of storing fp registers
2457 in SFmode, DFmode and XFmode */
2458 2, /* cost of moving MMX register */
2459 {6, 6}, /* cost of loading MMX registers
2460 in SImode and DImode */
2461 {6, 6}, /* cost of storing MMX registers
2462 in SImode and DImode */
2463 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2464 {6, 6, 6, 10, 15}, /* cost of loading SSE registers
2465 in 32,64,128,256 and 512-bit */
2466 {6, 6, 6, 10, 15}, /* cost of storing SSE registers
2467 in 32,64,128,256 and 512-bit */
2468 6, 6, /* SSE->integer and integer->SSE moves */
2469 /* End of register allocator costs. */
72bb85f8 2470 },
d321551c 2471
64766e8d 2472 COSTS_N_INSNS (1), /* cost of an add instruction */
ef9eec0b 2473 /* Setting cost to 2 makes our current implementation of synth_mult result in
64766e8d
JH
2474 use of unnecessary temporary registers causing regression on several
2475 SPECfp benchmarks. */
2476 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2477 COSTS_N_INSNS (1), /* variable shift costs */
2478 COSTS_N_INSNS (1), /* constant shift costs */
2479 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2480 COSTS_N_INSNS (4), /* HI */
2481 COSTS_N_INSNS (3), /* SI */
2482 COSTS_N_INSNS (4), /* DI */
7c080ade 2483 COSTS_N_INSNS (4)}, /* other */
64766e8d 2484 0, /* cost of multiply per each bit set */
7c080ade
JH
2485 {COSTS_N_INSNS (16), /* cost of a divide/mod for QI */
2486 COSTS_N_INSNS (22), /* HI */
2487 COSTS_N_INSNS (30), /* SI */
64766e8d
JH
2488 COSTS_N_INSNS (74), /* DI */
2489 COSTS_N_INSNS (74)}, /* other */
2490 COSTS_N_INSNS (1), /* cost of movsx */
2491 COSTS_N_INSNS (1), /* cost of movzx */
2492 8, /* "large" insn */
2493 17, /* MOVE_RATIO */
25e22b19 2494 6, /* CLEAR_RATIO */
d555138e 2495 {6, 6, 6}, /* cost of loading integer registers
64766e8d
JH
2496 in QImode, HImode and SImode.
2497 Relative to reg-reg move (2). */
af863030 2498 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2499 {6, 6, 6, 10, 15}, /* cost of loading SSE register
2500 in 32bit, 64bit, 128bit, 256bit and 512bit */
2501 {6, 6, 6, 10, 15}, /* cost of storing SSE register
2502 in 32bit, 64bit, 128bit, 256bit and 512bit */
7c080ade 2503 {6, 6, 6, 10, 15}, /* cost of unaligned loads. */
7c080ade 2504 {6, 6, 6, 10, 15}, /* cost of unaligned stores. */
d321551c
L
2505 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
2506 6, /* cost of moving SSE register to integer. */
7c080ade
JH
2507 18, 6, /* Gather load static, per_elt. */
2508 18, 6, /* Gather store static, per_elt. */
64766e8d
JH
2509 32, /* size of l1 cache. */
2510 512, /* size of l2 cache. */
2511 64, /* size of prefetch block */
2512 6, /* number of parallel prefetches */
2513 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
2514 value is increased to the perhaps more appropriate value of 5. */
2515 3, /* Branch cost */
ef9eec0b 2516 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
7c080ade 2517 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
e8e3054e 2518 COSTS_N_INSNS (17), /* cost of FDIV instruction. */
ef9eec0b
JH
2519 COSTS_N_INSNS (1), /* cost of FABS instruction. */
2520 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
e8e3054e 2521 COSTS_N_INSNS (14), /* cost of FSQRT instruction. */
6065f444 2522
ef9eec0b
JH
2523 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
2524 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2525 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2526 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
2527 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
2528 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
e8e3054e
JH
2529 COSTS_N_INSNS (13), /* cost of DIVSS instruction. */
2530 COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
2531 COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
2532 COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
7c080ade 2533 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
64766e8d
JH
2534 generic_memcpy,
2535 generic_memset,
e8e3054e
JH
2536 COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
2537 COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2538 "16:11:8", /* Loop alignment. */
2539 "16:11:8", /* Jump alignment. */
2540 "0:0:8", /* Label alignment. */
2541 "16", /* Func alignment. */
64766e8d
JH
2542};
2543
2544/* core_cost should produce code tuned for Core family of CPUs. */
2545static stringop_algs core_memcpy[2] = {
2546 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
2547 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
2548 {-1, libcall, false}}}};
2549static stringop_algs core_memset[2] = {
2550 {libcall, {{6, loop_1_byte, true},
2551 {24, loop, true},
2552 {8192, rep_prefix_4_byte, true},
2553 {-1, libcall, false}}},
2554 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2555 {-1, libcall, false}}}};
2556
2557static const
2558struct processor_costs core_cost = {
72bb85f8 2559 {
d321551c
L
2560 /* Start of register allocator costs. integer->integer move cost is 2. */
2561 6, /* cost for loading QImode using movzbl */
2562 {4, 4, 4}, /* cost of loading integer registers
2563 in QImode, HImode and SImode.
2564 Relative to reg-reg move (2). */
2565 {6, 6, 6}, /* cost of storing integer registers */
2566 2, /* cost of reg,reg fld/fst */
2567 {6, 6, 8}, /* cost of loading fp registers
2568 in SFmode, DFmode and XFmode */
2569 {6, 6, 10}, /* cost of storing fp registers
2570 in SFmode, DFmode and XFmode */
2571 2, /* cost of moving MMX register */
2572 {6, 6}, /* cost of loading MMX registers
2573 in SImode and DImode */
2574 {6, 6}, /* cost of storing MMX registers
2575 in SImode and DImode */
2576 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2577 {6, 6, 6, 6, 12}, /* cost of loading SSE registers
2578 in 32,64,128,256 and 512-bit */
2579 {6, 6, 6, 6, 12}, /* cost of storing SSE registers
2580 in 32,64,128,256 and 512-bit */
4e9ad7c9 2581 6, 6, /* SSE->integer and integer->SSE moves */
d321551c 2582 /* End of register allocator costs. */
72bb85f8 2583 },
d321551c 2584
64766e8d
JH
2585 COSTS_N_INSNS (1), /* cost of an add instruction */
2586 /* On all chips taken into consideration lea takes 2 cycles or more. With
2587 this cost however our current implementation of synth_mult results in
2588 use of unnecessary temporary registers causing regression on several
2589 SPECfp benchmarks. */
2590 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2591 COSTS_N_INSNS (1), /* variable shift costs */
2592 COSTS_N_INSNS (1), /* constant shift costs */
2593 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2594 COSTS_N_INSNS (4), /* HI */
2595 COSTS_N_INSNS (3), /* SI */
a2ef9558
MT
2596 /* Here we tune for Sandybridge or newer. */
2597 COSTS_N_INSNS (3), /* DI */
2598 COSTS_N_INSNS (3)}, /* other */
64766e8d 2599 0, /* cost of multiply per each bit set */
02308bd3
MT
2600 /* Expanding div/mod currently doesn't consider parallelism. So the cost
2601 model is not realistic. We compensate by increasing the latencies a bit. */
2602 {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
2603 COSTS_N_INSNS (11), /* HI */
2604 COSTS_N_INSNS (14), /* SI */
ffa3ce53
JH
2605 COSTS_N_INSNS (81), /* DI */
2606 COSTS_N_INSNS (81)}, /* other */
64766e8d
JH
2607 COSTS_N_INSNS (1), /* cost of movsx */
2608 COSTS_N_INSNS (1), /* cost of movzx */
2609 8, /* "large" insn */
2610 17, /* MOVE_RATIO */
25e22b19 2611 6, /* CLEAR_RATIO */
64766e8d
JH
2612 {4, 4, 4}, /* cost of loading integer registers
2613 in QImode, HImode and SImode.
2614 Relative to reg-reg move (2). */
ffa3ce53 2615 {6, 6, 6}, /* cost of storing integer registers */
d321551c
L
2616 {6, 6, 6, 6, 12}, /* cost of loading SSE register
2617 in 32bit, 64bit, 128bit, 256bit and 512bit */
2618 {6, 6, 6, 6, 12}, /* cost of storing SSE register
2619 in 32bit, 64bit, 128bit, 256bit and 512bit */
df41dbaf 2620 {6, 6, 6, 6, 12}, /* cost of unaligned loads. */
df41dbaf 2621 {6, 6, 6, 6, 12}, /* cost of unaligned stores. */
d321551c
L
2622 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2623 2, /* cost of moving SSE register to integer. */
a4fe6139
JH
2624 /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops,
2625 rec. throughput 6.
2626 So 5 uops statically and one uop per load. */
2627 10, 6, /* Gather load static, per_elt. */
2628 10, 6, /* Gather store static, per_elt. */
64766e8d
JH
2629 64, /* size of l1 cache. */
2630 512, /* size of l2 cache. */
2631 64, /* size of prefetch block */
2632 6, /* number of parallel prefetches */
2633 /* FIXME perhaps more appropriate value is 5. */
2634 3, /* Branch cost */
ef9eec0b
JH
2635 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
2636 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
ffa3ce53 2637 /* 10-24 */
ef9eec0b
JH
2638 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
2639 COSTS_N_INSNS (1), /* cost of FABS instruction. */
2640 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
ffa3ce53 2641 COSTS_N_INSNS (23), /* cost of FSQRT instruction. */
6065f444 2642
c53c148c 2643 COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
6065f444
JH
2644 COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
2645 COSTS_N_INSNS (4), /* cost of MULSS instruction. */
2646 COSTS_N_INSNS (5), /* cost of MULSD instruction. */
c53c148c
JH
2647 COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
2648 COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
6065f444
JH
2649 COSTS_N_INSNS (18), /* cost of DIVSS instruction. */
2650 COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
2651 COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
2652 COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
64766e8d
JH
2653 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
2654 core_memcpy,
2655 core_memset,
f6fd8f2b
JH
2656 COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2657 COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
7dc58b50
ML
2658 "16:11:8", /* Loop alignment. */
2659 "16:11:8", /* Jump alignment. */
2660 "0:0:8", /* Label alignment. */
2661 "16", /* Func alignment. */
64766e8d
JH
2662};
2663