]>
Commit | Line | Data |
---|---|---|
/* Costs of operations of individual x86 CPUs.
   Copyright (C) 1988-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
64766e8d JH |
/* Processor costs (relative to an add) */
/* Express a size of N bytes as a cost.  We assume COSTS_N_INSNS is
   defined as (N)*4 and an addition is 2 bytes, so COSTS_N_BYTES (2)
   has the same value as COSTS_N_INSNS (1).  */
#define COSTS_N_BYTES(N) ((N) * 2)
27 | ||
/* Placeholder stringop_algs initializer that selects libcall everywhere.
   Used below as the second element of per-CPU memcpy/memset tables.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
29 | ||
30 | static stringop_algs ix86_size_memcpy[2] = { | |
31 | {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, | |
32 | {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; | |
33 | static stringop_algs ix86_size_memset[2] = { | |
34 | {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, | |
35 | {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}}; | |
36 | ||
37 | const | |
38 | struct processor_costs ix86_size_cost = {/* costs for tuning for size */ | |
72bb85f8 | 39 | { |
d321551c L |
40 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
41 | 2, /* cost for loading QImode using movzbl */ | |
42 | {2, 2, 2}, /* cost of loading integer registers | |
43 | in QImode, HImode and SImode. | |
44 | Relative to reg-reg move (2). */ | |
45 | {2, 2, 2}, /* cost of storing integer registers */ | |
46 | 2, /* cost of reg,reg fld/fst */ | |
47 | {2, 2, 2}, /* cost of loading fp registers | |
48 | in SFmode, DFmode and XFmode */ | |
49 | {2, 2, 2}, /* cost of storing fp registers | |
50 | in SFmode, DFmode and XFmode */ | |
51 | 3, /* cost of moving MMX register */ | |
52 | {3, 3}, /* cost of loading MMX registers | |
53 | in SImode and DImode */ | |
54 | {3, 3}, /* cost of storing MMX registers | |
55 | in SImode and DImode */ | |
56 | 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */ | |
57 | {3, 3, 3, 3, 3}, /* cost of loading SSE registers | |
58 | in 32,64,128,256 and 512-bit */ | |
59 | {3, 3, 3, 3, 3}, /* cost of storing SSE registers | |
60 | in 32,64,128,256 and 512-bit */ | |
61 | 3, 3, /* SSE->integer and integer->SSE moves */ | |
62 | /* End of register allocator costs. */ | |
72bb85f8 | 63 | }, |
d321551c | 64 | |
64766e8d JH |
65 | COSTS_N_BYTES (2), /* cost of an add instruction */ |
66 | COSTS_N_BYTES (3), /* cost of a lea instruction */ | |
67 | COSTS_N_BYTES (2), /* variable shift costs */ | |
68 | COSTS_N_BYTES (3), /* constant shift costs */ | |
69 | {COSTS_N_BYTES (3), /* cost of starting multiply for QI */ | |
70 | COSTS_N_BYTES (3), /* HI */ | |
71 | COSTS_N_BYTES (3), /* SI */ | |
72 | COSTS_N_BYTES (3), /* DI */ | |
73 | COSTS_N_BYTES (5)}, /* other */ | |
74 | 0, /* cost of multiply per each bit set */ | |
75 | {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */ | |
76 | COSTS_N_BYTES (3), /* HI */ | |
77 | COSTS_N_BYTES (3), /* SI */ | |
78 | COSTS_N_BYTES (3), /* DI */ | |
79 | COSTS_N_BYTES (5)}, /* other */ | |
80 | COSTS_N_BYTES (3), /* cost of movsx */ | |
81 | COSTS_N_BYTES (3), /* cost of movzx */ | |
82 | 0, /* "large" insn */ | |
83 | 2, /* MOVE_RATIO */ | |
25e22b19 | 84 | 2, /* CLEAR_RATIO */ |
64766e8d JH |
85 | {2, 2, 2}, /* cost of loading integer registers |
86 | in QImode, HImode and SImode. | |
87 | Relative to reg-reg move (2). */ | |
88 | {2, 2, 2}, /* cost of storing integer registers */ | |
d321551c L |
89 | {3, 3, 3, 3, 3}, /* cost of loading SSE register |
90 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
91 | {3, 3, 3, 3, 3}, /* cost of storing SSE register | |
92 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf JH |
93 | {3, 3, 3, 3, 3}, /* cost of unaligned SSE load |
94 | in 128bit, 256bit and 512bit */ | |
d321551c | 95 | {3, 3, 3, 3, 3}, /* cost of unaligned SSE store |
df41dbaf | 96 | in 128bit, 256bit and 512bit */ |
d321551c L |
97 | 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */ |
98 | 3, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
99 | 5, 0, /* Gather load static, per_elt. */ |
100 | 5, 0, /* Gather store static, per_elt. */ | |
64766e8d JH |
101 | 0, /* size of l1 cache */ |
102 | 0, /* size of l2 cache */ | |
103 | 0, /* size of prefetch block */ | |
104 | 0, /* number of parallel prefetches */ | |
105 | 2, /* Branch cost */ | |
106 | COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ | |
107 | COSTS_N_BYTES (2), /* cost of FMUL instruction. */ | |
108 | COSTS_N_BYTES (2), /* cost of FDIV instruction. */ | |
109 | COSTS_N_BYTES (2), /* cost of FABS instruction. */ | |
110 | COSTS_N_BYTES (2), /* cost of FCHS instruction. */ | |
111 | COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ | |
6065f444 | 112 | |
c53c148c | 113 | COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */ |
6065f444 JH |
114 | COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */ |
115 | COSTS_N_BYTES (2), /* cost of MULSS instruction. */ | |
116 | COSTS_N_BYTES (2), /* cost of MULSD instruction. */ | |
c53c148c JH |
117 | COSTS_N_BYTES (2), /* cost of FMA SS instruction. */ |
118 | COSTS_N_BYTES (2), /* cost of FMA SD instruction. */ | |
6065f444 JH |
119 | COSTS_N_BYTES (2), /* cost of DIVSS instruction. */ |
120 | COSTS_N_BYTES (2), /* cost of DIVSD instruction. */ | |
121 | COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */ | |
122 | COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
123 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
124 | ix86_size_memcpy, | |
125 | ix86_size_memset, | |
f6fd8f2b JH |
126 | COSTS_N_BYTES (1), /* cond_taken_branch_cost. */ |
127 | COSTS_N_BYTES (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
128 | NULL, /* Loop alignment. */ |
129 | NULL, /* Jump alignment. */ | |
130 | NULL, /* Label alignment. */ | |
131 | NULL, /* Func alignment. */ | |
64766e8d JH |
132 | }; |
133 | ||
134 | /* Processor costs (relative to an add) */ | |
135 | static stringop_algs i386_memcpy[2] = { | |
136 | {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, | |
137 | DUMMY_STRINGOP_ALGS}; | |
138 | static stringop_algs i386_memset[2] = { | |
139 | {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}, | |
140 | DUMMY_STRINGOP_ALGS}; | |
141 | ||
142 | static const | |
143 | struct processor_costs i386_cost = { /* 386 specific costs */ | |
72bb85f8 | 144 | { |
d321551c L |
145 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
146 | 4, /* cost for loading QImode using movzbl */ | |
147 | {2, 4, 2}, /* cost of loading integer registers | |
148 | in QImode, HImode and SImode. | |
149 | Relative to reg-reg move (2). */ | |
150 | {2, 4, 2}, /* cost of storing integer registers */ | |
151 | 2, /* cost of reg,reg fld/fst */ | |
152 | {8, 8, 8}, /* cost of loading fp registers | |
153 | in SFmode, DFmode and XFmode */ | |
154 | {8, 8, 8}, /* cost of storing fp registers | |
155 | in SFmode, DFmode and XFmode */ | |
156 | 2, /* cost of moving MMX register */ | |
157 | {4, 8}, /* cost of loading MMX registers | |
158 | in SImode and DImode */ | |
159 | {4, 8}, /* cost of storing MMX registers | |
160 | in SImode and DImode */ | |
161 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
162 | {4, 8, 16, 32, 64}, /* cost of loading SSE registers | |
163 | in 32,64,128,256 and 512-bit */ | |
164 | {4, 8, 16, 32, 64}, /* cost of storing SSE registers | |
165 | in 32,64,128,256 and 512-bit */ | |
166 | 3, 3, /* SSE->integer and integer->SSE moves */ | |
167 | /* End of register allocator costs. */ | |
72bb85f8 | 168 | }, |
d321551c | 169 | |
64766e8d JH |
170 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
171 | COSTS_N_INSNS (1), /* cost of a lea instruction */ | |
172 | COSTS_N_INSNS (3), /* variable shift costs */ | |
173 | COSTS_N_INSNS (2), /* constant shift costs */ | |
174 | {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ | |
175 | COSTS_N_INSNS (6), /* HI */ | |
176 | COSTS_N_INSNS (6), /* SI */ | |
177 | COSTS_N_INSNS (6), /* DI */ | |
178 | COSTS_N_INSNS (6)}, /* other */ | |
179 | COSTS_N_INSNS (1), /* cost of multiply per each bit set */ | |
180 | {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ | |
181 | COSTS_N_INSNS (23), /* HI */ | |
182 | COSTS_N_INSNS (23), /* SI */ | |
183 | COSTS_N_INSNS (23), /* DI */ | |
184 | COSTS_N_INSNS (23)}, /* other */ | |
185 | COSTS_N_INSNS (3), /* cost of movsx */ | |
186 | COSTS_N_INSNS (2), /* cost of movzx */ | |
187 | 15, /* "large" insn */ | |
188 | 3, /* MOVE_RATIO */ | |
25e22b19 | 189 | 3, /* CLEAR_RATIO */ |
64766e8d JH |
190 | {2, 4, 2}, /* cost of loading integer registers |
191 | in QImode, HImode and SImode. | |
192 | Relative to reg-reg move (2). */ | |
193 | {2, 4, 2}, /* cost of storing integer registers */ | |
d321551c L |
194 | {4, 8, 16, 32, 64}, /* cost of loading SSE register |
195 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
196 | {4, 8, 16, 32, 64}, /* cost of storing SSE register | |
197 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 198 | {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ |
df41dbaf | 199 | {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ |
d321551c L |
200 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
201 | 3, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
202 | 4, 4, /* Gather load static, per_elt. */ |
203 | 4, 4, /* Gather store static, per_elt. */ | |
64766e8d JH |
204 | 0, /* size of l1 cache */ |
205 | 0, /* size of l2 cache */ | |
206 | 0, /* size of prefetch block */ | |
207 | 0, /* number of parallel prefetches */ | |
208 | 1, /* Branch cost */ | |
209 | COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ | |
210 | COSTS_N_INSNS (27), /* cost of FMUL instruction. */ | |
211 | COSTS_N_INSNS (88), /* cost of FDIV instruction. */ | |
212 | COSTS_N_INSNS (22), /* cost of FABS instruction. */ | |
213 | COSTS_N_INSNS (24), /* cost of FCHS instruction. */ | |
214 | COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ | |
6065f444 | 215 | |
c53c148c | 216 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
217 | COSTS_N_INSNS (23), /* cost of ADDSS/SD SUBSS/SD insns. */ |
218 | COSTS_N_INSNS (27), /* cost of MULSS instruction. */ | |
219 | COSTS_N_INSNS (27), /* cost of MULSD instruction. */ | |
c53c148c JH |
220 | COSTS_N_INSNS (27), /* cost of FMA SS instruction. */ |
221 | COSTS_N_INSNS (27), /* cost of FMA SD instruction. */ | |
6065f444 JH |
222 | COSTS_N_INSNS (88), /* cost of DIVSS instruction. */ |
223 | COSTS_N_INSNS (88), /* cost of DIVSD instruction. */ | |
224 | COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */ | |
225 | COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
226 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
227 | i386_memcpy, | |
228 | i386_memset, | |
f6fd8f2b JH |
229 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
230 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
231 | "4", /* Loop alignment. */ |
232 | "4", /* Jump alignment. */ | |
233 | NULL, /* Label alignment. */ | |
234 | "4", /* Func alignment. */ | |
64766e8d JH |
235 | }; |
236 | ||
237 | static stringop_algs i486_memcpy[2] = { | |
238 | {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, | |
239 | DUMMY_STRINGOP_ALGS}; | |
240 | static stringop_algs i486_memset[2] = { | |
241 | {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}}, | |
242 | DUMMY_STRINGOP_ALGS}; | |
243 | ||
244 | static const | |
245 | struct processor_costs i486_cost = { /* 486 specific costs */ | |
72bb85f8 | 246 | { |
d321551c L |
247 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
248 | 4, /* cost for loading QImode using movzbl */ | |
249 | {2, 4, 2}, /* cost of loading integer registers | |
250 | in QImode, HImode and SImode. | |
251 | Relative to reg-reg move (2). */ | |
252 | {2, 4, 2}, /* cost of storing integer registers */ | |
253 | 2, /* cost of reg,reg fld/fst */ | |
254 | {8, 8, 8}, /* cost of loading fp registers | |
255 | in SFmode, DFmode and XFmode */ | |
256 | {8, 8, 8}, /* cost of storing fp registers | |
257 | in SFmode, DFmode and XFmode */ | |
258 | 2, /* cost of moving MMX register */ | |
259 | {4, 8}, /* cost of loading MMX registers | |
260 | in SImode and DImode */ | |
261 | {4, 8}, /* cost of storing MMX registers | |
262 | in SImode and DImode */ | |
263 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
264 | {4, 8, 16, 32, 64}, /* cost of loading SSE registers | |
265 | in 32,64,128,256 and 512-bit */ | |
266 | {4, 8, 16, 32, 64}, /* cost of storing SSE registers | |
267 | in 32,64,128,256 and 512-bit */ | |
268 | 3, 3, /* SSE->integer and integer->SSE moves */ | |
269 | /* End of register allocator costs. */ | |
72bb85f8 | 270 | }, |
d321551c | 271 | |
64766e8d JH |
272 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
273 | COSTS_N_INSNS (1), /* cost of a lea instruction */ | |
274 | COSTS_N_INSNS (3), /* variable shift costs */ | |
275 | COSTS_N_INSNS (2), /* constant shift costs */ | |
276 | {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ | |
277 | COSTS_N_INSNS (12), /* HI */ | |
278 | COSTS_N_INSNS (12), /* SI */ | |
279 | COSTS_N_INSNS (12), /* DI */ | |
280 | COSTS_N_INSNS (12)}, /* other */ | |
281 | 1, /* cost of multiply per each bit set */ | |
282 | {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ | |
283 | COSTS_N_INSNS (40), /* HI */ | |
284 | COSTS_N_INSNS (40), /* SI */ | |
285 | COSTS_N_INSNS (40), /* DI */ | |
286 | COSTS_N_INSNS (40)}, /* other */ | |
287 | COSTS_N_INSNS (3), /* cost of movsx */ | |
288 | COSTS_N_INSNS (2), /* cost of movzx */ | |
289 | 15, /* "large" insn */ | |
290 | 3, /* MOVE_RATIO */ | |
25e22b19 | 291 | 3, /* CLEAR_RATIO */ |
64766e8d JH |
292 | {2, 4, 2}, /* cost of loading integer registers |
293 | in QImode, HImode and SImode. | |
294 | Relative to reg-reg move (2). */ | |
295 | {2, 4, 2}, /* cost of storing integer registers */ | |
d321551c L |
296 | {4, 8, 16, 32, 64}, /* cost of loading SSE register |
297 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
298 | {4, 8, 16, 32, 64}, /* cost of storing SSE register | |
299 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 300 | {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ |
df41dbaf | 301 | {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ |
d321551c L |
302 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
303 | 3, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
304 | 4, 4, /* Gather load static, per_elt. */ |
305 | 4, 4, /* Gather store static, per_elt. */ | |
64766e8d JH |
306 | 4, /* size of l1 cache. 486 has 8kB cache |
307 | shared for code and data, so 4kB is | |
308 | not really precise. */ | |
309 | 4, /* size of l2 cache */ | |
310 | 0, /* size of prefetch block */ | |
311 | 0, /* number of parallel prefetches */ | |
312 | 1, /* Branch cost */ | |
313 | COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ | |
314 | COSTS_N_INSNS (16), /* cost of FMUL instruction. */ | |
315 | COSTS_N_INSNS (73), /* cost of FDIV instruction. */ | |
316 | COSTS_N_INSNS (3), /* cost of FABS instruction. */ | |
317 | COSTS_N_INSNS (3), /* cost of FCHS instruction. */ | |
318 | COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ | |
6065f444 | 319 | |
c53c148c | 320 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
321 | COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ |
322 | COSTS_N_INSNS (16), /* cost of MULSS instruction. */ | |
323 | COSTS_N_INSNS (16), /* cost of MULSD instruction. */ | |
c53c148c JH |
324 | COSTS_N_INSNS (16), /* cost of FMA SS instruction. */ |
325 | COSTS_N_INSNS (16), /* cost of FMA SD instruction. */ | |
6065f444 JH |
326 | COSTS_N_INSNS (73), /* cost of DIVSS instruction. */ |
327 | COSTS_N_INSNS (74), /* cost of DIVSD instruction. */ | |
328 | COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */ | |
329 | COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
330 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
331 | i486_memcpy, | |
332 | i486_memset, | |
f6fd8f2b JH |
333 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
334 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
335 | "16", /* Loop alignment. */ |
336 | "16", /* Jump alignment. */ | |
337 | "0:0:8", /* Label alignment. */ | |
338 | "16", /* Func alignment. */ | |
64766e8d JH |
339 | }; |
340 | ||
341 | static stringop_algs pentium_memcpy[2] = { | |
342 | {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
343 | DUMMY_STRINGOP_ALGS}; | |
344 | static stringop_algs pentium_memset[2] = { | |
345 | {libcall, {{-1, rep_prefix_4_byte, false}}}, | |
346 | DUMMY_STRINGOP_ALGS}; | |
347 | ||
348 | static const | |
349 | struct processor_costs pentium_cost = { | |
72bb85f8 | 350 | { |
d321551c L |
351 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
352 | 6, /* cost for loading QImode using movzbl */ | |
353 | {2, 4, 2}, /* cost of loading integer registers | |
354 | in QImode, HImode and SImode. | |
355 | Relative to reg-reg move (2). */ | |
356 | {2, 4, 2}, /* cost of storing integer registers */ | |
357 | 2, /* cost of reg,reg fld/fst */ | |
358 | {2, 2, 6}, /* cost of loading fp registers | |
359 | in SFmode, DFmode and XFmode */ | |
360 | {4, 4, 6}, /* cost of storing fp registers | |
361 | in SFmode, DFmode and XFmode */ | |
362 | 8, /* cost of moving MMX register */ | |
363 | {8, 8}, /* cost of loading MMX registers | |
364 | in SImode and DImode */ | |
365 | {8, 8}, /* cost of storing MMX registers | |
366 | in SImode and DImode */ | |
367 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
368 | {4, 8, 16, 32, 64}, /* cost of loading SSE registers | |
369 | in 32,64,128,256 and 512-bit */ | |
370 | {4, 8, 16, 32, 64}, /* cost of storing SSE registers | |
371 | in 32,64,128,256 and 512-bit */ | |
372 | 3, 3, /* SSE->integer and integer->SSE moves */ | |
373 | /* End of register allocator costs. */ | |
72bb85f8 | 374 | }, |
d321551c | 375 | |
64766e8d JH |
376 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
377 | COSTS_N_INSNS (1), /* cost of a lea instruction */ | |
378 | COSTS_N_INSNS (4), /* variable shift costs */ | |
379 | COSTS_N_INSNS (1), /* constant shift costs */ | |
380 | {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ | |
381 | COSTS_N_INSNS (11), /* HI */ | |
382 | COSTS_N_INSNS (11), /* SI */ | |
383 | COSTS_N_INSNS (11), /* DI */ | |
384 | COSTS_N_INSNS (11)}, /* other */ | |
385 | 0, /* cost of multiply per each bit set */ | |
386 | {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ | |
387 | COSTS_N_INSNS (25), /* HI */ | |
388 | COSTS_N_INSNS (25), /* SI */ | |
389 | COSTS_N_INSNS (25), /* DI */ | |
390 | COSTS_N_INSNS (25)}, /* other */ | |
391 | COSTS_N_INSNS (3), /* cost of movsx */ | |
392 | COSTS_N_INSNS (2), /* cost of movzx */ | |
393 | 8, /* "large" insn */ | |
394 | 6, /* MOVE_RATIO */ | |
25e22b19 | 395 | 6, /* CLEAR_RATIO */ |
64766e8d JH |
396 | {2, 4, 2}, /* cost of loading integer registers |
397 | in QImode, HImode and SImode. | |
398 | Relative to reg-reg move (2). */ | |
399 | {2, 4, 2}, /* cost of storing integer registers */ | |
d321551c L |
400 | {4, 8, 16, 32, 64}, /* cost of loading SSE register |
401 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
402 | {4, 8, 16, 32, 64}, /* cost of storing SSE register | |
403 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 404 | {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ |
df41dbaf | 405 | {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ |
d321551c L |
406 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
407 | 3, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
408 | 4, 4, /* Gather load static, per_elt. */ |
409 | 4, 4, /* Gather store static, per_elt. */ | |
64766e8d JH |
410 | 8, /* size of l1 cache. */ |
411 | 8, /* size of l2 cache */ | |
412 | 0, /* size of prefetch block */ | |
413 | 0, /* number of parallel prefetches */ | |
414 | 2, /* Branch cost */ | |
415 | COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ | |
416 | COSTS_N_INSNS (3), /* cost of FMUL instruction. */ | |
417 | COSTS_N_INSNS (39), /* cost of FDIV instruction. */ | |
418 | COSTS_N_INSNS (1), /* cost of FABS instruction. */ | |
419 | COSTS_N_INSNS (1), /* cost of FCHS instruction. */ | |
420 | COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ | |
6065f444 | 421 | |
c53c148c | 422 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
423 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ |
424 | COSTS_N_INSNS (3), /* cost of MULSS instruction. */ | |
425 | COSTS_N_INSNS (3), /* cost of MULSD instruction. */ | |
c53c148c JH |
426 | COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ |
427 | COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ | |
6065f444 JH |
428 | COSTS_N_INSNS (39), /* cost of DIVSS instruction. */ |
429 | COSTS_N_INSNS (39), /* cost of DIVSD instruction. */ | |
430 | COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */ | |
431 | COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
432 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
433 | pentium_memcpy, | |
434 | pentium_memset, | |
f6fd8f2b JH |
435 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
436 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
437 | "16:8:8", /* Loop alignment. */ |
438 | "16:8:8", /* Jump alignment. */ | |
439 | "0:0:8", /* Label alignment. */ | |
440 | "16", /* Func alignment. */ | |
64766e8d JH |
441 | }; |
442 | ||
443 | static const | |
444 | struct processor_costs lakemont_cost = { | |
72bb85f8 | 445 | { |
d321551c L |
446 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
447 | 6, /* cost for loading QImode using movzbl */ | |
448 | {2, 4, 2}, /* cost of loading integer registers | |
449 | in QImode, HImode and SImode. | |
450 | Relative to reg-reg move (2). */ | |
451 | {2, 4, 2}, /* cost of storing integer registers */ | |
452 | 2, /* cost of reg,reg fld/fst */ | |
453 | {2, 2, 6}, /* cost of loading fp registers | |
454 | in SFmode, DFmode and XFmode */ | |
455 | {4, 4, 6}, /* cost of storing fp registers | |
456 | in SFmode, DFmode and XFmode */ | |
457 | 8, /* cost of moving MMX register */ | |
458 | {8, 8}, /* cost of loading MMX registers | |
459 | in SImode and DImode */ | |
460 | {8, 8}, /* cost of storing MMX registers | |
461 | in SImode and DImode */ | |
462 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
463 | {4, 8, 16, 32, 64}, /* cost of loading SSE registers | |
464 | in 32,64,128,256 and 512-bit */ | |
465 | {4, 8, 16, 32, 64}, /* cost of storing SSE registers | |
466 | in 32,64,128,256 and 512-bit */ | |
467 | 3, 3, /* SSE->integer and integer->SSE moves */ | |
468 | /* End of register allocator costs. */ | |
72bb85f8 | 469 | }, |
d321551c | 470 | |
64766e8d JH |
471 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
472 | COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ | |
473 | COSTS_N_INSNS (1), /* variable shift costs */ | |
474 | COSTS_N_INSNS (1), /* constant shift costs */ | |
475 | {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ | |
476 | COSTS_N_INSNS (11), /* HI */ | |
477 | COSTS_N_INSNS (11), /* SI */ | |
478 | COSTS_N_INSNS (11), /* DI */ | |
479 | COSTS_N_INSNS (11)}, /* other */ | |
480 | 0, /* cost of multiply per each bit set */ | |
481 | {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ | |
482 | COSTS_N_INSNS (25), /* HI */ | |
483 | COSTS_N_INSNS (25), /* SI */ | |
484 | COSTS_N_INSNS (25), /* DI */ | |
485 | COSTS_N_INSNS (25)}, /* other */ | |
486 | COSTS_N_INSNS (3), /* cost of movsx */ | |
487 | COSTS_N_INSNS (2), /* cost of movzx */ | |
488 | 8, /* "large" insn */ | |
489 | 17, /* MOVE_RATIO */ | |
25e22b19 | 490 | 6, /* CLEAR_RATIO */ |
64766e8d JH |
491 | {2, 4, 2}, /* cost of loading integer registers |
492 | in QImode, HImode and SImode. | |
493 | Relative to reg-reg move (2). */ | |
494 | {2, 4, 2}, /* cost of storing integer registers */ | |
d321551c L |
495 | {4, 8, 16, 32, 64}, /* cost of loading SSE register |
496 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
497 | {4, 8, 16, 32, 64}, /* cost of storing SSE register | |
498 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 499 | {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ |
df41dbaf | 500 | {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ |
d321551c L |
501 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
502 | 3, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
503 | 4, 4, /* Gather load static, per_elt. */ |
504 | 4, 4, /* Gather store static, per_elt. */ | |
64766e8d JH |
505 | 8, /* size of l1 cache. */ |
506 | 8, /* size of l2 cache */ | |
507 | 0, /* size of prefetch block */ | |
508 | 0, /* number of parallel prefetches */ | |
509 | 2, /* Branch cost */ | |
510 | COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ | |
511 | COSTS_N_INSNS (3), /* cost of FMUL instruction. */ | |
512 | COSTS_N_INSNS (39), /* cost of FDIV instruction. */ | |
513 | COSTS_N_INSNS (1), /* cost of FABS instruction. */ | |
514 | COSTS_N_INSNS (1), /* cost of FCHS instruction. */ | |
515 | COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ | |
6065f444 | 516 | |
c53c148c | 517 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
518 | COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */ |
519 | COSTS_N_INSNS (5), /* cost of MULSS instruction. */ | |
520 | COSTS_N_INSNS (5), /* cost of MULSD instruction. */ | |
c53c148c JH |
521 | COSTS_N_INSNS (10), /* cost of FMA SS instruction. */ |
522 | COSTS_N_INSNS (10), /* cost of FMA SD instruction. */ | |
6065f444 JH |
523 | COSTS_N_INSNS (31), /* cost of DIVSS instruction. */ |
524 | COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ | |
525 | COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ | |
526 | COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
527 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
528 | pentium_memcpy, | |
529 | pentium_memset, | |
f6fd8f2b JH |
530 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
531 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
532 | "16:8:8", /* Loop alignment. */ |
533 | "16:8:8", /* Jump alignment. */ | |
534 | "0:0:8", /* Label alignment. */ | |
535 | "16", /* Func alignment. */ | |
64766e8d JH |
536 | }; |
537 | ||
538 | /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes | |
539 | (we ensure the alignment). For small blocks inline loop is still a | |
540 | noticeable win, for bigger blocks either rep movsl or rep movsb is | |
541 | way to go. Rep movsb has apparently more expensive startup time in CPU, | |
542 | but after 4K the difference is down in the noise. */ | |
543 | static stringop_algs pentiumpro_memcpy[2] = { | |
544 | {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false}, | |
545 | {8192, rep_prefix_4_byte, false}, | |
546 | {-1, rep_prefix_1_byte, false}}}, | |
547 | DUMMY_STRINGOP_ALGS}; | |
548 | static stringop_algs pentiumpro_memset[2] = { | |
549 | {rep_prefix_4_byte, {{1024, unrolled_loop, false}, | |
550 | {8192, rep_prefix_4_byte, false}, | |
551 | {-1, libcall, false}}}, | |
552 | DUMMY_STRINGOP_ALGS}; | |
553 | static const | |
554 | struct processor_costs pentiumpro_cost = { | |
72bb85f8 | 555 | { |
d321551c L |
556 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
557 | 2, /* cost for loading QImode using movzbl */ | |
558 | {4, 4, 4}, /* cost of loading integer registers | |
559 | in QImode, HImode and SImode. | |
560 | Relative to reg-reg move (2). */ | |
561 | {2, 2, 2}, /* cost of storing integer registers */ | |
562 | 2, /* cost of reg,reg fld/fst */ | |
563 | {2, 2, 6}, /* cost of loading fp registers | |
564 | in SFmode, DFmode and XFmode */ | |
565 | {4, 4, 6}, /* cost of storing fp registers | |
566 | in SFmode, DFmode and XFmode */ | |
567 | 2, /* cost of moving MMX register */ | |
568 | {2, 2}, /* cost of loading MMX registers | |
569 | in SImode and DImode */ | |
570 | {2, 2}, /* cost of storing MMX registers | |
571 | in SImode and DImode */ | |
572 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
573 | {4, 8, 16, 32, 64}, /* cost of loading SSE registers | |
574 | in 32,64,128,256 and 512-bit */ | |
575 | {4, 8, 16, 32, 64}, /* cost of storing SSE registers | |
576 | in 32,64,128,256 and 512-bit */ | |
577 | 3, 3, /* SSE->integer and integer->SSE moves */ | |
578 | /* End of register allocator costs. */ | |
72bb85f8 | 579 | }, |
d321551c | 580 | |
64766e8d JH |
581 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
582 | COSTS_N_INSNS (1), /* cost of a lea instruction */ | |
583 | COSTS_N_INSNS (1), /* variable shift costs */ | |
584 | COSTS_N_INSNS (1), /* constant shift costs */ | |
585 | {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ | |
586 | COSTS_N_INSNS (4), /* HI */ | |
587 | COSTS_N_INSNS (4), /* SI */ | |
588 | COSTS_N_INSNS (4), /* DI */ | |
589 | COSTS_N_INSNS (4)}, /* other */ | |
590 | 0, /* cost of multiply per each bit set */ | |
591 | {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */ | |
592 | COSTS_N_INSNS (17), /* HI */ | |
593 | COSTS_N_INSNS (17), /* SI */ | |
594 | COSTS_N_INSNS (17), /* DI */ | |
595 | COSTS_N_INSNS (17)}, /* other */ | |
596 | COSTS_N_INSNS (1), /* cost of movsx */ | |
597 | COSTS_N_INSNS (1), /* cost of movzx */ | |
598 | 8, /* "large" insn */ | |
599 | 6, /* MOVE_RATIO */ | |
25e22b19 | 600 | 6, /* CLEAR_RATIO */ |
64766e8d JH |
601 | {4, 4, 4}, /* cost of loading integer registers |
602 | in QImode, HImode and SImode. | |
603 | Relative to reg-reg move (2). */ | |
604 | {2, 2, 2}, /* cost of storing integer registers */ | |
d321551c L |
605 | {4, 8, 16, 32, 64}, /* cost of loading SSE register |
606 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
607 | {4, 8, 16, 32, 64}, /* cost of storing SSE register | |
608 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 609 | {4, 8, 16, 32, 64}, /* cost of unaligned loads. */ |
df41dbaf | 610 | {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ |
d321551c L |
611 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
612 | 3, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
613 | 4, 4, /* Gather load static, per_elt. */ |
614 | 4, 4, /* Gather store static, per_elt. */ | |
64766e8d JH |
615 | 8, /* size of l1 cache. */ |
616 | 256, /* size of l2 cache */ | |
617 | 32, /* size of prefetch block */ | |
618 | 6, /* number of parallel prefetches */ | |
619 | 2, /* Branch cost */ | |
620 | COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ | |
621 | COSTS_N_INSNS (5), /* cost of FMUL instruction. */ | |
622 | COSTS_N_INSNS (56), /* cost of FDIV instruction. */ | |
623 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
624 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
625 | COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ | |
6065f444 | 626 | |
c53c148c | 627 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
628 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ |
629 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
630 | COSTS_N_INSNS (4), /* cost of MULSD instruction. */ | |
c53c148c JH |
631 | COSTS_N_INSNS (7), /* cost of FMA SS instruction. */ |
632 | COSTS_N_INSNS (7), /* cost of FMA SD instruction. */ | |
6065f444 JH |
633 | COSTS_N_INSNS (18), /* cost of DIVSS instruction. */ |
634 | COSTS_N_INSNS (18), /* cost of DIVSD instruction. */ | |
635 | COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ | |
636 | COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
637 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
638 | pentiumpro_memcpy, | |
639 | pentiumpro_memset, | |
f6fd8f2b JH |
640 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
641 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
642 | "16", /* Loop alignment. */ |
643 | "16:11:8", /* Jump alignment. */ | |
644 | "0:0:8", /* Label alignment. */ | |
645 | "16", /* Func alignment. */ | |
64766e8d JH |
646 | }; |
647 | ||
648 | static stringop_algs geode_memcpy[2] = { | |
649 | {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
650 | DUMMY_STRINGOP_ALGS}; | |
651 | static stringop_algs geode_memset[2] = { | |
652 | {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
653 | DUMMY_STRINGOP_ALGS}; | |
654 | static const | |
655 | struct processor_costs geode_cost = { | |
72bb85f8 | 656 | { |
d321551c L |
657 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
658 | 2, /* cost for loading QImode using movzbl */ | |
659 | {2, 2, 2}, /* cost of loading integer registers | |
660 | in QImode, HImode and SImode. | |
661 | Relative to reg-reg move (2). */ | |
662 | {2, 2, 2}, /* cost of storing integer registers */ | |
663 | 2, /* cost of reg,reg fld/fst */ | |
664 | {2, 2, 2}, /* cost of loading fp registers | |
665 | in SFmode, DFmode and XFmode */ | |
666 | {4, 6, 6}, /* cost of storing fp registers | |
667 | in SFmode, DFmode and XFmode */ | |
668 | 2, /* cost of moving MMX register */ | |
669 | {2, 2}, /* cost of loading MMX registers | |
670 | in SImode and DImode */ | |
671 | {2, 2}, /* cost of storing MMX registers | |
672 | in SImode and DImode */ | |
673 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
674 | {2, 2, 8, 16, 32}, /* cost of loading SSE registers | |
675 | in 32,64,128,256 and 512-bit */ | |
676 | {2, 2, 8, 16, 32}, /* cost of storing SSE registers | |
677 | in 32,64,128,256 and 512-bit */ | |
678 | 6, 6, /* SSE->integer and integer->SSE moves */ | |
679 | /* End of register allocator costs. */ | |
72bb85f8 | 680 | }, |
d321551c | 681 | |
64766e8d JH |
682 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
683 | COSTS_N_INSNS (1), /* cost of a lea instruction */ | |
684 | COSTS_N_INSNS (2), /* variable shift costs */ | |
685 | COSTS_N_INSNS (1), /* constant shift costs */ | |
686 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
687 | COSTS_N_INSNS (4), /* HI */ | |
688 | COSTS_N_INSNS (7), /* SI */ | |
689 | COSTS_N_INSNS (7), /* DI */ | |
690 | COSTS_N_INSNS (7)}, /* other */ | |
691 | 0, /* cost of multiply per each bit set */ | |
692 | {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ | |
693 | COSTS_N_INSNS (23), /* HI */ | |
694 | COSTS_N_INSNS (39), /* SI */ | |
695 | COSTS_N_INSNS (39), /* DI */ | |
696 | COSTS_N_INSNS (39)}, /* other */ | |
697 | COSTS_N_INSNS (1), /* cost of movsx */ | |
698 | COSTS_N_INSNS (1), /* cost of movzx */ | |
699 | 8, /* "large" insn */ | |
700 | 4, /* MOVE_RATIO */ | |
25e22b19 | 701 | 4, /* CLEAR_RATIO */ |
df41dbaf | 702 | {2, 2, 2}, /* cost of loading integer registers |
64766e8d JH |
703 | in QImode, HImode and SImode. |
704 | Relative to reg-reg move (2). */ | |
df41dbaf | 705 | {2, 2, 2}, /* cost of storing integer registers */ |
d321551c L |
706 | {2, 2, 8, 16, 32}, /* cost of loading SSE register |
707 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
708 | {2, 2, 8, 16, 32}, /* cost of storing SSE register | |
709 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 710 | {2, 2, 8, 16, 32}, /* cost of unaligned loads. */ |
df41dbaf | 711 | {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ |
d321551c L |
712 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
713 | 6, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
714 | 2, 2, /* Gather load static, per_elt. */ |
715 | 2, 2, /* Gather store static, per_elt. */ | |
64766e8d JH |
716 | 64, /* size of l1 cache. */ |
717 | 128, /* size of l2 cache. */ | |
718 | 32, /* size of prefetch block */ | |
719 | 1, /* number of parallel prefetches */ | |
720 | 1, /* Branch cost */ | |
721 | COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ | |
722 | COSTS_N_INSNS (11), /* cost of FMUL instruction. */ | |
723 | COSTS_N_INSNS (47), /* cost of FDIV instruction. */ | |
724 | COSTS_N_INSNS (1), /* cost of FABS instruction. */ | |
725 | COSTS_N_INSNS (1), /* cost of FCHS instruction. */ | |
726 | COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ | |
6065f444 | 727 | |
c53c148c | 728 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
729 | COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ |
730 | COSTS_N_INSNS (11), /* cost of MULSS instruction. */ | |
731 | COSTS_N_INSNS (11), /* cost of MULSD instruction. */ | |
c53c148c JH |
732 | COSTS_N_INSNS (17), /* cost of FMA SS instruction. */ |
733 | COSTS_N_INSNS (17), /* cost of FMA SD instruction. */ | |
6065f444 JH |
734 | COSTS_N_INSNS (47), /* cost of DIVSS instruction. */ |
735 | COSTS_N_INSNS (47), /* cost of DIVSD instruction. */ | |
736 | COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */ | |
737 | COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
738 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
739 | geode_memcpy, | |
740 | geode_memset, | |
f6fd8f2b JH |
741 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
742 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
743 | NULL, /* Loop alignment. */ |
744 | NULL, /* Jump alignment. */ | |
745 | NULL, /* Label alignment. */ | |
746 | NULL, /* Func alignment. */ | |
64766e8d JH |
747 | }; |
748 | ||
749 | static stringop_algs k6_memcpy[2] = { | |
750 | {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
751 | DUMMY_STRINGOP_ALGS}; | |
752 | static stringop_algs k6_memset[2] = { | |
753 | {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
754 | DUMMY_STRINGOP_ALGS}; | |
755 | static const | |
756 | struct processor_costs k6_cost = { | |
72bb85f8 | 757 | { |
d321551c L |
758 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
759 | 3, /* cost for loading QImode using movzbl */ | |
760 | {4, 5, 4}, /* cost of loading integer registers | |
761 | in QImode, HImode and SImode. | |
762 | Relative to reg-reg move (2). */ | |
763 | {2, 3, 2}, /* cost of storing integer registers */ | |
764 | 4, /* cost of reg,reg fld/fst */ | |
765 | {6, 6, 6}, /* cost of loading fp registers | |
766 | in SFmode, DFmode and XFmode */ | |
767 | {4, 4, 4}, /* cost of storing fp registers | |
768 | in SFmode, DFmode and XFmode */ | |
769 | 2, /* cost of moving MMX register */ | |
770 | {2, 2}, /* cost of loading MMX registers | |
771 | in SImode and DImode */ | |
772 | {2, 2}, /* cost of storing MMX registers | |
773 | in SImode and DImode */ | |
774 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
775 | {2, 2, 8, 16, 32}, /* cost of loading SSE registers | |
776 | in 32,64,128,256 and 512-bit */ | |
777 | {2, 2, 8, 16, 32}, /* cost of storing SSE registers | |
778 | in 32,64,128,256 and 512-bit */ | |
779 | 6, 6, /* SSE->integer and integer->SSE moves */ | |
780 | /* End of register allocator costs. */ | |
72bb85f8 | 781 | }, |
d321551c | 782 | |
64766e8d JH |
783 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
784 | COSTS_N_INSNS (2), /* cost of a lea instruction */ | |
785 | COSTS_N_INSNS (1), /* variable shift costs */ | |
786 | COSTS_N_INSNS (1), /* constant shift costs */ | |
787 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
788 | COSTS_N_INSNS (3), /* HI */ | |
789 | COSTS_N_INSNS (3), /* SI */ | |
790 | COSTS_N_INSNS (3), /* DI */ | |
791 | COSTS_N_INSNS (3)}, /* other */ | |
792 | 0, /* cost of multiply per each bit set */ | |
793 | {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ | |
794 | COSTS_N_INSNS (18), /* HI */ | |
795 | COSTS_N_INSNS (18), /* SI */ | |
796 | COSTS_N_INSNS (18), /* DI */ | |
797 | COSTS_N_INSNS (18)}, /* other */ | |
798 | COSTS_N_INSNS (2), /* cost of movsx */ | |
799 | COSTS_N_INSNS (2), /* cost of movzx */ | |
800 | 8, /* "large" insn */ | |
801 | 4, /* MOVE_RATIO */ | |
25e22b19 | 802 | 4, /* CLEAR_RATIO */ |
64766e8d JH |
803 | {4, 5, 4}, /* cost of loading integer registers |
804 | in QImode, HImode and SImode. | |
805 | Relative to reg-reg move (2). */ | |
806 | {2, 3, 2}, /* cost of storing integer registers */ | |
d321551c L |
807 | {2, 2, 8, 16, 32}, /* cost of loading SSE register |
808 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
809 | {2, 2, 8, 16, 32}, /* cost of storing SSE register | |
810 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 811 | {2, 2, 8, 16, 32}, /* cost of unaligned loads. */ |
df41dbaf | 812 | {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ |
d321551c L |
813 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
814 | 6, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
815 | 2, 2, /* Gather load static, per_elt. */ |
816 | 2, 2, /* Gather store static, per_elt. */ | |
64766e8d JH |
817 | 32, /* size of l1 cache. */ |
818 | 32, /* size of l2 cache. Some models | |
819 | have integrated l2 cache, but | |
820 | optimizing for k6 is not important | |
821 | enough to worry about that. */ | |
822 | 32, /* size of prefetch block */ | |
823 | 1, /* number of parallel prefetches */ | |
824 | 1, /* Branch cost */ | |
825 | COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ | |
826 | COSTS_N_INSNS (2), /* cost of FMUL instruction. */ | |
827 | COSTS_N_INSNS (56), /* cost of FDIV instruction. */ | |
828 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
829 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
830 | COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ | |
6065f444 | 831 | |
c53c148c | 832 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
833 | COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ |
834 | COSTS_N_INSNS (2), /* cost of MULSS instruction. */ | |
835 | COSTS_N_INSNS (2), /* cost of MULSD instruction. */ | |
c53c148c JH |
836 | COSTS_N_INSNS (4), /* cost of FMA SS instruction. */ |
837 | COSTS_N_INSNS (4), /* cost of FMA SD instruction. */ | |
6065f444 JH |
838 | COSTS_N_INSNS (56), /* cost of DIVSS instruction. */ |
839 | COSTS_N_INSNS (56), /* cost of DIVSD instruction. */ | |
840 | COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */ | |
841 | COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
842 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
843 | k6_memcpy, | |
844 | k6_memset, | |
f6fd8f2b JH |
845 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
846 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
847 | "32:8:8", /* Loop alignment. */ |
848 | "32:8:8", /* Jump alignment. */ | |
849 | "0:0:8", /* Label alignment. */ | |
850 | "32", /* Func alignment. */ | |
64766e8d JH |
851 | }; |
852 | ||
853 | /* For some reason, Athlon deals better with REP prefix (relative to loops) | |
854 | compared to K8. Alignment becomes important after 8 bytes for memcpy and | |
855 | 128 bytes for memset. */ | |
856 | static stringop_algs athlon_memcpy[2] = { | |
857 | {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
858 | DUMMY_STRINGOP_ALGS}; | |
859 | static stringop_algs athlon_memset[2] = { | |
860 | {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
861 | DUMMY_STRINGOP_ALGS}; | |
862 | static const | |
863 | struct processor_costs athlon_cost = { | |
72bb85f8 | 864 | { |
d321551c L |
865 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
866 | 4, /* cost for loading QImode using movzbl */ | |
867 | {3, 4, 3}, /* cost of loading integer registers | |
868 | in QImode, HImode and SImode. | |
869 | Relative to reg-reg move (2). */ | |
870 | {3, 4, 3}, /* cost of storing integer registers */ | |
871 | 4, /* cost of reg,reg fld/fst */ | |
872 | {4, 4, 12}, /* cost of loading fp registers | |
873 | in SFmode, DFmode and XFmode */ | |
874 | {6, 6, 8}, /* cost of storing fp registers | |
875 | in SFmode, DFmode and XFmode */ | |
876 | 2, /* cost of moving MMX register */ | |
877 | {4, 4}, /* cost of loading MMX registers | |
878 | in SImode and DImode */ | |
879 | {4, 4}, /* cost of storing MMX registers | |
880 | in SImode and DImode */ | |
881 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
882 | {4, 4, 12, 12, 24}, /* cost of loading SSE registers | |
883 | in 32,64,128,256 and 512-bit */ | |
884 | {4, 4, 10, 10, 20}, /* cost of storing SSE registers | |
885 | in 32,64,128,256 and 512-bit */ | |
886 | 5, 5, /* SSE->integer and integer->SSE moves */ | |
887 | /* End of register allocator costs. */ | |
72bb85f8 | 888 | }, |
d321551c | 889 | |
64766e8d JH |
890 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
891 | COSTS_N_INSNS (2), /* cost of a lea instruction */ | |
892 | COSTS_N_INSNS (1), /* variable shift costs */ | |
893 | COSTS_N_INSNS (1), /* constant shift costs */ | |
894 | {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ | |
895 | COSTS_N_INSNS (5), /* HI */ | |
896 | COSTS_N_INSNS (5), /* SI */ | |
897 | COSTS_N_INSNS (5), /* DI */ | |
898 | COSTS_N_INSNS (5)}, /* other */ | |
899 | 0, /* cost of multiply per each bit set */ | |
900 | {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ | |
901 | COSTS_N_INSNS (26), /* HI */ | |
902 | COSTS_N_INSNS (42), /* SI */ | |
903 | COSTS_N_INSNS (74), /* DI */ | |
904 | COSTS_N_INSNS (74)}, /* other */ | |
905 | COSTS_N_INSNS (1), /* cost of movsx */ | |
906 | COSTS_N_INSNS (1), /* cost of movzx */ | |
907 | 8, /* "large" insn */ | |
908 | 9, /* MOVE_RATIO */ | |
25e22b19 | 909 | 6, /* CLEAR_RATIO */ |
64766e8d JH |
910 | {3, 4, 3}, /* cost of loading integer registers |
911 | in QImode, HImode and SImode. | |
912 | Relative to reg-reg move (2). */ | |
913 | {3, 4, 3}, /* cost of storing integer registers */ | |
d321551c L |
914 | {4, 4, 12, 12, 24}, /* cost of loading SSE register |
915 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
916 | {4, 4, 10, 10, 20}, /* cost of storing SSE register | |
917 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
b7167993 | 918 | {4, 4, 12, 12, 24}, /* cost of unaligned loads. */ |
b7167993 | 919 | {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ |
d321551c L |
920 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
921 | 5, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
922 | 4, 4, /* Gather load static, per_elt. */ |
923 | 4, 4, /* Gather store static, per_elt. */ | |
64766e8d JH |
924 | 64, /* size of l1 cache. */ |
925 | 256, /* size of l2 cache. */ | |
926 | 64, /* size of prefetch block */ | |
927 | 6, /* number of parallel prefetches */ | |
928 | 5, /* Branch cost */ | |
929 | COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ | |
930 | COSTS_N_INSNS (4), /* cost of FMUL instruction. */ | |
931 | COSTS_N_INSNS (24), /* cost of FDIV instruction. */ | |
932 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
933 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
934 | COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ | |
6065f444 | 935 | |
c53c148c | 936 | COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ |
6065f444 JH |
937 | COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ |
938 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
939 | COSTS_N_INSNS (4), /* cost of MULSD instruction. */ | |
c53c148c JH |
940 | COSTS_N_INSNS (8), /* cost of FMA SS instruction. */ |
941 | COSTS_N_INSNS (8), /* cost of FMA SD instruction. */ | |
6065f444 JH |
942 | /* 11-16 */ |
943 | COSTS_N_INSNS (16), /* cost of DIVSS instruction. */ | |
944 | COSTS_N_INSNS (24), /* cost of DIVSD instruction. */ | |
945 | COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ | |
946 | COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
947 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
948 | athlon_memcpy, | |
949 | athlon_memset, | |
f6fd8f2b JH |
950 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
951 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
952 | "16:8:8", /* Loop alignment. */ |
953 | "16:8:8", /* Jump alignment. */ | |
954 | "0:0:8", /* Label alignment. */ | |
955 | "16", /* Func alignment. */ | |
64766e8d JH |
956 | }; |
957 | ||
/* K8 has an optimized REP instruction for medium-sized blocks, but for very
   small blocks it is better to use a loop.  For large blocks, libcall can
   do non-temporal accesses and beat inline code considerably.  */
961 | static stringop_algs k8_memcpy[2] = { | |
962 | {libcall, {{6, loop, false}, {14, unrolled_loop, false}, | |
963 | {-1, rep_prefix_4_byte, false}}}, | |
964 | {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, | |
965 | {-1, libcall, false}}}}; | |
966 | static stringop_algs k8_memset[2] = { | |
967 | {libcall, {{8, loop, false}, {24, unrolled_loop, false}, | |
968 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
969 | {libcall, {{48, unrolled_loop, false}, | |
970 | {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; | |
971 | static const | |
972 | struct processor_costs k8_cost = { | |
72bb85f8 | 973 | { |
d321551c L |
974 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
975 | 4, /* cost for loading QImode using movzbl */ | |
976 | {3, 4, 3}, /* cost of loading integer registers | |
977 | in QImode, HImode and SImode. | |
978 | Relative to reg-reg move (2). */ | |
979 | {3, 4, 3}, /* cost of storing integer registers */ | |
980 | 4, /* cost of reg,reg fld/fst */ | |
981 | {4, 4, 12}, /* cost of loading fp registers | |
982 | in SFmode, DFmode and XFmode */ | |
983 | {6, 6, 8}, /* cost of storing fp registers | |
984 | in SFmode, DFmode and XFmode */ | |
985 | 2, /* cost of moving MMX register */ | |
986 | {3, 3}, /* cost of loading MMX registers | |
987 | in SImode and DImode */ | |
988 | {4, 4}, /* cost of storing MMX registers | |
989 | in SImode and DImode */ | |
990 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
991 | {4, 3, 12, 12, 24}, /* cost of loading SSE registers | |
992 | in 32,64,128,256 and 512-bit */ | |
993 | {4, 4, 10, 10, 20}, /* cost of storing SSE registers | |
994 | in 32,64,128,256 and 512-bit */ | |
995 | 5, 5, /* SSE->integer and integer->SSE moves */ | |
996 | /* End of register allocator costs. */ | |
72bb85f8 | 997 | }, |
d321551c | 998 | |
64766e8d JH |
999 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
1000 | COSTS_N_INSNS (2), /* cost of a lea instruction */ | |
1001 | COSTS_N_INSNS (1), /* variable shift costs */ | |
1002 | COSTS_N_INSNS (1), /* constant shift costs */ | |
1003 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
1004 | COSTS_N_INSNS (4), /* HI */ | |
1005 | COSTS_N_INSNS (3), /* SI */ | |
1006 | COSTS_N_INSNS (4), /* DI */ | |
1007 | COSTS_N_INSNS (5)}, /* other */ | |
1008 | 0, /* cost of multiply per each bit set */ | |
1009 | {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ | |
1010 | COSTS_N_INSNS (26), /* HI */ | |
1011 | COSTS_N_INSNS (42), /* SI */ | |
1012 | COSTS_N_INSNS (74), /* DI */ | |
1013 | COSTS_N_INSNS (74)}, /* other */ | |
1014 | COSTS_N_INSNS (1), /* cost of movsx */ | |
1015 | COSTS_N_INSNS (1), /* cost of movzx */ | |
1016 | 8, /* "large" insn */ | |
1017 | 9, /* MOVE_RATIO */ | |
25e22b19 | 1018 | 6, /* CLEAR_RATIO */ |
64766e8d JH |
1019 | {3, 4, 3}, /* cost of loading integer registers |
1020 | in QImode, HImode and SImode. | |
1021 | Relative to reg-reg move (2). */ | |
1022 | {3, 4, 3}, /* cost of storing integer registers */ | |
d321551c L |
1023 | {4, 3, 12, 12, 24}, /* cost of loading SSE register |
1024 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1025 | {4, 4, 10, 10, 20}, /* cost of storing SSE register | |
1026 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
b7167993 | 1027 | {4, 3, 12, 12, 24}, /* cost of unaligned loads. */ |
b7167993 | 1028 | {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ |
d321551c L |
1029 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
1030 | 5, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
1031 | 4, 4, /* Gather load static, per_elt. */ |
1032 | 4, 4, /* Gather store static, per_elt. */ | |
64766e8d JH |
1033 | 64, /* size of l1 cache. */ |
1034 | 512, /* size of l2 cache. */ | |
1035 | 64, /* size of prefetch block */ | |
1036 | /* New AMD processors never drop prefetches; if they cannot be performed | |
1037 | immediately, they are queued. We set number of simultaneous prefetches | |
1038 | to a large constant to reflect this (it probably is not a good idea not | |
1039 | to limit number of prefetches at all, as their execution also takes some | |
1040 | time). */ | |
1041 | 100, /* number of parallel prefetches */ | |
1042 | 3, /* Branch cost */ | |
1043 | COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ | |
1044 | COSTS_N_INSNS (4), /* cost of FMUL instruction. */ | |
1045 | COSTS_N_INSNS (19), /* cost of FDIV instruction. */ | |
1046 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
1047 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
1048 | COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ | |
6065f444 | 1049 | |
c53c148c | 1050 | COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ |
6065f444 JH |
1051 | COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ |
1052 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
1053 | COSTS_N_INSNS (4), /* cost of MULSD instruction. */ | |
c53c148c JH |
1054 | COSTS_N_INSNS (8), /* cost of FMA SS instruction. */ |
1055 | COSTS_N_INSNS (8), /* cost of FMA SD instruction. */ | |
6065f444 JH |
1056 | /* 11-16 */ |
1057 | COSTS_N_INSNS (16), /* cost of DIVSS instruction. */ | |
1058 | COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ | |
1059 | COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ | |
1060 | COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
1061 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
1062 | k8_memcpy, | |
1063 | k8_memset, | |
f6fd8f2b JH |
1064 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
1065 | COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
1066 | "16:8:8", /* Loop alignment. */ |
1067 | "16:8:8", /* Jump alignment. */ | |
1068 | "0:0:8", /* Label alignment. */ | |
1069 | "16", /* Func alignment. */ | |
64766e8d JH |
1070 | }; |
1071 | ||
/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do non-temporal accesses and beat inline code considerably.  */
1075 | static stringop_algs amdfam10_memcpy[2] = { | |
1076 | {libcall, {{6, loop, false}, {14, unrolled_loop, false}, | |
1077 | {-1, rep_prefix_4_byte, false}}}, | |
1078 | {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, | |
1079 | {-1, libcall, false}}}}; | |
1080 | static stringop_algs amdfam10_memset[2] = { | |
1081 | {libcall, {{8, loop, false}, {24, unrolled_loop, false}, | |
1082 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
1083 | {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, | |
1084 | {-1, libcall, false}}}}; | |
1085 | struct processor_costs amdfam10_cost = { | |
72bb85f8 | 1086 | { |
d321551c | 1087 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
64766e8d JH |
1088 | 4, /* cost for loading QImode using movzbl */ |
1089 | {3, 4, 3}, /* cost of loading integer registers | |
1090 | in QImode, HImode and SImode. | |
1091 | Relative to reg-reg move (2). */ | |
1092 | {3, 4, 3}, /* cost of storing integer registers */ | |
1093 | 4, /* cost of reg,reg fld/fst */ | |
1094 | {4, 4, 12}, /* cost of loading fp registers | |
1095 | in SFmode, DFmode and XFmode */ | |
1096 | {6, 6, 8}, /* cost of storing fp registers | |
1097 | in SFmode, DFmode and XFmode */ | |
1098 | 2, /* cost of moving MMX register */ | |
1099 | {3, 3}, /* cost of loading MMX registers | |
1100 | in SImode and DImode */ | |
1101 | {4, 4}, /* cost of storing MMX registers | |
1102 | in SImode and DImode */ | |
df41dbaf JH |
1103 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
1104 | {4, 4, 3, 6, 12}, /* cost of loading SSE registers | |
1105 | in 32,64,128,256 and 512-bit */ | |
df41dbaf JH |
1106 | {4, 4, 5, 10, 20}, /* cost of storing SSE registers |
1107 | in 32,64,128,256 and 512-bit */ | |
df41dbaf | 1108 | 3, 3, /* SSE->integer and integer->SSE moves */ |
d321551c | 1109 | |
64766e8d JH |
1110 | /* On K8: |
1111 | MOVD reg64, xmmreg Double FSTORE 4 | |
1112 | MOVD reg32, xmmreg Double FSTORE 4 | |
1113 | On AMDFAM10: | |
1114 | MOVD reg64, xmmreg Double FADD 3 | |
1115 | 1/1 1/1 | |
1116 | MOVD reg32, xmmreg Double FADD 3 | |
1117 | 1/1 1/1 */ | |
d321551c | 1118 | /* End of register allocator costs. */ |
72bb85f8 | 1119 | }, |
d321551c L |
1120 | |
1121 | COSTS_N_INSNS (1), /* cost of an add instruction */ | |
1122 | COSTS_N_INSNS (2), /* cost of a lea instruction */ | |
1123 | COSTS_N_INSNS (1), /* variable shift costs */ | |
1124 | COSTS_N_INSNS (1), /* constant shift costs */ | |
1125 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
1126 | COSTS_N_INSNS (4), /* HI */ | |
1127 | COSTS_N_INSNS (3), /* SI */ | |
1128 | COSTS_N_INSNS (4), /* DI */ | |
1129 | COSTS_N_INSNS (5)}, /* other */ | |
1130 | 0, /* cost of multiply per each bit set */ | |
1131 | {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ | |
1132 | COSTS_N_INSNS (35), /* HI */ | |
1133 | COSTS_N_INSNS (51), /* SI */ | |
1134 | COSTS_N_INSNS (83), /* DI */ | |
1135 | COSTS_N_INSNS (83)}, /* other */ | |
1136 | COSTS_N_INSNS (1), /* cost of movsx */ | |
1137 | COSTS_N_INSNS (1), /* cost of movzx */ | |
1138 | 8, /* "large" insn */ | |
1139 | 9, /* MOVE_RATIO */ | |
25e22b19 | 1140 | 6, /* CLEAR_RATIO */ |
d321551c L |
1141 | {3, 4, 3}, /* cost of loading integer registers |
1142 | in QImode, HImode and SImode. | |
1143 | Relative to reg-reg move (2). */ | |
1144 | {3, 4, 3}, /* cost of storing integer registers */ | |
1145 | {4, 4, 3, 6, 12}, /* cost of loading SSE register | |
1146 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1147 | {4, 4, 5, 10, 20}, /* cost of storing SSE register | |
1148 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1149 | {4, 4, 3, 7, 12}, /* cost of unaligned loads. */ | |
1150 | {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ | |
1151 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
1152 | 3, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
1153 | 4, 4, /* Gather load static, per_elt. */ |
1154 | 4, 4, /* Gather store static, per_elt. */ | |
64766e8d JH |
1155 | 64, /* size of l1 cache. */ |
1156 | 512, /* size of l2 cache. */ | |
1157 | 64, /* size of prefetch block */ | |
1158 | /* New AMD processors never drop prefetches; if they cannot be performed | |
1159 | immediately, they are queued. We set number of simultaneous prefetches | |
1160 | to a large constant to reflect this (it probably is not a good idea not | |
1161 | to limit number of prefetches at all, as their execution also takes some | |
1162 | time). */ | |
1163 | 100, /* number of parallel prefetches */ | |
1164 | 2, /* Branch cost */ | |
1165 | COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ | |
1166 | COSTS_N_INSNS (4), /* cost of FMUL instruction. */ | |
1167 | COSTS_N_INSNS (19), /* cost of FDIV instruction. */ | |
1168 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
1169 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
1170 | COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ | |
6065f444 | 1171 | |
c53c148c | 1172 | COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ |
6065f444 JH |
1173 | COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ |
1174 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
1175 | COSTS_N_INSNS (4), /* cost of MULSD instruction. */ | |
c53c148c JH |
1176 | COSTS_N_INSNS (8), /* cost of FMA SS instruction. */ |
1177 | COSTS_N_INSNS (8), /* cost of FMA SD instruction. */ | |
6065f444 JH |
1178 | /* 11-16 */ |
1179 | COSTS_N_INSNS (16), /* cost of DIVSS instruction. */ | |
1180 | COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ | |
1181 | COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */ | |
1182 | COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
1183 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
1184 | amdfam10_memcpy, | |
1185 | amdfam10_memset, | |
f6fd8f2b JH |
1186 | COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ |
1187 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
1188 | "32:25:8", /* Loop alignment. */ |
1189 | "32:8:8", /* Jump alignment. */ | |
1190 | "0:0:8", /* Label alignment. */ | |
1191 | "32", /* Func alignment. */ | |
64766e8d JH |
1192 | }; |
1193 | ||
/* BDVER has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, libcall
   can do non-temporal accesses and beat inline code considerably.  */
c727b835 | 1197 | static stringop_algs bdver_memcpy[2] = { |
64766e8d JH |
1198 | {libcall, {{6, loop, false}, {14, unrolled_loop, false}, |
1199 | {-1, rep_prefix_4_byte, false}}}, | |
1200 | {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, | |
1201 | {-1, libcall, false}}}}; | |
c727b835 | 1202 | static stringop_algs bdver_memset[2] = { |
64766e8d JH |
1203 | {libcall, {{8, loop, false}, {24, unrolled_loop, false}, |
1204 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
1205 | {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, | |
1206 | {-1, libcall, false}}}}; | |
1207 | ||
c727b835 | 1208 | const struct processor_costs bdver_cost = { /* cost table that plugs in bdver_memcpy and bdver_memset */ |
72bb85f8 | 1209 | { |
d321551c L |
1210 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
1211 | 8, /* cost for loading QImode using movzbl */ | |
1212 | {8, 8, 8}, /* cost of loading integer registers | |
1213 | in QImode, HImode and SImode. | |
1214 | Relative to reg-reg move (2). */ | |
1215 | {8, 8, 8}, /* cost of storing integer registers */ | |
1216 | 4, /* cost of reg,reg fld/fst */ | |
1217 | {12, 12, 28}, /* cost of loading fp registers | |
1218 | in SFmode, DFmode and XFmode */ | |
1219 | {10, 10, 18}, /* cost of storing fp registers | |
1220 | in SFmode, DFmode and XFmode */ | |
1221 | 4, /* cost of moving MMX register */ | |
1222 | {12, 12}, /* cost of loading MMX registers | |
1223 | in SImode and DImode */ | |
1224 | {10, 10}, /* cost of storing MMX registers | |
1225 | in SImode and DImode */ | |
1226 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
1227 | {12, 12, 10, 40, 60}, /* cost of loading SSE registers | |
1228 | in 32,64,128,256 and 512-bit */ | |
1229 | {10, 10, 10, 40, 60}, /* cost of storing SSE registers | |
1230 | in 32,64,128,256 and 512-bit */ | |
1231 | 16, 20, /* SSE->integer and integer->SSE moves */ | |
1232 | /* End of register allocator costs. */ | |
72bb85f8 | 1233 | }, |
d321551c | 1234 | |
64766e8d JH |
1235 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
1236 | COSTS_N_INSNS (1), /* cost of a lea instruction */ | |
1237 | COSTS_N_INSNS (1), /* variable shift costs */ | |
1238 | COSTS_N_INSNS (1), /* constant shift costs */ | |
1239 | {COSTS_N_INSNS (4), /* cost of starting multiply for QI */ | |
1240 | COSTS_N_INSNS (4), /* HI */ | |
1241 | COSTS_N_INSNS (4), /* SI */ | |
1242 | COSTS_N_INSNS (6), /* DI */ | |
1243 | COSTS_N_INSNS (6)}, /* other */ | |
1244 | 0, /* cost of multiply per each bit set */ | |
1245 | {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ | |
1246 | COSTS_N_INSNS (35), /* HI */ | |
1247 | COSTS_N_INSNS (51), /* SI */ | |
1248 | COSTS_N_INSNS (83), /* DI */ | |
1249 | COSTS_N_INSNS (83)}, /* other */ | |
1250 | COSTS_N_INSNS (1), /* cost of movsx */ | |
1251 | COSTS_N_INSNS (1), /* cost of movzx */ | |
1252 | 8, /* "large" insn */ | |
1253 | 9, /* MOVE_RATIO */ | |
25e22b19 | 1254 | 6, /* CLEAR_RATIO */ |
df41dbaf | 1255 | {8, 8, 8}, /* cost of loading integer registers |
64766e8d JH |
1256 | in QImode, HImode and SImode. |
1257 | Relative to reg-reg move (2). */ | |
df41dbaf | 1258 | {8, 8, 8}, /* cost of storing integer registers */ |
d321551c L |
1259 | {12, 12, 10, 40, 60}, /* cost of loading SSE register |
1260 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1261 | {10, 10, 10, 40, 60}, /* cost of storing SSE register | |
1262 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
b7167993 | 1263 | {12, 12, 10, 40, 60}, /* cost of unaligned loads. */ |
b7167993 | 1264 | {10, 10, 10, 40, 60}, /* cost of unaligned stores. */ |
d321551c L |
1265 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
1266 | 16, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
1267 | 12, 12, /* Gather load static, per_elt. */ |
1268 | 10, 10, /* Gather store static, per_elt. */ | |
64766e8d JH |
1269 | 16, /* size of l1 cache. */ |
1270 | 2048, /* size of l2 cache. */ | |
1271 | 64, /* size of prefetch block */ | |
1272 | /* New AMD processors never drop prefetches; if they cannot be performed | |
1273 | immediately, they are queued. We set number of simultaneous prefetches | |
1274 | to a large constant to reflect this (it probably is not a good idea not | |
1275 | to limit number of prefetches at all, as their execution also takes some | |
1276 | time). */ | |
1277 | 100, /* number of parallel prefetches */ | |
1278 | 2, /* Branch cost */ | |
1279 | COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ | |
1280 | COSTS_N_INSNS (6), /* cost of FMUL instruction. */ | |
1281 | COSTS_N_INSNS (42), /* cost of FDIV instruction. */ | |
1282 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
1283 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
1284 | COSTS_N_INSNS (52), /* cost of FSQRT instruction. */ | |
6065f444 | 1285 | |
c53c148c | 1286 | COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ |
6065f444 JH |
1287 | COSTS_N_INSNS (6), /* cost of ADDSS/SD SUBSS/SD insns. */ |
1288 | COSTS_N_INSNS (6), /* cost of MULSS instruction. */ | |
1289 | COSTS_N_INSNS (6), /* cost of MULSD instruction. */ | |
c53c148c JH |
1290 | COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ |
1291 | COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ | |
6065f444 JH |
1292 | /* 9-24 */ |
1293 | COSTS_N_INSNS (24), /* cost of DIVSS instruction. */ | |
1294 | /* 9-27 */ | |
1295 | COSTS_N_INSNS (27), /* cost of DIVSD instruction. */ | |
1296 | COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */ | |
1297 | COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */ | |
64766e8d | 1298 | 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
c727b835 RB |
1299 | bdver_memcpy, |
1300 | bdver_memset, | |
f6fd8f2b JH |
1301 | COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ |
1302 | COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
1303 | "16:11:8", /* Loop alignment. */ |
1304 | "16:8:8", /* Jump alignment. */ | |
1305 | "0:0:8", /* Label alignment. */ | |
1306 | "11", /* Func alignment. */ | |
64766e8d JH |
1307 | }; |
1308 | ||
1309 | ||
1310 | /* ZNVER1 has optimized REP instruction for medium sized blocks, but for | |
1311 | very small blocks it is better to use loop. For large blocks, libcall | |
1312 | can do non-temporal accesses and beat inline considerably. */ | |
1313 | static stringop_algs znver1_memcpy[2] = { /* stringop_algs pair listed in znver1_cost. */ | |
1314 | {libcall, {{6, loop, false}, {14, unrolled_loop, false}, | |
1315 | {-1, rep_prefix_4_byte, false}}}, | |
1316 | {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, | |
1317 | {-1, libcall, false}}}}; | |
1318 | static stringop_algs znver1_memset[2] = { /* stringop_algs pair listed in znver1_cost. */ | |
1319 | {libcall, {{8, loop, false}, {24, unrolled_loop, false}, | |
1320 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
1321 | {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, | |
1322 | {-1, libcall, false}}}}; | |
1323 | struct processor_costs znver1_cost = { /* cost table that plugs in znver1_memcpy and znver1_memset */ | |
72bb85f8 | 1324 | { |
d321551c L |
1325 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
1326 | ||
1327 | /* reg-reg moves are done by renaming and thus they are even cheaper than | |
1328 | 1 cycle. Because reg-reg move cost is 2 and the following tables correspond | |
1329 | to doubles of latencies, we do not model this correctly. It does not | |
1330 | seem to make practical difference to bump prices up even more. */ | |
1331 | 6, /* cost for loading QImode using | |
1332 | movzbl. */ | |
1333 | {6, 6, 6}, /* cost of loading integer registers | |
1334 | in QImode, HImode and SImode. | |
1335 | Relative to reg-reg move (2). */ | |
1336 | {8, 8, 8}, /* cost of storing integer | |
1337 | registers. */ | |
1338 | 2, /* cost of reg,reg fld/fst. */ | |
1339 | {6, 6, 16}, /* cost of loading fp registers | |
1340 | in SFmode, DFmode and XFmode. */ | |
1341 | {8, 8, 16}, /* cost of storing fp registers | |
1342 | in SFmode, DFmode and XFmode. */ | |
1343 | 2, /* cost of moving MMX register. */ | |
1344 | {6, 6}, /* cost of loading MMX registers | |
1345 | in SImode and DImode. */ | |
1346 | {8, 8}, /* cost of storing MMX registers | |
1347 | in SImode and DImode. */ | |
1348 | 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ | |
1349 | {6, 6, 6, 12, 24}, /* cost of loading SSE registers | |
1350 | in 32,64,128,256 and 512-bit. */ | |
1351 | {8, 8, 8, 16, 32}, /* cost of storing SSE registers | |
1352 | in 32,64,128,256 and 512-bit. */ | |
1353 | 6, 6, /* SSE->integer and integer->SSE moves. */ | |
1354 | /* End of register allocator costs. */ | |
72bb85f8 | 1355 | }, |
d321551c | 1356 | |
64766e8d JH |
1357 | COSTS_N_INSNS (1), /* cost of an add instruction. */ |
1358 | COSTS_N_INSNS (1), /* cost of a lea instruction. */ | |
1359 | COSTS_N_INSNS (1), /* variable shift costs. */ | |
1360 | COSTS_N_INSNS (1), /* constant shift costs. */ | |
1361 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ | |
1362 | COSTS_N_INSNS (3), /* HI. */ | |
1363 | COSTS_N_INSNS (3), /* SI. */ | |
6065f444 JH |
1364 | COSTS_N_INSNS (3), /* DI. */ |
1365 | COSTS_N_INSNS (3)}, /* other. */ | |
64766e8d JH |
1366 | 0, /* cost of multiply per each bit |
1367 | set. */ | |
6065f444 JH |
1368 | /* Depending on parameters, idiv can get faster on ryzen. This is upper |
1369 | bound. */ | |
1370 | {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ | |
1371 | COSTS_N_INSNS (22), /* HI. */ | |
1372 | COSTS_N_INSNS (30), /* SI. */ | |
1373 | COSTS_N_INSNS (45), /* DI. */ | |
1374 | COSTS_N_INSNS (45)}, /* other. */ | |
64766e8d JH |
1375 | COSTS_N_INSNS (1), /* cost of movsx. */ |
1376 | COSTS_N_INSNS (1), /* cost of movzx. */ | |
1377 | 8, /* "large" insn. */ | |
1378 | 9, /* MOVE_RATIO. */ | |
25e22b19 | 1379 | 6, /* CLEAR_RATIO */ |
01118373 | 1380 | {6, 6, 6}, /* cost of loading integer registers |
64766e8d JH |
1381 | in QImode, HImode and SImode. |
1382 | Relative to reg-reg move (2). */ | |
01118373 | 1383 | {8, 8, 8}, /* cost of storing integer |
64766e8d | 1384 | registers. */ |
d321551c L |
1385 | {6, 6, 6, 12, 24}, /* cost of loading SSE register |
1386 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1387 | {8, 8, 8, 16, 32}, /* cost of storing SSE register | |
1388 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
b7167993 | 1389 | {6, 6, 6, 12, 24}, /* cost of unaligned loads. */ |
b7167993 | 1390 | {8, 8, 8, 16, 32}, /* cost of unaligned stores. */ |
d321551c L |
1391 | 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ |
1392 | 6, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
1393 | /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, |
1394 | throughput 12. Approx 9 uops do not depend on vector size and every load | |
1395 | is 7 uops. NOTE(review): VGATHERDPD is named twice with different figures; one is presumably another gather variant - confirm. */ | |
1396 | 18, 8, /* Gather load static, per_elt. */ | |
1397 | 18, 10, /* Gather store static, per_elt. */ | |
64766e8d JH |
1398 | 32, /* size of l1 cache. */ |
1399 | 512, /* size of l2 cache. */ | |
1400 | 64, /* size of prefetch block. */ | |
1401 | /* New AMD processors never drop prefetches; if they cannot be performed | |
1402 | immediately, they are queued. We set number of simultaneous prefetches | |
1403 | to a large constant to reflect this (it probably is not a good idea not | |
1404 | to limit number of prefetches at all, as their execution also takes some | |
1405 | time). */ | |
1406 | 100, /* number of parallel prefetches. */ | |
1407 | 3, /* Branch cost. */ | |
6065f444 JH |
1408 | COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ |
1409 | COSTS_N_INSNS (5), /* cost of FMUL instruction. */ | |
1410 | /* Latency of fdiv is 8-15. */ | |
1411 | COSTS_N_INSNS (15), /* cost of FDIV instruction. */ | |
1412 | COSTS_N_INSNS (1), /* cost of FABS instruction. */ | |
1413 | COSTS_N_INSNS (1), /* cost of FCHS instruction. */ | |
1414 | /* Latency of fsqrt is 4-10. */ | |
1415 | COSTS_N_INSNS (10), /* cost of FSQRT instruction. */ | |
1416 | ||
c53c148c | 1417 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
1418 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ |
1419 | COSTS_N_INSNS (3), /* cost of MULSS instruction. */ | |
1420 | COSTS_N_INSNS (4), /* cost of MULSD instruction. */ | |
c53c148c JH |
1421 | COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ |
1422 | COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ | |
6065f444 JH |
1423 | COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ |
1424 | /* 9-13 */ | |
1425 | COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ | |
1426 | COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ | |
1427 | COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
1428 | /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles |
1429 | and it can execute 2 integer additions and 2 multiplications thus | |
1430 | reassociation may make sense up to width of 6. SPEC2k6 benchmarks suggest | |
1431 | that 4 works better than 6 probably due to register pressure. | |
1432 | ||
1433 | Integer vector operations are taken by FP unit and execute 3 vector | |
1434 | plus/minus operations per cycle but only one multiply. This is adjusted | |
1435 | in ix86_reassociation_width. */ | |
1436 | 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ | |
1437 | znver1_memcpy, | |
1438 | znver1_memset, | |
f6fd8f2b JH |
1439 | COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ |
1440 | COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
1441 | "16", /* Loop alignment. */ |
1442 | "16", /* Jump alignment. */ | |
1443 | "0:0:8", /* Label alignment. */ | |
1444 | "16", /* Func alignment. */ | |
64766e8d JH |
1445 | }; |
1446 | ||
2901f42f VK |
1447 | /* ZNVER2 has optimized REP instruction for medium sized blocks, but for |
1448 | very small blocks it is better to use loop. For large blocks, libcall | |
1449 | can do non-temporal accesses and beat inline considerably. */ | |
1450 | static stringop_algs znver2_memcpy[2] = { /* stringop_algs pair listed in znver2_cost. */ | |
1451 | {libcall, {{6, loop, false}, {14, unrolled_loop, false}, | |
1452 | {-1, rep_prefix_4_byte, false}}}, | |
187dd65d | 1453 | {libcall, {{16, loop, false}, {64, rep_prefix_4_byte, false}, |
2901f42f VK |
1454 | {-1, libcall, false}}}}; |
1455 | static stringop_algs znver2_memset[2] = { /* stringop_algs pair listed in znver2_cost. */ | |
1456 | {libcall, {{8, loop, false}, {24, unrolled_loop, false}, | |
1457 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
187dd65d | 1458 | {libcall, {{24, rep_prefix_4_byte, false}, {128, rep_prefix_8_byte, false}, |
2901f42f VK |
1459 | {-1, libcall, false}}}}; |
1460 | ||
1461 | struct processor_costs znver2_cost = { /* cost table that plugs in znver2_memcpy and znver2_memset */ | |
72bb85f8 | 1462 | { |
d321551c | 1463 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
2901f42f VK |
1464 | |
1465 | /* reg-reg moves are done by renaming and thus they are even cheaper than | |
1466 | 1 cycle. Because reg-reg move cost is 2 and following tables correspond | |
1467 | to doubles of latencies, we do not model this correctly. It does not | |
1468 | seem to make practical difference to bump prices up even more. */ | |
1469 | 6, /* cost for loading QImode using | |
1470 | movzbl. */ | |
1471 | {6, 6, 6}, /* cost of loading integer registers | |
1472 | in QImode, HImode and SImode. | |
1473 | Relative to reg-reg move (2). */ | |
1474 | {8, 8, 8}, /* cost of storing integer | |
1475 | registers. */ | |
1476 | 2, /* cost of reg,reg fld/fst. */ | |
1477 | {6, 6, 16}, /* cost of loading fp registers | |
1478 | in SFmode, DFmode and XFmode. */ | |
1479 | {8, 8, 16}, /* cost of storing fp registers | |
1480 | in SFmode, DFmode and XFmode. */ | |
1481 | 2, /* cost of moving MMX register. */ | |
1482 | {6, 6}, /* cost of loading MMX registers | |
1483 | in SImode and DImode. */ | |
1484 | {8, 8}, /* cost of storing MMX registers | |
1485 | in SImode and DImode. */ | |
187dd65d | 1486 | 2, 2, 3, /* cost of moving XMM,YMM,ZMM |
2901f42f | 1487 | register. */ |
187dd65d | 1488 | {6, 6, 6, 6, 12}, /* cost of loading SSE registers |
2901f42f | 1489 | in 32,64,128,256 and 512-bit. */ |
2901f42f VK |
1490 | {8, 8, 8, 8, 16}, /* cost of storing SSE registers |
1491 | in 32,64,128,256 and 512-bit. */ | |
2901f42f VK |
1492 | 6, 6, /* SSE->integer and integer->SSE |
1493 | moves. */ | |
d321551c | 1494 | /* End of register allocator costs. */ |
72bb85f8 | 1495 | }, |
d321551c L |
1496 | |
1497 | COSTS_N_INSNS (1), /* cost of an add instruction. */ | |
1498 | COSTS_N_INSNS (1), /* cost of a lea instruction. */ | |
1499 | COSTS_N_INSNS (1), /* variable shift costs. */ | |
1500 | COSTS_N_INSNS (1), /* constant shift costs. */ | |
1501 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */ | |
1502 | COSTS_N_INSNS (3), /* HI. */ | |
1503 | COSTS_N_INSNS (3), /* SI. */ | |
1504 | COSTS_N_INSNS (3), /* DI. */ | |
1505 | COSTS_N_INSNS (3)}, /* other. */ | |
1506 | 0, /* cost of multiply per each bit | |
1507 | set. */ | |
1508 | /* Depending on parameters, idiv can get faster on ryzen. This is upper | |
1509 | bound. */ | |
1510 | {COSTS_N_INSNS (16), /* cost of a divide/mod for QI. */ | |
1511 | COSTS_N_INSNS (22), /* HI. */ | |
1512 | COSTS_N_INSNS (30), /* SI. */ | |
1513 | COSTS_N_INSNS (45), /* DI. */ | |
1514 | COSTS_N_INSNS (45)}, /* other. */ | |
1515 | COSTS_N_INSNS (1), /* cost of movsx. */ | |
1516 | COSTS_N_INSNS (1), /* cost of movzx. */ | |
1517 | 8, /* "large" insn. */ | |
1518 | 9, /* MOVE_RATIO. */ | |
25e22b19 | 1519 | 6, /* CLEAR_RATIO */ |
d321551c L |
1520 | {6, 6, 6}, /* cost of loading integer registers |
1521 | in QImode, HImode and SImode. | |
1522 | Relative to reg-reg move (2). */ | |
1523 | {8, 8, 8}, /* cost of storing integer | |
1524 | registers. */ | |
1525 | {6, 6, 6, 6, 12}, /* cost of loading SSE registers | |
1526 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1527 | {8, 8, 8, 8, 16}, /* cost of storing SSE register | |
1528 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1529 | {6, 6, 6, 6, 12}, /* cost of unaligned loads. */ | |
1530 | {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ | |
1531 | 2, 2, 3, /* cost of moving XMM,YMM,ZMM | |
1532 | register. */ | |
1533 | 6, /* cost of moving SSE register to integer. */ | |
2901f42f VK |
1534 | /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, |
1535 | throughput 12. Approx 9 uops do not depend on vector size and every load | |
1536 | is 7 uops. NOTE(review): VGATHERDPD is named twice with different figures; one is presumably another gather variant - confirm. */ | |
1537 | 18, 8, /* Gather load static, per_elt. */ | |
1538 | 18, 10, /* Gather store static, per_elt. */ | |
1539 | 32, /* size of l1 cache. */ | |
1540 | 512, /* size of l2 cache. */ | |
1541 | 64, /* size of prefetch block. */ | |
1542 | /* New AMD processors never drop prefetches; if they cannot be performed | |
1543 | immediately, they are queued. We set number of simultaneous prefetches | |
1544 | to a large constant to reflect this (it probably is not a good idea not | |
1545 | to limit number of prefetches at all, as their execution also takes some | |
1546 | time). */ | |
1547 | 100, /* number of parallel prefetches. */ | |
1548 | 3, /* Branch cost. */ | |
1549 | COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ | |
1550 | COSTS_N_INSNS (5), /* cost of FMUL instruction. */ | |
1551 | /* Latency of fdiv is 8-15. */ | |
1552 | COSTS_N_INSNS (15), /* cost of FDIV instruction. */ | |
1553 | COSTS_N_INSNS (1), /* cost of FABS instruction. */ | |
1554 | COSTS_N_INSNS (1), /* cost of FCHS instruction. */ | |
1555 | /* Latency of fsqrt is 4-10. */ | |
1556 | COSTS_N_INSNS (10), /* cost of FSQRT instruction. */ | |
1557 | ||
1558 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ | |
1559 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ | |
1560 | COSTS_N_INSNS (3), /* cost of MULSS instruction. */ | |
187dd65d | 1561 | COSTS_N_INSNS (3), /* cost of MULSD instruction. */ |
2901f42f VK |
1562 | COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ |
1563 | COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ | |
1564 | COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ | |
1565 | /* 9-13. */ | |
1566 | COSTS_N_INSNS (13), /* cost of DIVSD instruction. */ | |
1567 | COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */ | |
1568 | COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */ | |
1569 | /* Zen can execute 4 integer operations per cycle. FP operations | |
1570 | take 3 cycles and it can execute 2 integer additions and 2 | |
1571 | multiplications thus reassociation may make sense up to width of 6. | |
1572 | SPEC2k6 benchmarks suggest | |
1573 | that 4 works better than 6 probably due to register pressure. | |
1574 | ||
1575 | Integer vector operations are taken by FP unit and execute 3 vector | |
1576 | plus/minus operations per cycle but only one multiply. This is adjusted | |
1577 | in ix86_reassociation_width. */ | |
1578 | 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ | |
1579 | znver2_memcpy, | |
1580 | znver2_memset, | |
1581 | COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ | |
1582 | COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ | |
1583 | "16", /* Loop alignment. */ | |
1584 | "16", /* Jump alignment. */ | |
1585 | "0:0:8", /* Label alignment. */ | |
1586 | "16", /* Func alignment. */ | |
1587 | }; | |
1588 | ||
c234d831 UB |
1589 | /* skylake_cost should produce code tuned for Skylake family of CPUs. */ | |
1590 | static stringop_algs skylake_memcpy[2] = { /* stringop_algs pair listed in skylake_cost. */ | |
1591 | {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, | |
6e559c70 | 1592 | {libcall, {{16, loop, false}, {512, unrolled_loop, false}, |
c234d831 UB |
1593 | {-1, libcall, false}}}}; |
1594 | ||
1595 | static stringop_algs skylake_memset[2] = { /* stringop_algs pair listed in skylake_cost. */ | |
1596 | {libcall, {{6, loop_1_byte, true}, | |
1597 | {24, loop, true}, | |
1598 | {8192, rep_prefix_4_byte, true}, | |
1599 | {-1, libcall, false}}}, | |
6e559c70 | 1600 | {libcall, {{24, loop, true}, {512, unrolled_loop, false}, |
c234d831 UB |
1601 | {-1, libcall, false}}}}; |
1602 | ||
1603 | static const | |
1604 | struct processor_costs skylake_cost = { /* cost table that plugs in skylake_memcpy and skylake_memset */ | |
72bb85f8 | 1605 | { |
d321551c L |
1606 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
1607 | 6, /* cost for loading QImode using movzbl */ | |
1608 | {4, 4, 4}, /* cost of loading integer registers | |
1609 | in QImode, HImode and SImode. | |
1610 | Relative to reg-reg move (2). */ | |
7706f2f3 | 1611 | {6, 6, 6}, /* cost of storing integer registers */ |
d321551c L |
1612 | 2, /* cost of reg,reg fld/fst */ |
1613 | {6, 6, 8}, /* cost of loading fp registers | |
1614 | in SFmode, DFmode and XFmode */ | |
1615 | {6, 6, 10}, /* cost of storing fp registers | |
1616 | in SFmode, DFmode and XFmode */ | |
1617 | 2, /* cost of moving MMX register */ | |
1618 | {6, 6}, /* cost of loading MMX registers | |
1619 | in SImode and DImode */ | |
1620 | {6, 6}, /* cost of storing MMX registers | |
1621 | in SImode and DImode */ | |
1622 | 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ | |
1623 | {6, 6, 6, 10, 20}, /* cost of loading SSE registers | |
1624 | in 32,64,128,256 and 512-bit */ | |
1625 | {8, 8, 8, 12, 24}, /* cost of storing SSE registers | |
1626 | in 32,64,128,256 and 512-bit */ | |
4e9ad7c9 | 1627 | 6, 6, /* SSE->integer and integer->SSE moves */ |
d321551c | 1628 | /* End of register allocator costs. */ |
72bb85f8 | 1629 | }, |
d321551c | 1630 | |
c234d831 UB |
1631 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
1632 | COSTS_N_INSNS (1)+1, /* cost of a lea instruction */ | |
1633 | COSTS_N_INSNS (1), /* variable shift costs */ | |
1634 | COSTS_N_INSNS (1), /* constant shift costs */ | |
1635 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
1636 | COSTS_N_INSNS (4), /* HI */ | |
1637 | COSTS_N_INSNS (3), /* SI */ | |
a2ef9558 MT |
1638 | COSTS_N_INSNS (3), /* DI */ |
1639 | COSTS_N_INSNS (3)}, /* other */ | |
c234d831 | 1640 | 0, /* cost of multiply per each bit set */ |
02308bd3 MT |
1641 | /* Expanding div/mod currently doesn't consider parallelism. So the cost |
1642 | model is not realistic. We compensate by increasing the latencies a bit. */ | |
1643 | {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */ | |
1644 | COSTS_N_INSNS (11), /* HI */ | |
1645 | COSTS_N_INSNS (14), /* SI */ | |
c234d831 UB |
1646 | COSTS_N_INSNS (76), /* DI */ |
1647 | COSTS_N_INSNS (76)}, /* other */ | |
1648 | COSTS_N_INSNS (1), /* cost of movsx */ | |
1649 | COSTS_N_INSNS (0), /* cost of movzx */ | |
1650 | 8, /* "large" insn */ | |
1651 | 17, /* MOVE_RATIO */ | |
25e22b19 | 1652 | 6, /* CLEAR_RATIO */ |
c234d831 UB |
1653 | {4, 4, 4}, /* cost of loading integer registers |
1654 | in QImode, HImode and SImode. | |
1655 | Relative to reg-reg move (2). */ | |
101a0841 | 1656 | {6, 6, 6}, /* cost of storing integer registers */ |
d321551c L |
1657 | {6, 6, 6, 10, 20}, /* cost of loading SSE register |
1658 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1659 | {8, 8, 8, 12, 24}, /* cost of storing SSE register | |
1660 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
c234d831 | 1661 | {6, 6, 6, 10, 20}, /* cost of unaligned loads. */ |
c234d831 | 1662 | {8, 8, 8, 8, 16}, /* cost of unaligned stores. NOTE(review): last two entries (8, 16) are below the aligned store costs above (12, 24) - confirm intended. */ |
d321551c L |
1663 | 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ |
1664 | 2, /* cost of moving SSE register to integer. NOTE(review): differs from the 6 given for SSE->integer in the register allocator costs above - confirm intended. */ | |
c234d831 UB |
1665 | 20, 8, /* Gather load static, per_elt. */ |
1666 | 22, 10, /* Gather store static, per_elt. */ | |
1667 | 64, /* size of l1 cache. */ | |
1668 | 512, /* size of l2 cache. */ | |
1669 | 64, /* size of prefetch block */ | |
1670 | 6, /* number of parallel prefetches */ | |
1671 | 3, /* Branch cost */ | |
1672 | COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ | |
1673 | COSTS_N_INSNS (4), /* cost of FMUL instruction. */ | |
1674 | COSTS_N_INSNS (20), /* cost of FDIV instruction. */ | |
1675 | COSTS_N_INSNS (1), /* cost of FABS instruction. */ | |
1676 | COSTS_N_INSNS (1), /* cost of FCHS instruction. */ | |
1677 | COSTS_N_INSNS (20), /* cost of FSQRT instruction. */ | |
1678 | ||
1679 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ | |
1680 | COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ | |
1681 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
1682 | COSTS_N_INSNS (4), /* cost of MULSD instruction. */ | |
1683 | COSTS_N_INSNS (4), /* cost of FMA SS instruction. */ | |
1684 | COSTS_N_INSNS (4), /* cost of FMA SD instruction. */ | |
1685 | COSTS_N_INSNS (11), /* cost of DIVSS instruction. */ | |
1686 | COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ | |
1687 | COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */ | |
1688 | COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ | |
1689 | 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ | |
1690 | skylake_memcpy, | |
1691 | skylake_memset, | |
1692 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ | |
1693 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
1694 | "16:11:8", /* Loop alignment. */ |
1695 | "16:11:8", /* Jump alignment. */ | |
1696 | "0:0:8", /* Label alignment. */ | |
1697 | "16", /* Func alignment. */ | |
c234d831 | 1698 | }; |
64766e8d JH |
1699 | /* BTVER1 has optimized REP instruction for medium sized blocks, but for |
1700 | very small blocks it is better to use loop. For large blocks, libcall can | |
1701 | do non-temporal accesses and beat inline considerably. */ | |
1702 | static stringop_algs btver1_memcpy[2] = { /* stringop_algs pair listed in btver1_cost. */ | |
1703 | {libcall, {{6, loop, false}, {14, unrolled_loop, false}, | |
1704 | {-1, rep_prefix_4_byte, false}}}, | |
1705 | {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, | |
1706 | {-1, libcall, false}}}}; | |
1707 | static stringop_algs btver1_memset[2] = { /* stringop_algs pair listed in btver1_cost. */ | |
1708 | {libcall, {{8, loop, false}, {24, unrolled_loop, false}, | |
1709 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
1710 | {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, | |
1711 | {-1, libcall, false}}}}; | |
1712 | const struct processor_costs btver1_cost = { /* cost table that plugs in btver1_memcpy and btver1_memset */ | |
72bb85f8 | 1713 | { |
d321551c L |
1714 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
1715 | 8, /* cost for loading QImode using movzbl */ | |
1716 | {6, 8, 6}, /* cost of loading integer registers | |
1717 | in QImode, HImode and SImode. | |
1718 | Relative to reg-reg move (2). */ | |
1719 | {6, 8, 6}, /* cost of storing integer registers */ | |
1720 | 4, /* cost of reg,reg fld/fst */ | |
1721 | {12, 12, 28}, /* cost of loading fp registers | |
1722 | in SFmode, DFmode and XFmode */ | |
1723 | {12, 12, 38}, /* cost of storing fp registers | |
1724 | in SFmode, DFmode and XFmode */ | |
1725 | 4, /* cost of moving MMX register */ | |
1726 | {10, 10}, /* cost of loading MMX registers | |
1727 | in SImode and DImode */ | |
1728 | {12, 12}, /* cost of storing MMX registers | |
1729 | in SImode and DImode */ | |
1730 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
1731 | {10, 10, 12, 48, 96}, /* cost of loading SSE registers | |
1732 | in 32,64,128,256 and 512-bit */ | |
1733 | {10, 10, 12, 48, 96}, /* cost of storing SSE registers | |
1734 | in 32,64,128,256 and 512-bit */ | |
1735 | 14, 14, /* SSE->integer and integer->SSE moves */ | |
1736 | /* End of register allocator costs. */ | |
72bb85f8 | 1737 | }, |
d321551c | 1738 | |
64766e8d JH |
1739 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
1740 | COSTS_N_INSNS (2), /* cost of a lea instruction */ | |
1741 | COSTS_N_INSNS (1), /* variable shift costs */ | |
1742 | COSTS_N_INSNS (1), /* constant shift costs */ | |
1743 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
1744 | COSTS_N_INSNS (4), /* HI */ | |
1745 | COSTS_N_INSNS (3), /* SI */ | |
1746 | COSTS_N_INSNS (4), /* DI */ | |
1747 | COSTS_N_INSNS (5)}, /* other */ | |
1748 | 0, /* cost of multiply per each bit set */ | |
1749 | {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ | |
1750 | COSTS_N_INSNS (35), /* HI */ | |
1751 | COSTS_N_INSNS (51), /* SI */ | |
1752 | COSTS_N_INSNS (83), /* DI */ | |
1753 | COSTS_N_INSNS (83)}, /* other */ | |
1754 | COSTS_N_INSNS (1), /* cost of movsx */ | |
1755 | COSTS_N_INSNS (1), /* cost of movzx */ | |
1756 | 8, /* "large" insn */ | |
1757 | 9, /* MOVE_RATIO */ | |
25e22b19 | 1758 | 6, /* CLEAR_RATIO */ |
df41dbaf | 1759 | {6, 8, 6}, /* cost of loading integer registers |
64766e8d JH |
1760 | in QImode, HImode and SImode. |
1761 | Relative to reg-reg move (2). */ | |
df41dbaf | 1762 | {6, 8, 6}, /* cost of storing integer registers */ |
d321551c L |
1763 | {10, 10, 12, 48, 96}, /* cost of loading SSE register |
1764 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1765 | {10, 10, 12, 48, 96}, /* cost of storing SSE register | |
1766 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
b7167993 | 1767 | {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ |
b7167993 | 1768 | {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ |
d321551c L |
1769 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
1770 | 14, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
1771 | 10, 10, /* Gather load static, per_elt. */ |
1772 | 10, 10, /* Gather store static, per_elt. */ | |
64766e8d JH |
1773 | 32, /* size of l1 cache. */ |
1774 | 512, /* size of l2 cache. */ | |
1775 | 64, /* size of prefetch block */ | |
1776 | 100, /* number of parallel prefetches */ | |
1777 | 2, /* Branch cost */ | |
1778 | COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ | |
1779 | COSTS_N_INSNS (4), /* cost of FMUL instruction. */ | |
1780 | COSTS_N_INSNS (19), /* cost of FDIV instruction. */ | |
1781 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
1782 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
1783 | COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ | |
6065f444 | 1784 | |
c53c148c | 1785 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
1786 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ |
1787 | COSTS_N_INSNS (2), /* cost of MULSS instruction. */ | |
1788 | COSTS_N_INSNS (4), /* cost of MULSD instruction. */ | |
c53c148c JH |
1789 | COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ |
1790 | COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ | |
6065f444 JH |
1791 | COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ |
1792 | COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ | |
1793 | COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ | |
1794 | COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
1795 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
1796 | btver1_memcpy, | |
1797 | btver1_memset, | |
f6fd8f2b JH |
1798 | COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ |
1799 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
1800 | "16:11:8", /* Loop alignment. */ |
1801 | "16:8:8", /* Jump alignment. */ | |
1802 | "0:0:8", /* Label alignment. */ | |
1803 | "11", /* Func alignment. */ | |
64766e8d JH |
1804 | }; |
1805 | ||
1806 | static stringop_algs btver2_memcpy[2] = { /* entries are identical to btver1_memcpy. */ | |
1807 | {libcall, {{6, loop, false}, {14, unrolled_loop, false}, | |
1808 | {-1, rep_prefix_4_byte, false}}}, | |
1809 | {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false}, | |
1810 | {-1, libcall, false}}}}; | |
1811 | static stringop_algs btver2_memset[2] = { /* entries are identical to btver1_memset. */ | |
1812 | {libcall, {{8, loop, false}, {24, unrolled_loop, false}, | |
1813 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
1814 | {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false}, | |
1815 | {-1, libcall, false}}}}; | |
/* Processor cost table for btver2 tuning.  The initializer is positional;
   every field is annotated inline.  Values built with COSTS_N_INSNS are
   relative to an add, and the register-allocator section is relative to a
   reg-reg integer move of cost 2.
   NOTE(review): unlike the neighbouring tables this one is not declared
   static; presumably it is referenced from another file, so confirm
   before changing its linkage.  */
1816 | const struct processor_costs btver2_cost = { | |
72bb85f8 | 1817 | { |
d321551c L |
1818 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
1819 | 8, /* cost for loading QImode using movzbl */ | |
1820 | {8, 8, 6}, /* cost of loading integer registers | |
1821 | in QImode, HImode and SImode. | |
1822 | Relative to reg-reg move (2). */ | |
1823 | {8, 8, 6}, /* cost of storing integer registers */ | |
1824 | 4, /* cost of reg,reg fld/fst */ | |
1825 | {12, 12, 28}, /* cost of loading fp registers | |
1826 | in SFmode, DFmode and XFmode */ | |
1827 | {12, 12, 38}, /* cost of storing fp registers | |
1828 | in SFmode, DFmode and XFmode */ | |
1829 | 4, /* cost of moving MMX register */ | |
1830 | {10, 10}, /* cost of loading MMX registers | |
1831 | in SImode and DImode */ | |
1832 | {12, 12}, /* cost of storing MMX registers | |
1833 | in SImode and DImode */ | |
1834 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
1835 | {10, 10, 12, 48, 96}, /* cost of loading SSE registers | |
1836 | in 32,64,128,256 and 512-bit */ | |
1837 | {10, 10, 12, 48, 96}, /* cost of storing SSE registers | |
1838 | in 32,64,128,256 and 512-bit */ | |
1839 | 14, 14, /* SSE->integer and integer->SSE moves */ | |
1840 | /* End of register allocator costs. */ | |
72bb85f8 | 1841 | }, |
d321551c | 1842 | |
64766e8d JH |
1843 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
1844 | COSTS_N_INSNS (2), /* cost of a lea instruction */ | |
1845 | COSTS_N_INSNS (1), /* variable shift costs */ | |
1846 | COSTS_N_INSNS (1), /* constant shift costs */ | |
1847 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
1848 | COSTS_N_INSNS (4), /* HI */ | |
1849 | COSTS_N_INSNS (3), /* SI */ | |
1850 | COSTS_N_INSNS (4), /* DI */ | |
1851 | COSTS_N_INSNS (5)}, /* other */ | |
1852 | 0, /* cost of multiply per each bit set */ | |
1853 | {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ | |
1854 | COSTS_N_INSNS (35), /* HI */ | |
1855 | COSTS_N_INSNS (51), /* SI */ | |
1856 | COSTS_N_INSNS (83), /* DI */ | |
1857 | COSTS_N_INSNS (83)}, /* other */ | |
1858 | COSTS_N_INSNS (1), /* cost of movsx */ | |
1859 | COSTS_N_INSNS (1), /* cost of movzx */ | |
1860 | 8, /* "large" insn */ | |
1861 | 9, /* MOVE_RATIO */ | |
25e22b19 | 1862 | 6, /* CLEAR_RATIO */ |
df41dbaf | 1863 | {8, 8, 6}, /* cost of loading integer registers |
64766e8d JH |
1864 | in QImode, HImode and SImode. |
1865 | Relative to reg-reg move (2). */ | |
df41dbaf | 1866 | {8, 8, 6}, /* cost of storing integer registers */ |
d321551c L |
1867 | {10, 10, 12, 48, 96}, /* cost of loading SSE register |
1868 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1869 | {10, 10, 12, 48, 96}, /* cost of storing SSE register | |
1870 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
b7167993 | 1871 | {10, 10, 12, 48, 96}, /* cost of unaligned loads. */ |
b7167993 | 1872 | {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ |
d321551c L |
1873 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
1874 | 14, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
1875 | 10, 10, /* Gather load static, per_elt. */ |
1876 | 10, 10, /* Gather store static, per_elt. */ | |
64766e8d JH |
1877 | 32, /* size of l1 cache. */ |
1878 | 2048, /* size of l2 cache. */ | |
1879 | 64, /* size of prefetch block */ | |
1880 | 100, /* number of parallel prefetches */ | |
1881 | 2, /* Branch cost */ | |
1882 | COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ | |
1883 | COSTS_N_INSNS (4), /* cost of FMUL instruction. */ | |
1884 | COSTS_N_INSNS (19), /* cost of FDIV instruction. */ | |
1885 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
1886 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
1887 | COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ | |
6065f444 | 1888 | |
c53c148c | 1889 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
1890 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ |
1891 | COSTS_N_INSNS (2), /* cost of MULSS instruction. */ | |
1892 | COSTS_N_INSNS (4), /* cost of MULSD instruction. */ | |
c53c148c JH |
1893 | COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ |
1894 | COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ | |
6065f444 JH |
1895 | COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ |
1896 | COSTS_N_INSNS (19), /* cost of DIVSD instruction. */ | |
1897 | COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */ | |
1898 | COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
1899 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
1900 | btver2_memcpy, | |
1901 | btver2_memset, | |
f6fd8f2b JH |
1902 | COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ |
1903 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
1904 | "16:11:8", /* Loop alignment. */ |
1905 | "16:8:8", /* Jump alignment. */ | |
1906 | "0:0:8", /* Label alignment. */ | |
1907 | "11", /* Func alignment. */ | |
64766e8d JH |
1908 | }; |
1909 | ||
/* memcpy strategy table referenced from pentium4_cost.  The first slot
   lists {threshold, algorithm, flag} triples ending in a -1 threshold;
   the second slot is the DUMMY_STRINGOP_ALGS placeholder (libcall only).
   NOTE(review): the second slot is presumably the 64-bit variant, left
   as a placeholder; confirm against the stringop_algs declaration.  */
1910 | static stringop_algs pentium4_memcpy[2] = { | |
1911 | {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, | |
1912 | DUMMY_STRINGOP_ALGS}; | |
/* memset strategy table referenced from pentium4_cost.  The first slot
   lists {threshold, algorithm, flag} triples ending in a -1 threshold;
   the second slot is the DUMMY_STRINGOP_ALGS placeholder (libcall only).
   NOTE(review): slot meaning is assumed; confirm against the
   stringop_algs declaration.  */
1913 | static stringop_algs pentium4_memset[2] = { | |
1914 | {libcall, {{6, loop_1_byte, false}, {48, loop, false}, | |
1915 | {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
1916 | DUMMY_STRINGOP_ALGS}; | |
1917 | ||
/* Processor cost table for pentium4 tuning.  The initializer is
   positional; every field is annotated inline.  Values built with
   COSTS_N_INSNS are relative to an add, and the register-allocator
   section is relative to a reg-reg integer move of cost 2.  All four
   alignment entries at the end are NULL.  */
1918 | static const | |
1919 | struct processor_costs pentium4_cost = { | |
72bb85f8 | 1920 | { |
d321551c | 1921 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
df41dbaf | 1922 | 5, /* cost for loading QImode using movzbl */ |
64766e8d JH |
1923 | {4, 5, 4}, /* cost of loading integer registers |
1924 | in QImode, HImode and SImode. | |
1925 | Relative to reg-reg move (2). */ | |
1926 | {2, 3, 2}, /* cost of storing integer registers */ | |
df41dbaf JH |
1927 | 12, /* cost of reg,reg fld/fst */ |
1928 | {14, 14, 14}, /* cost of loading fp registers | |
64766e8d | 1929 | in SFmode, DFmode and XFmode */ |
df41dbaf | 1930 | {14, 14, 14}, /* cost of storing fp registers |
64766e8d | 1931 | in SFmode, DFmode and XFmode */ |
df41dbaf JH |
1932 | 12, /* cost of moving MMX register */ |
1933 | {16, 16}, /* cost of loading MMX registers | |
64766e8d | 1934 | in SImode and DImode */ |
df41dbaf | 1935 | {16, 16}, /* cost of storing MMX registers |
64766e8d | 1936 | in SImode and DImode */ |
df41dbaf JH |
1937 | 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */ |
1938 | {16, 16, 16, 32, 64}, /* cost of loading SSE registers | |
1939 | in 32,64,128,256 and 512-bit */ | |
d321551c L |
1940 | {16, 16, 16, 32, 64}, /* cost of storing SSE registers |
1941 | in 32,64,128,256 and 512-bit */ | |
1942 | 20, 12, /* SSE->integer and integer->SSE moves */ | |
1943 | /* End of register allocator costs. */ | |
72bb85f8 | 1944 | }, |
d321551c L |
1945 | |
1946 | COSTS_N_INSNS (1), /* cost of an add instruction */ | |
1947 | COSTS_N_INSNS (3), /* cost of a lea instruction */ | |
1948 | COSTS_N_INSNS (4), /* variable shift costs */ | |
1949 | COSTS_N_INSNS (4), /* constant shift costs */ | |
1950 | {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ | |
1951 | COSTS_N_INSNS (15), /* HI */ | |
1952 | COSTS_N_INSNS (15), /* SI */ | |
1953 | COSTS_N_INSNS (15), /* DI */ | |
1954 | COSTS_N_INSNS (15)}, /* other */ | |
1955 | 0, /* cost of multiply per each bit set */ | |
1956 | {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ | |
1957 | COSTS_N_INSNS (56), /* HI */ | |
1958 | COSTS_N_INSNS (56), /* SI */ | |
1959 | COSTS_N_INSNS (56), /* DI */ | |
1960 | COSTS_N_INSNS (56)}, /* other */ | |
1961 | COSTS_N_INSNS (1), /* cost of movsx */ | |
1962 | COSTS_N_INSNS (1), /* cost of movzx */ | |
1963 | 16, /* "large" insn */ | |
1964 | 6, /* MOVE_RATIO */ | |
25e22b19 | 1965 | 6, /* CLEAR_RATIO */ |
d321551c L |
1966 | {4, 5, 4}, /* cost of loading integer registers |
1967 | in QImode, HImode and SImode. | |
1968 | Relative to reg-reg move (2). */ | |
1969 | {2, 3, 2}, /* cost of storing integer registers */ | |
1970 | {16, 16, 16, 32, 64}, /* cost of loading SSE register | |
1971 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
1972 | {16, 16, 16, 32, 64}, /* cost of storing SSE register | |
1973 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 1974 | {32, 32, 32, 64, 128}, /* cost of unaligned loads. */ |
df41dbaf | 1975 | {32, 32, 32, 64, 128}, /* cost of unaligned stores. */ |
d321551c L |
1976 | 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */ |
1977 | 20, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
1978 | 16, 16, /* Gather load static, per_elt. */ |
1979 | 16, 16, /* Gather store static, per_elt. */ | |
64766e8d JH |
1980 | 8, /* size of l1 cache. */ |
1981 | 256, /* size of l2 cache. */ | |
1982 | 64, /* size of prefetch block */ | |
1983 | 6, /* number of parallel prefetches */ | |
1984 | 2, /* Branch cost */ | |
1985 | COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ | |
1986 | COSTS_N_INSNS (7), /* cost of FMUL instruction. */ | |
1987 | COSTS_N_INSNS (43), /* cost of FDIV instruction. */ | |
1988 | COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
1989 | COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
1990 | COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ | |
6065f444 | 1991 | |
c53c148c | 1992 | COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ |
6065f444 JH |
1993 | COSTS_N_INSNS (4), /* cost of ADDSS/SD SUBSS/SD insns. */ |
1994 | COSTS_N_INSNS (6), /* cost of MULSS instruction. */ | |
1995 | COSTS_N_INSNS (6), /* cost of MULSD instruction. */ | |
c53c148c JH |
1996 | COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ |
1997 | COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ | |
6065f444 JH |
1998 | COSTS_N_INSNS (23), /* cost of DIVSS instruction. */ |
1999 | COSTS_N_INSNS (38), /* cost of DIVSD instruction. */ | |
2000 | COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */ | |
2001 | COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
2002 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
2003 | pentium4_memcpy, | |
2004 | pentium4_memset, | |
f6fd8f2b JH |
2005 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
2006 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
2007 | NULL, /* Loop alignment. */ |
2008 | NULL, /* Jump alignment. */ | |
2009 | NULL, /* Label alignment. */ | |
2010 | NULL, /* Func alignment. */ | |
64766e8d JH |
2011 | }; |
2012 | ||
/* memcpy strategy table referenced from nocona_cost.  It holds two
   stringop_algs initializers; each names a leading algorithm followed by
   {threshold, algorithm, flag} triples ending in a -1 threshold.
   NOTE(review): the threshold is presumably a maximum block size and the
   two slots presumably serve 32-bit and 64-bit code; confirm against the
   stringop_algs declaration.  */
2013 | static stringop_algs nocona_memcpy[2] = { | |
2014 | {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}}, | |
2015 | {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false}, | |
2016 | {100000, unrolled_loop, false}, {-1, libcall, false}}}}; | |
2017 | ||
/* memset strategy table referenced from nocona_cost.  Same layout as the
   memcpy table: two stringop_algs initializers, each a leading algorithm
   plus {threshold, algorithm, flag} triples ending in a -1 threshold.
   NOTE(review): slot and threshold meaning are assumed; confirm against
   the stringop_algs declaration.  */
2018 | static stringop_algs nocona_memset[2] = { | |
2019 | {libcall, {{6, loop_1_byte, false}, {48, loop, false}, | |
2020 | {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
2021 | {libcall, {{24, loop, false}, {64, unrolled_loop, false}, | |
2022 | {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; | |
2023 | ||
/* Processor cost table for nocona tuning.  The initializer is positional;
   every field is annotated inline.  Values built with COSTS_N_INSNS are
   relative to an add, and the register-allocator section is relative to
   a reg-reg integer move of cost 2.  All four alignment entries at the
   end are NULL.  */
2024 | static const | |
2025 | struct processor_costs nocona_cost = { | |
72bb85f8 | 2026 | { |
d321551c L |
2027 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
2028 | 4, /* cost for loading QImode using movzbl */ | |
2029 | {4, 4, 4}, /* cost of loading integer registers | |
2030 | in QImode, HImode and SImode. | |
2031 | Relative to reg-reg move (2). */ | |
2032 | {4, 4, 4}, /* cost of storing integer registers */ | |
2033 | 12, /* cost of reg,reg fld/fst */ | |
2034 | {14, 14, 14}, /* cost of loading fp registers | |
2035 | in SFmode, DFmode and XFmode */ | |
2036 | {14, 14, 14}, /* cost of storing fp registers | |
2037 | in SFmode, DFmode and XFmode */ | |
2038 | 14, /* cost of moving MMX register */ | |
2039 | {12, 12}, /* cost of loading MMX registers | |
2040 | in SImode and DImode */ | |
2041 | {12, 12}, /* cost of storing MMX registers | |
2042 | in SImode and DImode */ | |
2043 | 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */ | |
2044 | {12, 12, 12, 24, 48}, /* cost of loading SSE registers | |
2045 | in 32,64,128,256 and 512-bit */ | |
2046 | {12, 12, 12, 24, 48}, /* cost of storing SSE registers | |
2047 | in 32,64,128,256 and 512-bit */ | |
2048 | 20, 12, /* SSE->integer and integer->SSE moves */ | |
2049 | /* End of register allocator costs. */ | |
72bb85f8 | 2050 | }, |
d321551c | 2051 | |
64766e8d JH |
2052 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
2053 | COSTS_N_INSNS (1), /* cost of a lea instruction */ | |
2054 | COSTS_N_INSNS (1), /* variable shift costs */ | |
2055 | COSTS_N_INSNS (1), /* constant shift costs */ | |
2056 | {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ | |
2057 | COSTS_N_INSNS (10), /* HI */ | |
2058 | COSTS_N_INSNS (10), /* SI */ | |
2059 | COSTS_N_INSNS (10), /* DI */ | |
2060 | COSTS_N_INSNS (10)}, /* other */ | |
2061 | 0, /* cost of multiply per each bit set */ | |
2062 | {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ | |
2063 | COSTS_N_INSNS (66), /* HI */ | |
2064 | COSTS_N_INSNS (66), /* SI */ | |
2065 | COSTS_N_INSNS (66), /* DI */ | |
2066 | COSTS_N_INSNS (66)}, /* other */ | |
2067 | COSTS_N_INSNS (1), /* cost of movsx */ | |
2068 | COSTS_N_INSNS (1), /* cost of movzx */ | |
2069 | 16, /* "large" insn */ | |
2070 | 17, /* MOVE_RATIO */ | |
25e22b19 | 2071 | 6, /* CLEAR_RATIO */ |
64766e8d JH |
2072 | {4, 4, 4}, /* cost of loading integer registers |
2073 | in QImode, HImode and SImode. | |
2074 | Relative to reg-reg move (2). */ | |
2075 | {4, 4, 4}, /* cost of storing integer registers */ | |
d321551c L |
2076 | {12, 12, 12, 24, 48}, /* cost of loading SSE register |
2077 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
2078 | {12, 12, 12, 24, 48}, /* cost of storing SSE register | |
2079 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 2080 | {24, 24, 24, 48, 96}, /* cost of unaligned loads. */ |
df41dbaf | 2081 | {24, 24, 24, 48, 96}, /* cost of unaligned stores. */ |
d321551c L |
2082 | 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */ |
2083 | 20, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
2084 | 12, 12, /* Gather load static, per_elt. */ |
2085 | 12, 12, /* Gather store static, per_elt. */ | |
64766e8d JH |
2086 | 8, /* size of l1 cache. */ |
2087 | 1024, /* size of l2 cache. */ | |
2088 | 64, /* size of prefetch block */ | |
2089 | 8, /* number of parallel prefetches */ | |
2090 | 1, /* Branch cost */ | |
2091 | COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ | |
2092 | COSTS_N_INSNS (8), /* cost of FMUL instruction. */ | |
2093 | COSTS_N_INSNS (40), /* cost of FDIV instruction. */ | |
2094 | COSTS_N_INSNS (3), /* cost of FABS instruction. */ | |
2095 | COSTS_N_INSNS (3), /* cost of FCHS instruction. */ | |
2096 | COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ | |
6065f444 | 2097 | |
c53c148c | 2098 | COSTS_N_INSNS (2), /* cost of cheap SSE instruction. */ |
6065f444 JH |
2099 | COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */ |
2100 | COSTS_N_INSNS (7), /* cost of MULSS instruction. */ | |
2101 | COSTS_N_INSNS (7), /* cost of MULSD instruction. */ | |
c53c148c JH |
2102 | COSTS_N_INSNS (7), /* cost of FMA SS instruction. */ |
2103 | COSTS_N_INSNS (7), /* cost of FMA SD instruction. */ | |
6065f444 JH |
2104 | COSTS_N_INSNS (32), /* cost of DIVSS instruction. */ |
2105 | COSTS_N_INSNS (40), /* cost of DIVSD instruction. */ | |
2106 | COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ | |
2107 | COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
2108 | 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
2109 | nocona_memcpy, | |
2110 | nocona_memset, | |
f6fd8f2b JH |
2111 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
2112 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
2113 | NULL, /* Loop alignment. */ |
2114 | NULL, /* Jump alignment. */ | |
2115 | NULL, /* Label alignment. */ | |
2116 | NULL, /* Func alignment. */ | |
64766e8d JH |
2117 | }; |
2118 | ||
/* memcpy strategy table referenced from atom_cost.  It holds two
   stringop_algs initializers; each names a leading algorithm followed by
   {threshold, algorithm, flag} triples ending in a -1 threshold.
   NOTE(review): the threshold is presumably a maximum block size and the
   two slots presumably serve 32-bit and 64-bit code; confirm against the
   stringop_algs declaration.  */
2119 | static stringop_algs atom_memcpy[2] = { | |
2120 | {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, | |
2121 | {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, | |
2122 | {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; | |
/* memset strategy table referenced from atom_cost.  Same layout as the
   memcpy table: two stringop_algs initializers, each a leading algorithm
   plus {threshold, algorithm, flag} triples ending in a -1 threshold.
   NOTE(review): slot and threshold meaning are assumed; confirm against
   the stringop_algs declaration.  */
2123 | static stringop_algs atom_memset[2] = { | |
2124 | {libcall, {{8, loop, false}, {15, unrolled_loop, false}, | |
2125 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
2126 | {libcall, {{24, loop, false}, {32, unrolled_loop, false}, | |
2127 | {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; | |
/* Processor cost table for atom tuning.  The initializer is positional;
   every field is annotated inline.  Values built with COSTS_N_INSNS are
   relative to an add, and the register-allocator section is relative to
   a reg-reg integer move of cost 2.  The lea cost is COSTS_N_INSNS (1) + 1,
   i.e. slightly above the add cost.  */
2128 | static const | |
2129 | struct processor_costs atom_cost = { | |
72bb85f8 | 2130 | { |
d321551c L |
2131 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
2132 | 6, /* cost for loading QImode using movzbl */ | |
2133 | {6, 6, 6}, /* cost of loading integer registers | |
2134 | in QImode, HImode and SImode. | |
2135 | Relative to reg-reg move (2). */ | |
2136 | {6, 6, 6}, /* cost of storing integer registers */ | |
2137 | 4, /* cost of reg,reg fld/fst */ | |
2138 | {6, 6, 18}, /* cost of loading fp registers | |
2139 | in SFmode, DFmode and XFmode */ | |
2140 | {14, 14, 24}, /* cost of storing fp registers | |
2141 | in SFmode, DFmode and XFmode */ | |
2142 | 2, /* cost of moving MMX register */ | |
2143 | {8, 8}, /* cost of loading MMX registers | |
2144 | in SImode and DImode */ | |
2145 | {10, 10}, /* cost of storing MMX registers | |
2146 | in SImode and DImode */ | |
2147 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
2148 | {8, 8, 8, 16, 32}, /* cost of loading SSE registers | |
2149 | in 32,64,128,256 and 512-bit */ | |
2150 | {8, 8, 8, 16, 32}, /* cost of storing SSE registers | |
2151 | in 32,64,128,256 and 512-bit */ | |
2152 | 8, 6, /* SSE->integer and integer->SSE moves */ | |
2153 | /* End of register allocator costs. */ | |
72bb85f8 | 2154 | }, |
d321551c | 2155 | |
64766e8d JH |
2156 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
2157 | COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ | |
2158 | COSTS_N_INSNS (1), /* variable shift costs */ | |
2159 | COSTS_N_INSNS (1), /* constant shift costs */ | |
2160 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
2161 | COSTS_N_INSNS (4), /* HI */ | |
2162 | COSTS_N_INSNS (3), /* SI */ | |
2163 | COSTS_N_INSNS (4), /* DI */ | |
2164 | COSTS_N_INSNS (2)}, /* other */ | |
2165 | 0, /* cost of multiply per each bit set */ | |
2166 | {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ | |
2167 | COSTS_N_INSNS (26), /* HI */ | |
2168 | COSTS_N_INSNS (42), /* SI */ | |
2169 | COSTS_N_INSNS (74), /* DI */ | |
2170 | COSTS_N_INSNS (74)}, /* other */ | |
2171 | COSTS_N_INSNS (1), /* cost of movsx */ | |
2172 | COSTS_N_INSNS (1), /* cost of movzx */ | |
2173 | 8, /* "large" insn */ | |
2174 | 17, /* MOVE_RATIO */ | |
25e22b19 | 2175 | 6, /* CLEAR_RATIO */ |
df41dbaf | 2176 | {6, 6, 6}, /* cost of loading integer registers |
64766e8d JH |
2177 | in QImode, HImode and SImode. |
2178 | Relative to reg-reg move (2). */ | |
df41dbaf | 2179 | {6, 6, 6}, /* cost of storing integer registers */ |
d321551c L |
2180 | {8, 8, 8, 16, 32}, /* cost of loading SSE register |
2181 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
2182 | {8, 8, 8, 16, 32}, /* cost of storing SSE register | |
2183 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 2184 | {16, 16, 16, 32, 64}, /* cost of unaligned loads. */ |
df41dbaf | 2185 | {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ |
d321551c L |
2186 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
2187 | 8, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
2188 | 8, 8, /* Gather load static, per_elt. */ |
2189 | 8, 8, /* Gather store static, per_elt. */ | |
64766e8d JH |
2190 | 32, /* size of l1 cache. */ |
2191 | 256, /* size of l2 cache. */ | |
2192 | 64, /* size of prefetch block */ | |
2193 | 6, /* number of parallel prefetches */ | |
2194 | 3, /* Branch cost */ | |
2195 | COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ | |
2196 | COSTS_N_INSNS (8), /* cost of FMUL instruction. */ | |
2197 | COSTS_N_INSNS (20), /* cost of FDIV instruction. */ | |
2198 | COSTS_N_INSNS (8), /* cost of FABS instruction. */ | |
2199 | COSTS_N_INSNS (8), /* cost of FCHS instruction. */ | |
2200 | COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ | |
6065f444 | 2201 | |
c53c148c | 2202 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
2203 | COSTS_N_INSNS (5), /* cost of ADDSS/SD SUBSS/SD insns. */ |
2204 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
2205 | COSTS_N_INSNS (5), /* cost of MULSD instruction. */ | |
c53c148c JH |
2206 | COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ |
2207 | COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ | |
6065f444 JH |
2208 | COSTS_N_INSNS (31), /* cost of DIVSS instruction. */ |
2209 | COSTS_N_INSNS (60), /* cost of DIVSD instruction. */ | |
2210 | COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */ | |
2211 | COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
2212 | 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ |
2213 | atom_memcpy, | |
2214 | atom_memset, | |
f6fd8f2b JH |
2215 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
2216 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
2217 | "16", /* Loop alignment. */ |
2218 | "16:8:8", /* Jump alignment. */ | |
2219 | "0:0:8", /* Label alignment. */ | |
2220 | "16", /* Func alignment. */ | |
64766e8d JH |
2221 | }; |
2222 | ||
/* memcpy strategy table referenced from slm_cost.  It holds two
   stringop_algs initializers; each names a leading algorithm followed by
   {threshold, algorithm, flag} triples ending in a -1 threshold.
   NOTE(review): the threshold is presumably a maximum block size and the
   two slots presumably serve 32-bit and 64-bit code; confirm against the
   stringop_algs declaration.  */
2223 | static stringop_algs slm_memcpy[2] = { | |
2224 | {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, | |
2225 | {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, | |
2226 | {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; | |
/* memset strategy table referenced from slm_cost.  Same layout as the
   memcpy table: two stringop_algs initializers, each a leading algorithm
   plus {threshold, algorithm, flag} triples ending in a -1 threshold.
   NOTE(review): slot and threshold meaning are assumed; confirm against
   the stringop_algs declaration.  */
2227 | static stringop_algs slm_memset[2] = { | |
2228 | {libcall, {{8, loop, false}, {15, unrolled_loop, false}, | |
2229 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
2230 | {libcall, {{24, loop, false}, {32, unrolled_loop, false}, | |
2231 | {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; | |
/* Processor cost table for slm tuning.  The initializer is positional;
   every field is annotated inline.  Values built with COSTS_N_INSNS are
   relative to an add, and the register-allocator section is relative to
   a reg-reg integer move of cost 2.  The lea cost is COSTS_N_INSNS (1) + 1,
   i.e. slightly above the add cost.  */
2232 | static const | |
2233 | struct processor_costs slm_cost = { | |
72bb85f8 | 2234 | { |
d321551c L |
2235 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
2236 | 8, /* cost for loading QImode using movzbl */ | |
2237 | {8, 8, 8}, /* cost of loading integer registers | |
2238 | in QImode, HImode and SImode. | |
2239 | Relative to reg-reg move (2). */ | |
2240 | {6, 6, 6}, /* cost of storing integer registers */ | |
2241 | 2, /* cost of reg,reg fld/fst */ | |
2242 | {8, 8, 18}, /* cost of loading fp registers | |
2243 | in SFmode, DFmode and XFmode */ | |
2244 | {6, 6, 18}, /* cost of storing fp registers | |
2245 | in SFmode, DFmode and XFmode */ | |
2246 | 2, /* cost of moving MMX register */ | |
2247 | {8, 8}, /* cost of loading MMX registers | |
2248 | in SImode and DImode */ | |
2249 | {6, 6}, /* cost of storing MMX registers | |
2250 | in SImode and DImode */ | |
2251 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ | |
2252 | {8, 8, 8, 16, 32}, /* cost of loading SSE registers | |
2253 | in 32,64,128,256 and 512-bit */ | |
2254 | {8, 8, 8, 16, 32}, /* cost of storing SSE registers | |
2255 | in 32,64,128,256 and 512-bit */ | |
2256 | 8, 6, /* SSE->integer and integer->SSE moves */ | |
2257 | /* End of register allocator costs. */ | |
72bb85f8 | 2258 | }, |
d321551c | 2259 | |
64766e8d JH |
2260 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
2261 | COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ | |
2262 | COSTS_N_INSNS (1), /* variable shift costs */ | |
2263 | COSTS_N_INSNS (1), /* constant shift costs */ | |
2264 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
2265 | COSTS_N_INSNS (3), /* HI */ | |
2266 | COSTS_N_INSNS (3), /* SI */ | |
2267 | COSTS_N_INSNS (4), /* DI */ | |
2268 | COSTS_N_INSNS (2)}, /* other */ | |
2269 | 0, /* cost of multiply per each bit set */ | |
2270 | {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ | |
2271 | COSTS_N_INSNS (26), /* HI */ | |
2272 | COSTS_N_INSNS (42), /* SI */ | |
2273 | COSTS_N_INSNS (74), /* DI */ | |
2274 | COSTS_N_INSNS (74)}, /* other */ | |
2275 | COSTS_N_INSNS (1), /* cost of movsx */ | |
2276 | COSTS_N_INSNS (1), /* cost of movzx */ | |
2277 | 8, /* "large" insn */ | |
2278 | 17, /* MOVE_RATIO */ | |
25e22b19 | 2279 | 6, /* CLEAR_RATIO */ |
df41dbaf | 2280 | {8, 8, 8}, /* cost of loading integer registers |
64766e8d JH |
2281 | in QImode, HImode and SImode. |
2282 | Relative to reg-reg move (2). */ | |
df41dbaf | 2283 | {6, 6, 6}, /* cost of storing integer registers */ |
d321551c L |
2284 | {8, 8, 8, 16, 32}, /* cost of loading SSE register |
2285 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
2286 | {8, 8, 8, 16, 32}, /* cost of storing SSE register | |
2287 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 2288 | {16, 16, 16, 32, 64}, /* cost of unaligned loads. */ |
df41dbaf | 2289 | {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ |
d321551c L |
2290 | 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ |
2291 | 8, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
2292 | 8, 8, /* Gather load static, per_elt. */ |
2293 | 8, 8, /* Gather store static, per_elt. */ | |
64766e8d JH |
2294 | 32, /* size of l1 cache. */ |
2295 | 256, /* size of l2 cache. */ | |
2296 | 64, /* size of prefetch block */ | |
2297 | 6, /* number of parallel prefetches */ | |
2298 | 3, /* Branch cost */ | |
2299 | COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ | |
2300 | COSTS_N_INSNS (8), /* cost of FMUL instruction. */ | |
2301 | COSTS_N_INSNS (20), /* cost of FDIV instruction. */ | |
2302 | COSTS_N_INSNS (8), /* cost of FABS instruction. */ | |
2303 | COSTS_N_INSNS (8), /* cost of FCHS instruction. */ | |
2304 | COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ | |
6065f444 | 2305 | |
c53c148c | 2306 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
2307 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ |
2308 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
2309 | COSTS_N_INSNS (5), /* cost of MULSD instruction. */ | |
c53c148c JH |
2310 | COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ |
2311 | COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ | |
6065f444 JH |
2312 | COSTS_N_INSNS (39), /* cost of DIVSS instruction. */ |
2313 | COSTS_N_INSNS (69), /* cost of DIVSD instruction. */ | |
2314 | COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ | |
2315 | COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
2316 | 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
2317 | slm_memcpy, | |
2318 | slm_memset, | |
f6fd8f2b JH |
2319 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
2320 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
2321 | "16", /* Loop alignment. */ |
2322 | "16:8:8", /* Jump alignment. */ | |
2323 | "0:0:8", /* Label alignment. */ | |
2324 | "16", /* Func alignment. */ | |
64766e8d JH |
2325 | }; |
2326 | ||
/* memcpy strategy table referenced from intel_cost.  It holds two
   stringop_algs initializers; each names a leading algorithm followed by
   {threshold, algorithm, flag} triples ending in a -1 threshold.
   NOTE(review): the threshold is presumably a maximum block size and the
   two slots presumably serve 32-bit and 64-bit code; confirm against the
   stringop_algs declaration.  */
2327 | static stringop_algs intel_memcpy[2] = { | |
2328 | {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}}, | |
2329 | {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false}, | |
2330 | {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; | |
/* memset strategy table referenced from intel_cost.  Same layout as the
   memcpy table: two stringop_algs initializers, each a leading algorithm
   plus {threshold, algorithm, flag} triples ending in a -1 threshold.
   NOTE(review): slot and threshold meaning are assumed; confirm against
   the stringop_algs declaration.  */
2331 | static stringop_algs intel_memset[2] = { | |
2332 | {libcall, {{8, loop, false}, {15, unrolled_loop, false}, | |
2333 | {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}}, | |
2334 | {libcall, {{24, loop, false}, {32, unrolled_loop, false}, | |
2335 | {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}}; | |
/* Processor cost table for intel tuning.  The initializer is positional;
   every field is annotated inline.  Values built with COSTS_N_INSNS are
   relative to an add, and the register-allocator section is relative to
   a reg-reg integer move of cost 2.  SSE load, store and move costs are
   the same for every vector width in this table.  */
2336 | static const | |
2337 | struct processor_costs intel_cost = { | |
72bb85f8 | 2338 | { |
d321551c L |
2339 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
2340 | 6, /* cost for loading QImode using movzbl */ | |
2341 | {4, 4, 4}, /* cost of loading integer registers | |
2342 | in QImode, HImode and SImode. | |
2343 | Relative to reg-reg move (2). */ | |
2344 | {6, 6, 6}, /* cost of storing integer registers */ | |
2345 | 2, /* cost of reg,reg fld/fst */ | |
2346 | {6, 6, 8}, /* cost of loading fp registers | |
2347 | in SFmode, DFmode and XFmode */ | |
2348 | {6, 6, 10}, /* cost of storing fp registers | |
2349 | in SFmode, DFmode and XFmode */ | |
2350 | 2, /* cost of moving MMX register */ | |
2351 | {6, 6}, /* cost of loading MMX registers | |
2352 | in SImode and DImode */ | |
2353 | {6, 6}, /* cost of storing MMX registers | |
2354 | in SImode and DImode */ | |
2355 | 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ | |
2356 | {6, 6, 6, 6, 6}, /* cost of loading SSE registers | |
2357 | in 32,64,128,256 and 512-bit */ | |
2358 | {6, 6, 6, 6, 6}, /* cost of storing SSE registers | |
2359 | in 32,64,128,256 and 512-bit */ | |
2360 | 4, 4, /* SSE->integer and integer->SSE moves */ | |
2361 | /* End of register allocator costs. */ | |
72bb85f8 | 2362 | }, |
d321551c | 2363 | |
64766e8d JH |
2364 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
2365 | COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ | |
2366 | COSTS_N_INSNS (1), /* variable shift costs */ | |
2367 | COSTS_N_INSNS (1), /* constant shift costs */ | |
2368 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
2369 | COSTS_N_INSNS (3), /* HI */ | |
2370 | COSTS_N_INSNS (3), /* SI */ | |
2371 | COSTS_N_INSNS (4), /* DI */ | |
2372 | COSTS_N_INSNS (2)}, /* other */ | |
2373 | 0, /* cost of multiply per each bit set */ | |
2374 | {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ | |
2375 | COSTS_N_INSNS (26), /* HI */ | |
2376 | COSTS_N_INSNS (42), /* SI */ | |
2377 | COSTS_N_INSNS (74), /* DI */ | |
2378 | COSTS_N_INSNS (74)}, /* other */ | |
2379 | COSTS_N_INSNS (1), /* cost of movsx */ | |
2380 | COSTS_N_INSNS (1), /* cost of movzx */ | |
2381 | 8, /* "large" insn */ | |
2382 | 17, /* MOVE_RATIO */ | |
25e22b19 | 2383 | 6, /* CLEAR_RATIO */ |
64766e8d JH |
2384 | {4, 4, 4}, /* cost of loading integer registers |
2385 | in QImode, HImode and SImode. | |
2386 | Relative to reg-reg move (2). */ | |
af863030 | 2387 | {6, 6, 6}, /* cost of storing integer registers */ |
d321551c L |
2388 | {6, 6, 6, 6, 6}, /* cost of loading SSE register |
2389 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
2390 | {6, 6, 6, 6, 6}, /* cost of storing SSE register | |
2391 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 2392 | {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ |
df41dbaf | 2393 | {10, 10, 10, 10, 10}, /* cost of unaligned stores. */ |
d321551c L |
2394 | 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ |
2395 | 4, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
2396 | 6, 6, /* Gather load static, per_elt. */ |
2397 | 6, 6, /* Gather store static, per_elt. */ | |
64766e8d JH |
2398 | 32, /* size of l1 cache. */ |
2399 | 256, /* size of l2 cache. */ | |
2400 | 64, /* size of prefetch block */ | |
2401 | 6, /* number of parallel prefetches */ | |
2402 | 3, /* Branch cost */ | |
2403 | COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ | |
2404 | COSTS_N_INSNS (8), /* cost of FMUL instruction. */ | |
2405 | COSTS_N_INSNS (20), /* cost of FDIV instruction. */ | |
2406 | COSTS_N_INSNS (8), /* cost of FABS instruction. */ | |
2407 | COSTS_N_INSNS (8), /* cost of FCHS instruction. */ | |
2408 | COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ | |
6065f444 | 2409 | |
3ff59baa | 2410 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
2411 | COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */ |
2412 | COSTS_N_INSNS (8), /* cost of MULSS instruction. */ | |
2413 | COSTS_N_INSNS (8), /* cost of MULSD instruction. */ | |
c53c148c JH |
2414 | COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ |
2415 | COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ | |
6065f444 JH |
2416 | COSTS_N_INSNS (20), /* cost of DIVSS instruction. */ |
2417 | COSTS_N_INSNS (20), /* cost of DIVSD instruction. */ | |
2418 | COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */ | |
2419 | COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
2420 | 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ |
2421 | intel_memcpy, | |
2422 | intel_memset, | |
f6fd8f2b JH |
2423 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
2424 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
2425 | "16", /* Loop alignment. */ |
2426 | "16:8:8", /* Jump alignment. */ | |
2427 | "0:0:8", /* Label alignment. */ | |
2428 | "16", /* Func alignment. */ | |
64766e8d JH |
2429 | }; |
2430 | ||
2431 | /* Generic should produce code tuned for Core-i7 (and newer chips) | |
2432 | and btver1 (and newer chips). */ | |
2433 | ||
2434 | static stringop_algs generic_memcpy[2] = { | |
2435 | {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, | |
2436 | {-1, libcall, false}}}, | |
2437 | {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, | |
2438 | {-1, libcall, false}}}}; | |
2439 | static stringop_algs generic_memset[2] = { | |
2440 | {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, | |
2441 | {-1, libcall, false}}}, | |
2442 | {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false}, | |
2443 | {-1, libcall, false}}}}; | |
2444 | static const | |
2445 | struct processor_costs generic_cost = { | |
72bb85f8 | 2446 | { |
d321551c L |
2447 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
2448 | 6, /* cost for loading QImode using movzbl */ | |
2449 | {6, 6, 6}, /* cost of loading integer registers | |
2450 | in QImode, HImode and SImode. | |
2451 | Relative to reg-reg move (2). */ | |
2452 | {6, 6, 6}, /* cost of storing integer registers */ | |
2453 | 4, /* cost of reg,reg fld/fst */ | |
2454 | {6, 6, 12}, /* cost of loading fp registers | |
2455 | in SFmode, DFmode and XFmode */ | |
2456 | {6, 6, 12}, /* cost of storing fp registers | |
2457 | in SFmode, DFmode and XFmode */ | |
2458 | 2, /* cost of moving MMX register */ | |
2459 | {6, 6}, /* cost of loading MMX registers | |
2460 | in SImode and DImode */ | |
2461 | {6, 6}, /* cost of storing MMX registers | |
2462 | in SImode and DImode */ | |
2463 | 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ | |
2464 | {6, 6, 6, 10, 15}, /* cost of loading SSE registers | |
2465 | in 32,64,128,256 and 512-bit */ | |
2466 | {6, 6, 6, 10, 15}, /* cost of storing SSE registers | |
2467 | in 32,64,128,256 and 512-bit */ | |
2468 | 6, 6, /* SSE->integer and integer->SSE moves */ | |
2469 | /* End of register allocator costs. */ | |
72bb85f8 | 2470 | }, |
d321551c | 2471 | |
64766e8d | 2472 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
ef9eec0b | 2473 | /* Setting cost to 2 makes our current implementation of synth_mult result in |
64766e8d JH |
2474 | use of unnecessary temporary registers causing regression on several |
2475 | SPECfp benchmarks. */ | |
2476 | COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ | |
2477 | COSTS_N_INSNS (1), /* variable shift costs */ | |
2478 | COSTS_N_INSNS (1), /* constant shift costs */ | |
2479 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
2480 | COSTS_N_INSNS (4), /* HI */ | |
2481 | COSTS_N_INSNS (3), /* SI */ | |
2482 | COSTS_N_INSNS (4), /* DI */ | |
7c080ade | 2483 | COSTS_N_INSNS (4)}, /* other */ |
64766e8d | 2484 | 0, /* cost of multiply per each bit set */ |
7c080ade JH |
2485 | {COSTS_N_INSNS (16), /* cost of a divide/mod for QI */ |
2486 | COSTS_N_INSNS (22), /* HI */ | |
2487 | COSTS_N_INSNS (30), /* SI */ | |
64766e8d JH |
2488 | COSTS_N_INSNS (74), /* DI */ |
2489 | COSTS_N_INSNS (74)}, /* other */ | |
2490 | COSTS_N_INSNS (1), /* cost of movsx */ | |
2491 | COSTS_N_INSNS (1), /* cost of movzx */ | |
2492 | 8, /* "large" insn */ | |
2493 | 17, /* MOVE_RATIO */ | |
25e22b19 | 2494 | 6, /* CLEAR_RATIO */ |
d555138e | 2495 | {6, 6, 6}, /* cost of loading integer registers |
64766e8d JH |
2496 | in QImode, HImode and SImode. |
2497 | Relative to reg-reg move (2). */ | |
af863030 | 2498 | {6, 6, 6}, /* cost of storing integer registers */ |
d321551c L |
2499 | {6, 6, 6, 10, 15}, /* cost of loading SSE register |
2500 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
2501 | {6, 6, 6, 10, 15}, /* cost of storing SSE register | |
2502 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
7c080ade | 2503 | {6, 6, 6, 10, 15}, /* cost of unaligned loads. */ |
7c080ade | 2504 | {6, 6, 6, 10, 15}, /* cost of unaligned stores. */ |
d321551c L |
2505 | 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ |
2506 | 6, /* cost of moving SSE register to integer. */ | |
7c080ade JH |
2507 | 18, 6, /* Gather load static, per_elt. */ |
2508 | 18, 6, /* Gather store static, per_elt. */ | |
64766e8d JH |
2509 | 32, /* size of l1 cache. */ |
2510 | 512, /* size of l2 cache. */ | |
2511 | 64, /* size of prefetch block */ | |
2512 | 6, /* number of parallel prefetches */ | |
2513 | /* Benchmarks show large regressions on K8 sixtrack benchmark when this | |
2514 | value is increased to perhaps more appropriate value of 5. */ | |
2515 | 3, /* Branch cost */ | |
ef9eec0b | 2516 | COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ |
7c080ade | 2517 | COSTS_N_INSNS (5), /* cost of FMUL instruction. */ |
e8e3054e | 2518 | COSTS_N_INSNS (17), /* cost of FDIV instruction. */ |
ef9eec0b JH |
2519 | COSTS_N_INSNS (1), /* cost of FABS instruction. */ |
2520 | COSTS_N_INSNS (1), /* cost of FCHS instruction. */ | |
e8e3054e | 2521 | COSTS_N_INSNS (14), /* cost of FSQRT instruction. */ |
6065f444 | 2522 | |
ef9eec0b JH |
2523 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
2524 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ | |
2525 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
2526 | COSTS_N_INSNS (5), /* cost of MULSD instruction. */ | |
2527 | COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ | |
2528 | COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ | |
e8e3054e JH |
2529 | COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ |
2530 | COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ | |
2531 | COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */ | |
2532 | COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ | |
7c080ade | 2533 | 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ |
64766e8d JH |
2534 | generic_memcpy, |
2535 | generic_memset, | |
e8e3054e JH |
2536 | COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ |
2537 | COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
2538 | "16:11:8", /* Loop alignment. */ |
2539 | "16:11:8", /* Jump alignment. */ | |
2540 | "0:0:8", /* Label alignment. */ | |
2541 | "16", /* Func alignment. */ | |
64766e8d JH |
2542 | }; |
2543 | ||
2544 | /* core_cost should produce code tuned for the Core family of CPUs. */ | |
2545 | static stringop_algs core_memcpy[2] = { | |
2546 | {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}}, | |
2547 | {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true}, | |
2548 | {-1, libcall, false}}}}; | |
2549 | static stringop_algs core_memset[2] = { | |
2550 | {libcall, {{6, loop_1_byte, true}, | |
2551 | {24, loop, true}, | |
2552 | {8192, rep_prefix_4_byte, true}, | |
2553 | {-1, libcall, false}}}, | |
2554 | {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true}, | |
2555 | {-1, libcall, false}}}}; | |
2556 | ||
2557 | static const | |
2558 | struct processor_costs core_cost = { | |
72bb85f8 | 2559 | { |
d321551c L |
2560 | /* Start of register allocator costs. integer->integer move cost is 2. */ |
2561 | 6, /* cost for loading QImode using movzbl */ | |
2562 | {4, 4, 4}, /* cost of loading integer registers | |
2563 | in QImode, HImode and SImode. | |
2564 | Relative to reg-reg move (2). */ | |
2565 | {6, 6, 6}, /* cost of storing integer registers */ | |
2566 | 2, /* cost of reg,reg fld/fst */ | |
2567 | {6, 6, 8}, /* cost of loading fp registers | |
2568 | in SFmode, DFmode and XFmode */ | |
2569 | {6, 6, 10}, /* cost of storing fp registers | |
2570 | in SFmode, DFmode and XFmode */ | |
2571 | 2, /* cost of moving MMX register */ | |
2572 | {6, 6}, /* cost of loading MMX registers | |
2573 | in SImode and DImode */ | |
2574 | {6, 6}, /* cost of storing MMX registers | |
2575 | in SImode and DImode */ | |
2576 | 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ | |
2577 | {6, 6, 6, 6, 12}, /* cost of loading SSE registers | |
2578 | in 32,64,128,256 and 512-bit */ | |
2579 | {6, 6, 6, 6, 12}, /* cost of storing SSE registers | |
2580 | in 32,64,128,256 and 512-bit */ | |
4e9ad7c9 | 2581 | 6, 6, /* SSE->integer and integer->SSE moves */ |
d321551c | 2582 | /* End of register allocator costs. */ |
72bb85f8 | 2583 | }, |
d321551c | 2584 | |
64766e8d JH |
2585 | COSTS_N_INSNS (1), /* cost of an add instruction */ |
2586 | /* On all chips taken into consideration lea is 2 cycles and more. With | |
2587 | this cost however our current implementation of synth_mult results in | |
2588 | use of unnecessary temporary registers causing regression on several | |
2589 | SPECfp benchmarks. */ | |
2590 | COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ | |
2591 | COSTS_N_INSNS (1), /* variable shift costs */ | |
2592 | COSTS_N_INSNS (1), /* constant shift costs */ | |
2593 | {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
2594 | COSTS_N_INSNS (4), /* HI */ | |
2595 | COSTS_N_INSNS (3), /* SI */ | |
a2ef9558 MT |
2596 | /* Here we tune for Sandybridge or newer. */ |
2597 | COSTS_N_INSNS (3), /* DI */ | |
2598 | COSTS_N_INSNS (3)}, /* other */ | |
64766e8d | 2599 | 0, /* cost of multiply per each bit set */ |
02308bd3 MT |
2600 | /* Expanding div/mod currently doesn't consider parallelism. So the cost |
2601 | model is not realistic. We compensate by increasing the latencies a bit. */ | |
2602 | {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */ | |
2603 | COSTS_N_INSNS (11), /* HI */ | |
2604 | COSTS_N_INSNS (14), /* SI */ | |
ffa3ce53 JH |
2605 | COSTS_N_INSNS (81), /* DI */ |
2606 | COSTS_N_INSNS (81)}, /* other */ | |
64766e8d JH |
2607 | COSTS_N_INSNS (1), /* cost of movsx */ |
2608 | COSTS_N_INSNS (1), /* cost of movzx */ | |
2609 | 8, /* "large" insn */ | |
2610 | 17, /* MOVE_RATIO */ | |
25e22b19 | 2611 | 6, /* CLEAR_RATIO */ |
64766e8d JH |
2612 | {4, 4, 4}, /* cost of loading integer registers |
2613 | in QImode, HImode and SImode. | |
2614 | Relative to reg-reg move (2). */ | |
ffa3ce53 | 2615 | {6, 6, 6}, /* cost of storing integer registers */ |
d321551c L |
2616 | {6, 6, 6, 6, 12}, /* cost of loading SSE register |
2617 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
2618 | {6, 6, 6, 6, 12}, /* cost of storing SSE register | |
2619 | in 32bit, 64bit, 128bit, 256bit and 512bit */ | |
df41dbaf | 2620 | {6, 6, 6, 6, 12}, /* cost of unaligned loads. */ |
df41dbaf | 2621 | {6, 6, 6, 6, 12}, /* cost of unaligned stores. */ |
d321551c L |
2622 | 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ |
2623 | 2, /* cost of moving SSE register to integer. */ | |
a4fe6139 JH |
2624 | /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops, |
2625 | rec. throughput 6. | |
2626 | So 5 uops statically and one uop per load. */ | |
2627 | 10, 6, /* Gather load static, per_elt. */ | |
2628 | 10, 6, /* Gather store static, per_elt. */ | |
64766e8d JH |
2629 | 64, /* size of l1 cache. */ |
2630 | 512, /* size of l2 cache. */ | |
2631 | 64, /* size of prefetch block */ | |
2632 | 6, /* number of parallel prefetches */ | |
2633 | /* FIXME perhaps more appropriate value is 5. */ | |
2634 | 3, /* Branch cost */ | |
ef9eec0b JH |
2635 | COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ |
2636 | COSTS_N_INSNS (5), /* cost of FMUL instruction. */ | |
ffa3ce53 | 2637 | /* 10-24 */ |
ef9eec0b JH |
2638 | COSTS_N_INSNS (24), /* cost of FDIV instruction. */ |
2639 | COSTS_N_INSNS (1), /* cost of FABS instruction. */ | |
2640 | COSTS_N_INSNS (1), /* cost of FCHS instruction. */ | |
ffa3ce53 | 2641 | COSTS_N_INSNS (23), /* cost of FSQRT instruction. */ |
6065f444 | 2642 | |
c53c148c | 2643 | COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ |
6065f444 JH |
2644 | COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ |
2645 | COSTS_N_INSNS (4), /* cost of MULSS instruction. */ | |
2646 | COSTS_N_INSNS (5), /* cost of MULSD instruction. */ | |
c53c148c JH |
2647 | COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ |
2648 | COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ | |
6065f444 JH |
2649 | COSTS_N_INSNS (18), /* cost of DIVSS instruction. */ |
2650 | COSTS_N_INSNS (32), /* cost of DIVSD instruction. */ | |
2651 | COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */ | |
2652 | COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */ | |
64766e8d JH |
2653 | 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ |
2654 | core_memcpy, | |
2655 | core_memset, | |
f6fd8f2b JH |
2656 | COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ |
2657 | COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ | |
7dc58b50 ML |
2658 | "16:11:8", /* Loop alignment. */ |
2659 | "16:11:8", /* Jump alignment. */ | |
2660 | "0:0:8", /* Label alignment. */ | |
2661 | "16", /* Func alignment. */ | |
64766e8d JH |
2662 | }; |
2663 |